author     Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-17 16:15:55 -0500
committer  Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-17 16:15:55 -0500
commit     8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree       a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /net/ipv6
parent     406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'net/ipv6')
-rw-r--r--  net/ipv6/Kconfig | 20
-rw-r--r--  net/ipv6/Makefile | 6
-rw-r--r--  net/ipv6/addrconf.c | 737
-rw-r--r--  net/ipv6/addrconf_core.c | 5
-rw-r--r--  net/ipv6/addrlabel.c | 52
-rw-r--r--  net/ipv6/af_inet6.c | 380
-rw-r--r--  net/ipv6/ah6.c | 63
-rw-r--r--  net/ipv6/anycast.c | 56
-rw-r--r--  net/ipv6/datagram.c | 72
-rw-r--r--  net/ipv6/esp6.c | 45
-rw-r--r--  net/ipv6/exthdrs.c | 199
-rw-r--r--  net/ipv6/exthdrs_core.c | 185
-rw-r--r--  net/ipv6/exthdrs_offload.c | 41
-rw-r--r--  net/ipv6/fib6_rules.c | 25
-rw-r--r--  net/ipv6/icmp.c | 108
-rw-r--r--  net/ipv6/inet6_connection_sock.c | 142
-rw-r--r--  net/ipv6/inet6_hashtables.c | 27
-rw-r--r--  net/ipv6/ip6_fib.c | 331
-rw-r--r--  net/ipv6/ip6_flowlabel.c | 80
-rw-r--r--  net/ipv6/ip6_gre.c | 1754
-rw-r--r--  net/ipv6/ip6_input.c | 40
-rw-r--r--  net/ipv6/ip6_offload.c | 282
-rw-r--r--  net/ipv6/ip6_offload.h | 18
-rw-r--r--  net/ipv6/ip6_output.c | 340
-rw-r--r--  net/ipv6/ip6_tunnel.c | 597
-rw-r--r--  net/ipv6/ip6mr.c | 204
-rw-r--r--  net/ipv6/ipcomp6.c | 22
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 61
-rw-r--r--  net/ipv6/mcast.c | 128
-rw-r--r--  net/ipv6/mip6.c | 56
-rw-r--r--  net/ipv6/ndisc.c | 543
-rw-r--r--  net/ipv6/netfilter.c | 22
-rw-r--r--  net/ipv6/netfilter/Kconfig | 91
-rw-r--r--  net/ipv6/netfilter/Makefile | 9
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 116
-rw-r--r--  net/ipv6/netfilter/ip6t_MASQUERADE.c | 135
-rw-r--r--  net/ipv6/netfilter/ip6t_NPT.c | 146
-rw-r--r--  net/ipv6/netfilter/ip6t_REJECT.c | 27
-rw-r--r--  net/ipv6/netfilter/ip6t_ah.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6t_frag.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6t_hbh.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6t_rpfilter.c | 133
-rw-r--r--  net/ipv6/netfilter/ip6t_rt.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6table_filter.c | 13
-rw-r--r--  net/ipv6/netfilter/ip6table_mangle.c | 7
-rw-r--r--  net/ipv6/netfilter/ip6table_nat.c | 330
-rw-r--r--  net/ipv6/netfilter/ip6table_raw.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6table_security.c | 5
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 352
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 117
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 241
-rw-r--r--  net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6
-rw-r--r--  net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 288
-rw-r--r--  net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 90
-rw-r--r--  net/ipv6/output_core.c | 76
-rw-r--r--  net/ipv6/proc.c | 24
-rw-r--r--  net/ipv6/protocol.c | 33
-rw-r--r--  net/ipv6/raw.c | 82
-rw-r--r--  net/ipv6/reassembly.c | 156
-rw-r--r--  net/ipv6/route.c | 1333
-rw-r--r--  net/ipv6/sit.c | 572
-rw-r--r--  net/ipv6/syncookies.c | 22
-rw-r--r--  net/ipv6/sysctl_net_ipv6.c | 84
-rw-r--r--  net/ipv6/tcp_ipv6.c | 834
-rw-r--r--  net/ipv6/tcpv6_offload.c | 95
-rw-r--r--  net/ipv6/tunnel6.c | 10
-rw-r--r--  net/ipv6/udp.c | 342
-rw-r--r--  net/ipv6/udp_offload.c | 120
-rw-r--r--  net/ipv6/udplite.c | 14
-rw-r--r--  net/ipv6/xfrm6_mode_beet.c | 14
-rw-r--r--  net/ipv6/xfrm6_mode_tunnel.c | 10
-rw-r--r--  net/ipv6/xfrm6_output.c | 58
-rw-r--r--  net/ipv6/xfrm6_policy.c | 63
-rw-r--r--  net/ipv6/xfrm6_state.c | 9
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 6
75 files changed, 4155 insertions(+), 8569 deletions(-)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 4f7fe7270e3..36d7437ac05 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -69,7 +69,7 @@ config IPV6_OPTIMISTIC_DAD
69 69
70config INET6_AH 70config INET6_AH
71 tristate "IPv6: AH transformation" 71 tristate "IPv6: AH transformation"
72 select XFRM_ALGO 72 select XFRM
73 select CRYPTO 73 select CRYPTO
74 select CRYPTO_HMAC 74 select CRYPTO_HMAC
75 select CRYPTO_MD5 75 select CRYPTO_MD5
@@ -81,7 +81,7 @@ config INET6_AH
81 81
82config INET6_ESP 82config INET6_ESP
83 tristate "IPv6: ESP transformation" 83 tristate "IPv6: ESP transformation"
84 select XFRM_ALGO 84 select XFRM
85 select CRYPTO 85 select CRYPTO
86 select CRYPTO_AUTHENC 86 select CRYPTO_AUTHENC
87 select CRYPTO_HMAC 87 select CRYPTO_HMAC
@@ -201,22 +201,6 @@ config IPV6_TUNNEL
201 201
202 If unsure, say N. 202 If unsure, say N.
203 203
204config IPV6_GRE
205 tristate "IPv6: GRE tunnel"
206 select IPV6_TUNNEL
207 ---help---
208 Tunneling means encapsulating data of one protocol type within
209 another protocol and sending it over a channel that understands the
210 encapsulating protocol. This particular tunneling driver implements
211 GRE (Generic Routing Encapsulation) and at this time allows
212 encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
213 This driver is useful if the other endpoint is a Cisco router: Cisco
214 likes GRE much better than the other Linux tunneling driver ("IP
215 tunneling" above). In addition, GRE allows multicast redistribution
216 through the tunnel.
217
218 Saying M here will produce a module called ip6_gre. If unsure, say N.
219
220config IPV6_MULTIPLE_TABLES 204config IPV6_MULTIPLE_TABLES
221 bool "IPv6: Multiple Routing Tables" 205 bool "IPv6: Multiple Routing Tables"
222 depends on EXPERIMENTAL 206 depends on EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 4ea244891b5..686934acfac 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,11 +7,9 @@ obj-$(CONFIG_IPV6) += ipv6.o
7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ 7ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
8 addrlabel.o \ 8 addrlabel.o \
9 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ 9 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
10 raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ 10 raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
11 exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o 11 exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
12 12
13ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
14
15ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o 13ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
16ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o 14ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
17 15
@@ -38,9 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
38 36
39obj-$(CONFIG_IPV6_SIT) += sit.o 37obj-$(CONFIG_IPV6_SIT) += sit.o
40obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o 38obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
41obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
42 39
43obj-y += addrconf_core.o exthdrs_core.o 40obj-y += addrconf_core.o exthdrs_core.o
44obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
45 41
46obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o 42obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 420e5632638..1587d0d9295 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -38,8 +38,6 @@
38 * status etc. 38 * status etc.
39 */ 39 */
40 40
41#define pr_fmt(fmt) "IPv6: " fmt
42
43#include <linux/errno.h> 41#include <linux/errno.h>
44#include <linux/types.h> 42#include <linux/types.h>
45#include <linux/kernel.h> 43#include <linux/kernel.h>
@@ -63,13 +61,11 @@
63#include <linux/delay.h> 61#include <linux/delay.h>
64#include <linux/notifier.h> 62#include <linux/notifier.h>
65#include <linux/string.h> 63#include <linux/string.h>
66#include <linux/hash.h>
67 64
68#include <net/net_namespace.h> 65#include <net/net_namespace.h>
69#include <net/sock.h> 66#include <net/sock.h>
70#include <net/snmp.h> 67#include <net/snmp.h>
71 68
72#include <net/af_ieee802154.h>
73#include <net/ipv6.h> 69#include <net/ipv6.h>
74#include <net/protocol.h> 70#include <net/protocol.h>
75#include <net/ndisc.h> 71#include <net/ndisc.h>
@@ -81,7 +77,6 @@
81#include <net/pkt_sched.h> 77#include <net/pkt_sched.h>
82#include <linux/if_tunnel.h> 78#include <linux/if_tunnel.h>
83#include <linux/rtnetlink.h> 79#include <linux/rtnetlink.h>
84#include <linux/netconf.h>
85 80
86#ifdef CONFIG_IPV6_PRIVACY 81#ifdef CONFIG_IPV6_PRIVACY
87#include <linux/random.h> 82#include <linux/random.h>
@@ -92,7 +87,6 @@
92 87
93#include <linux/proc_fs.h> 88#include <linux/proc_fs.h>
94#include <linux/seq_file.h> 89#include <linux/seq_file.h>
95#include <linux/export.h>
96 90
97/* Set to 3 to get tracing... */ 91/* Set to 3 to get tracing... */
98#define ACONF_DEBUG 2 92#define ACONF_DEBUG 2
@@ -128,8 +122,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
128#endif 122#endif
129 123
130#ifdef CONFIG_IPV6_PRIVACY 124#ifdef CONFIG_IPV6_PRIVACY
131static void __ipv6_regen_rndid(struct inet6_dev *idev); 125static int __ipv6_regen_rndid(struct inet6_dev *idev);
132static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 126static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
133static void ipv6_regen_rndid(unsigned long data); 127static void ipv6_regen_rndid(unsigned long data);
134#endif 128#endif
135 129
@@ -154,12 +148,7 @@ static void addrconf_type_change(struct net_device *dev,
154 unsigned long event); 148 unsigned long event);
155static int addrconf_ifdown(struct net_device *dev, int how); 149static int addrconf_ifdown(struct net_device *dev, int how);
156 150
157static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, 151static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
158 int plen,
159 const struct net_device *dev,
160 u32 flags, u32 noflags);
161
162static void addrconf_dad_start(struct inet6_ifaddr *ifp);
163static void addrconf_dad_timer(unsigned long data); 152static void addrconf_dad_timer(unsigned long data);
164static void addrconf_dad_completed(struct inet6_ifaddr *ifp); 153static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
165static void addrconf_dad_run(struct inet6_dev *idev); 154static void addrconf_dad_run(struct inet6_dev *idev);
@@ -255,6 +244,12 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev)
255 return !qdisc_tx_is_noop(dev); 244 return !qdisc_tx_is_noop(dev);
256} 245}
257 246
247/* Check if a route is valid prefix route */
248static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
249{
250 return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
251}
252
258static void addrconf_del_timer(struct inet6_ifaddr *ifp) 253static void addrconf_del_timer(struct inet6_ifaddr *ifp)
259{ 254{
260 if (del_timer(&ifp->timer)) 255 if (del_timer(&ifp->timer))
@@ -330,19 +325,20 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
330 WARN_ON(idev->mc_list != NULL); 325 WARN_ON(idev->mc_list != NULL);
331 326
332#ifdef NET_REFCNT_DEBUG 327#ifdef NET_REFCNT_DEBUG
333 pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); 328 printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
334#endif 329#endif
335 dev_put(dev); 330 dev_put(dev);
336 if (!idev->dead) { 331 if (!idev->dead) {
337 pr_warn("Freeing alive inet6 device %p\n", idev); 332 pr_warning("Freeing alive inet6 device %p\n", idev);
338 return; 333 return;
339 } 334 }
340 snmp6_free_dev(idev); 335 snmp6_free_dev(idev);
341 kfree_rcu(idev, rcu); 336 kfree_rcu(idev, rcu);
342} 337}
338
343EXPORT_SYMBOL(in6_dev_finish_destroy); 339EXPORT_SYMBOL(in6_dev_finish_destroy);
344 340
345static struct inet6_dev *ipv6_add_dev(struct net_device *dev) 341static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
346{ 342{
347 struct inet6_dev *ndev; 343 struct inet6_dev *ndev;
348 344
@@ -375,7 +371,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
375 371
376 if (snmp6_alloc_dev(ndev) < 0) { 372 if (snmp6_alloc_dev(ndev) < 0) {
377 ADBG((KERN_WARNING 373 ADBG((KERN_WARNING
378 "%s: cannot allocate memory for statistics; dev=%s.\n", 374 "%s(): cannot allocate memory for statistics; dev=%s.\n",
379 __func__, dev->name)); 375 __func__, dev->name));
380 neigh_parms_release(&nd_tbl, ndev->nd_parms); 376 neigh_parms_release(&nd_tbl, ndev->nd_parms);
381 dev_put(dev); 377 dev_put(dev);
@@ -385,7 +381,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
385 381
386 if (snmp6_register_dev(ndev) < 0) { 382 if (snmp6_register_dev(ndev) < 0) {
387 ADBG((KERN_WARNING 383 ADBG((KERN_WARNING
388 "%s: cannot create /proc/net/dev_snmp6/%s\n", 384 "%s(): cannot create /proc/net/dev_snmp6/%s\n",
389 __func__, dev->name)); 385 __func__, dev->name));
390 neigh_parms_release(&nd_tbl, ndev->nd_parms); 386 neigh_parms_release(&nd_tbl, ndev->nd_parms);
391 ndev->dead = 1; 387 ndev->dead = 1;
@@ -401,9 +397,11 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
401 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) 397 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
402 ndev->cnf.accept_dad = -1; 398 ndev->cnf.accept_dad = -1;
403 399
404#if IS_ENABLED(CONFIG_IPV6_SIT) 400#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
405 if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { 401 if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
406 pr_info("%s: Disabled Multicast RS\n", dev->name); 402 printk(KERN_INFO
403 "%s: Disabled Multicast RS\n",
404 dev->name);
407 ndev->cnf.rtr_solicits = 0; 405 ndev->cnf.rtr_solicits = 0;
408 } 406 }
409#endif 407#endif
@@ -435,14 +433,10 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
435 /* Join all-node multicast group */ 433 /* Join all-node multicast group */
436 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); 434 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
437 435
438 /* Join all-router multicast group if forwarding is set */
439 if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
440 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
441
442 return ndev; 436 return ndev;
443} 437}
444 438
445static struct inet6_dev *ipv6_find_idev(struct net_device *dev) 439static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
446{ 440{
447 struct inet6_dev *idev; 441 struct inet6_dev *idev;
448 442
@@ -460,149 +454,6 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
460 return idev; 454 return idev;
461} 455}
462 456
463static int inet6_netconf_msgsize_devconf(int type)
464{
465 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
466 + nla_total_size(4); /* NETCONFA_IFINDEX */
467
468 /* type -1 is used for ALL */
469 if (type == -1 || type == NETCONFA_FORWARDING)
470 size += nla_total_size(4);
471#ifdef CONFIG_IPV6_MROUTE
472 if (type == -1 || type == NETCONFA_MC_FORWARDING)
473 size += nla_total_size(4);
474#endif
475
476 return size;
477}
478
479static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
480 struct ipv6_devconf *devconf, u32 portid,
481 u32 seq, int event, unsigned int flags,
482 int type)
483{
484 struct nlmsghdr *nlh;
485 struct netconfmsg *ncm;
486
487 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
488 flags);
489 if (nlh == NULL)
490 return -EMSGSIZE;
491
492 ncm = nlmsg_data(nlh);
493 ncm->ncm_family = AF_INET6;
494
495 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
496 goto nla_put_failure;
497
498 /* type -1 is used for ALL */
499 if ((type == -1 || type == NETCONFA_FORWARDING) &&
500 nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
501 goto nla_put_failure;
502#ifdef CONFIG_IPV6_MROUTE
503 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
504 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
505 devconf->mc_forwarding) < 0)
506 goto nla_put_failure;
507#endif
508 return nlmsg_end(skb, nlh);
509
510nla_put_failure:
511 nlmsg_cancel(skb, nlh);
512 return -EMSGSIZE;
513}
514
515void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
516 struct ipv6_devconf *devconf)
517{
518 struct sk_buff *skb;
519 int err = -ENOBUFS;
520
521 skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
522 if (skb == NULL)
523 goto errout;
524
525 err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
526 RTM_NEWNETCONF, 0, type);
527 if (err < 0) {
528 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
529 WARN_ON(err == -EMSGSIZE);
530 kfree_skb(skb);
531 goto errout;
532 }
533 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
534 return;
535errout:
536 rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
537}
538
539static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
540 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
541 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
542};
543
544static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
545 struct nlmsghdr *nlh,
546 void *arg)
547{
548 struct net *net = sock_net(in_skb->sk);
549 struct nlattr *tb[NETCONFA_MAX+1];
550 struct netconfmsg *ncm;
551 struct sk_buff *skb;
552 struct ipv6_devconf *devconf;
553 struct inet6_dev *in6_dev;
554 struct net_device *dev;
555 int ifindex;
556 int err;
557
558 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
559 devconf_ipv6_policy);
560 if (err < 0)
561 goto errout;
562
563 err = EINVAL;
564 if (!tb[NETCONFA_IFINDEX])
565 goto errout;
566
567 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
568 switch (ifindex) {
569 case NETCONFA_IFINDEX_ALL:
570 devconf = net->ipv6.devconf_all;
571 break;
572 case NETCONFA_IFINDEX_DEFAULT:
573 devconf = net->ipv6.devconf_dflt;
574 break;
575 default:
576 dev = __dev_get_by_index(net, ifindex);
577 if (dev == NULL)
578 goto errout;
579 in6_dev = __in6_dev_get(dev);
580 if (in6_dev == NULL)
581 goto errout;
582 devconf = &in6_dev->cnf;
583 break;
584 }
585
586 err = -ENOBUFS;
587 skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC);
588 if (skb == NULL)
589 goto errout;
590
591 err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
592 NETLINK_CB(in_skb).portid,
593 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
594 -1);
595 if (err < 0) {
596 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
597 WARN_ON(err == -EMSGSIZE);
598 kfree_skb(skb);
599 goto errout;
600 }
601 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
602errout:
603 return err;
604}
605
606#ifdef CONFIG_SYSCTL 457#ifdef CONFIG_SYSCTL
607static void dev_forward_change(struct inet6_dev *idev) 458static void dev_forward_change(struct inet6_dev *idev)
608{ 459{
@@ -614,7 +465,7 @@ static void dev_forward_change(struct inet6_dev *idev)
614 dev = idev->dev; 465 dev = idev->dev;
615 if (idev->cnf.forwarding) 466 if (idev->cnf.forwarding)
616 dev_disable_lro(dev); 467 dev_disable_lro(dev);
617 if (dev->flags & IFF_MULTICAST) { 468 if (dev && (dev->flags & IFF_MULTICAST)) {
618 if (idev->cnf.forwarding) 469 if (idev->cnf.forwarding)
619 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); 470 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
620 else 471 else
@@ -629,8 +480,6 @@ static void dev_forward_change(struct inet6_dev *idev)
629 else 480 else
630 addrconf_leave_anycast(ifa); 481 addrconf_leave_anycast(ifa);
631 } 482 }
632 inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
633 dev->ifindex, &idev->cnf);
634} 483}
635 484
636 485
@@ -639,7 +488,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
639 struct net_device *dev; 488 struct net_device *dev;
640 struct inet6_dev *idev; 489 struct inet6_dev *idev;
641 490
642 for_each_netdev(net, dev) { 491 rcu_read_lock();
492 for_each_netdev_rcu(net, dev) {
643 idev = __in6_dev_get(dev); 493 idev = __in6_dev_get(dev);
644 if (idev) { 494 if (idev) {
645 int changed = (!idev->cnf.forwarding) ^ (!newf); 495 int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -648,41 +498,32 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
648 dev_forward_change(idev); 498 dev_forward_change(idev);
649 } 499 }
650 } 500 }
501 rcu_read_unlock();
651} 502}
652 503
653static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) 504static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
654{ 505{
655 struct net *net; 506 struct net *net;
656 int old;
657
658 if (!rtnl_trylock())
659 return restart_syscall();
660 507
661 net = (struct net *)table->extra2; 508 net = (struct net *)table->extra2;
662 old = *p; 509 if (p == &net->ipv6.devconf_dflt->forwarding)
663 *p = newf;
664
665 if (p == &net->ipv6.devconf_dflt->forwarding) {
666 if ((!newf) ^ (!old))
667 inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
668 NETCONFA_IFINDEX_DEFAULT,
669 net->ipv6.devconf_dflt);
670 rtnl_unlock();
671 return 0; 510 return 0;
511
512 if (!rtnl_trylock()) {
513 /* Restore the original values before restarting */
514 *p = old;
515 return restart_syscall();
672 } 516 }
673 517
674 if (p == &net->ipv6.devconf_all->forwarding) { 518 if (p == &net->ipv6.devconf_all->forwarding) {
519 __s32 newf = net->ipv6.devconf_all->forwarding;
675 net->ipv6.devconf_dflt->forwarding = newf; 520 net->ipv6.devconf_dflt->forwarding = newf;
676 addrconf_forward_change(net, newf); 521 addrconf_forward_change(net, newf);
677 if ((!newf) ^ (!old)) 522 } else if ((!*p) ^ (!old))
678 inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
679 NETCONFA_IFINDEX_ALL,
680 net->ipv6.devconf_all);
681 } else if ((!newf) ^ (!old))
682 dev_forward_change((struct inet6_dev *)table->extra1); 523 dev_forward_change((struct inet6_dev *)table->extra1);
683 rtnl_unlock(); 524 rtnl_unlock();
684 525
685 if (newf) 526 if (*p)
686 rt6_purge_dflt_routers(net); 527 rt6_purge_dflt_routers(net);
687 return 1; 528 return 1;
688} 529}
@@ -694,7 +535,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
694 WARN_ON(!hlist_unhashed(&ifp->addr_lst)); 535 WARN_ON(!hlist_unhashed(&ifp->addr_lst));
695 536
696#ifdef NET_REFCNT_DEBUG 537#ifdef NET_REFCNT_DEBUG
697 pr_debug("%s\n", __func__); 538 printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
698#endif 539#endif
699 540
700 in6_dev_put(ifp->idev); 541 in6_dev_put(ifp->idev);
@@ -703,10 +544,10 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
703 pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); 544 pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
704 545
705 if (ifp->state != INET6_IFADDR_STATE_DEAD) { 546 if (ifp->state != INET6_IFADDR_STATE_DEAD) {
706 pr_warn("Freeing alive inet6 address %p\n", ifp); 547 pr_warning("Freeing alive inet6 address %p\n", ifp);
707 return; 548 return;
708 } 549 }
709 ip6_rt_put(ifp->rt); 550 dst_release(&ifp->rt->dst);
710 551
711 kfree_rcu(ifp, rcu); 552 kfree_rcu(ifp, rcu);
712} 553}
@@ -731,9 +572,15 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
731 list_add_tail(&ifp->if_list, p); 572 list_add_tail(&ifp->if_list, p);
732} 573}
733 574
734static u32 inet6_addr_hash(const struct in6_addr *addr) 575static u32 ipv6_addr_hash(const struct in6_addr *addr)
735{ 576{
736 return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); 577 /*
578 * We perform the hash function over the last 64 bits of the address
579 * This will include the IEEE address token on links that support it.
580 */
581 return jhash_2words((__force u32)addr->s6_addr32[2],
582 (__force u32)addr->s6_addr32[3], 0)
583 & (IN6_ADDR_HSIZE - 1);
737} 584}
738 585
739/* On success it returns ifp with increased reference count */ 586/* On success it returns ifp with increased reference count */
@@ -782,13 +629,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
782 goto out; 629 goto out;
783 } 630 }
784 631
785 rt = addrconf_dst_alloc(idev, addr, false); 632 rt = addrconf_dst_alloc(idev, addr, 0);
786 if (IS_ERR(rt)) { 633 if (IS_ERR(rt)) {
787 err = PTR_ERR(rt); 634 err = PTR_ERR(rt);
788 goto out; 635 goto out;
789 } 636 }
790 637
791 ifa->addr = *addr; 638 ipv6_addr_copy(&ifa->addr, addr);
792 639
793 spin_lock_init(&ifa->lock); 640 spin_lock_init(&ifa->lock);
794 spin_lock_init(&ifa->state_lock); 641 spin_lock_init(&ifa->state_lock);
@@ -802,13 +649,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
802 649
803 ifa->rt = rt; 650 ifa->rt = rt;
804 651
652 /*
653 * part one of RFC 4429, section 3.3
654 * We should not configure an address as
655 * optimistic if we do not yet know the link
656 * layer address of our nexhop router
657 */
658
659 if (dst_get_neighbour_raw(&rt->dst) == NULL)
660 ifa->flags &= ~IFA_F_OPTIMISTIC;
661
805 ifa->idev = idev; 662 ifa->idev = idev;
806 in6_dev_hold(idev); 663 in6_dev_hold(idev);
807 /* For caller */ 664 /* For caller */
808 in6_ifa_hold(ifa); 665 in6_ifa_hold(ifa);
809 666
810 /* Add to big hash table */ 667 /* Add to big hash table */
811 hash = inet6_addr_hash(addr); 668 hash = ipv6_addr_hash(addr);
812 669
813 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); 670 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
814 spin_unlock(&addrconf_hash_lock); 671 spin_unlock(&addrconf_hash_lock);
@@ -940,23 +797,20 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
940 if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { 797 if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
941 struct in6_addr prefix; 798 struct in6_addr prefix;
942 struct rt6_info *rt; 799 struct rt6_info *rt;
943 800 struct net *net = dev_net(ifp->idev->dev);
944 ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); 801 ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
802 rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);
945 803
946 rt = addrconf_get_prefix_route(&prefix, 804 if (rt && addrconf_is_prefix_route(rt)) {
947 ifp->prefix_len,
948 ifp->idev->dev,
949 0, RTF_GATEWAY | RTF_DEFAULT);
950
951 if (rt) {
952 if (onlink == 0) { 805 if (onlink == 0) {
953 ip6_del_rt(rt); 806 ip6_del_rt(rt);
954 rt = NULL; 807 rt = NULL;
955 } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { 808 } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
956 rt6_set_expires(rt, expires); 809 rt->rt6i_expires = expires;
810 rt->rt6i_flags |= RTF_EXPIRES;
957 } 811 }
958 } 812 }
959 ip6_rt_put(rt); 813 dst_release(&rt->dst);
960 } 814 }
961 815
962 /* clean up prefsrc entries */ 816 /* clean up prefsrc entries */
@@ -991,7 +845,8 @@ retry:
991 in6_dev_hold(idev); 845 in6_dev_hold(idev);
992 if (idev->cnf.use_tempaddr <= 0) { 846 if (idev->cnf.use_tempaddr <= 0) {
993 write_unlock(&idev->lock); 847 write_unlock(&idev->lock);
994 pr_info("%s: use_tempaddr is disabled\n", __func__); 848 printk(KERN_INFO
849 "ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
995 in6_dev_put(idev); 850 in6_dev_put(idev);
996 ret = -1; 851 ret = -1;
997 goto out; 852 goto out;
@@ -1001,15 +856,24 @@ retry:
1001 idev->cnf.use_tempaddr = -1; /*XXX*/ 856 idev->cnf.use_tempaddr = -1; /*XXX*/
1002 spin_unlock_bh(&ifp->lock); 857 spin_unlock_bh(&ifp->lock);
1003 write_unlock(&idev->lock); 858 write_unlock(&idev->lock);
1004 pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", 859 printk(KERN_WARNING
1005 __func__); 860 "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
1006 in6_dev_put(idev); 861 in6_dev_put(idev);
1007 ret = -1; 862 ret = -1;
1008 goto out; 863 goto out;
1009 } 864 }
1010 in6_ifa_hold(ifp); 865 in6_ifa_hold(ifp);
1011 memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); 866 memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
1012 __ipv6_try_regen_rndid(idev, tmpaddr); 867 if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
868 spin_unlock_bh(&ifp->lock);
869 write_unlock(&idev->lock);
870 printk(KERN_WARNING
871 "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
872 in6_ifa_put(ifp);
873 in6_dev_put(idev);
874 ret = -1;
875 goto out;
876 }
1013 memcpy(&addr.s6_addr[8], idev->rndid, 8); 877 memcpy(&addr.s6_addr[8], idev->rndid, 8);
1014 age = (now - ifp->tstamp) / HZ; 878 age = (now - ifp->tstamp) / HZ;
1015 tmp_valid_lft = min_t(__u32, 879 tmp_valid_lft = min_t(__u32,
@@ -1054,7 +918,8 @@ retry:
1054 if (!ift || IS_ERR(ift)) { 918 if (!ift || IS_ERR(ift)) {
1055 in6_ifa_put(ifp); 919 in6_ifa_put(ifp);
1056 in6_dev_put(idev); 920 in6_dev_put(idev);
1057 pr_info("%s: retry temporary address regeneration\n", __func__); 921 printk(KERN_INFO
922 "ipv6_create_tempaddr(): retry temporary address regeneration.\n");
1058 tmpaddr = &addr; 923 tmpaddr = &addr;
1059 write_lock(&idev->lock); 924 write_lock(&idev->lock);
1060 goto retry; 925 goto retry;
@@ -1068,7 +933,7 @@ retry:
1068 ift->tstamp = tmp_tstamp; 933 ift->tstamp = tmp_tstamp;
1069 spin_unlock_bh(&ift->lock); 934 spin_unlock_bh(&ift->lock);
1070 935
1071 addrconf_dad_start(ift); 936 addrconf_dad_start(ift, 0);
1072 in6_ifa_put(ift); 937 in6_ifa_put(ift);
1073 in6_dev_put(idev); 938 in6_dev_put(idev);
1074out: 939out:
@@ -1227,10 +1092,8 @@ static int ipv6_get_saddr_eval(struct net *net,
1227 break; 1092 break;
1228 case IPV6_SADDR_RULE_PREFIX: 1093 case IPV6_SADDR_RULE_PREFIX:
1229 /* Rule 8: Use longest matching prefix */ 1094 /* Rule 8: Use longest matching prefix */
1230 ret = ipv6_addr_diff(&score->ifa->addr, dst->addr); 1095 score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr,
1231 if (ret > score->ifa->prefix_len) 1096 dst->addr);
1232 ret = score->ifa->prefix_len;
1233 score->matchlen = ret;
1234 break; 1097 break;
1235 default: 1098 default:
1236 ret = 0; 1099 ret = 0;
@@ -1243,7 +1106,7 @@ out:
1243 return ret; 1106 return ret;
1244} 1107}
1245 1108
1246int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, 1109int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
1247 const struct in6_addr *daddr, unsigned int prefs, 1110 const struct in6_addr *daddr, unsigned int prefs,
1248 struct in6_addr *saddr) 1111 struct in6_addr *saddr)
1249{ 1112{
@@ -1364,7 +1227,7 @@ try_nextdev:
1364 if (!hiscore->ifa) 1227 if (!hiscore->ifa)
1365 return -EADDRNOTAVAIL; 1228 return -EADDRNOTAVAIL;
1366 1229
1367 *saddr = hiscore->ifa->addr; 1230 ipv6_addr_copy(saddr, &hiscore->ifa->addr);
1368 in6_ifa_put(hiscore->ifa); 1231 in6_ifa_put(hiscore->ifa);
1369 return 0; 1232 return 0;
1370} 1233}
@@ -1385,7 +1248,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1385 list_for_each_entry(ifp, &idev->addr_list, if_list) { 1248 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1386 if (ifp->scope == IFA_LINK && 1249 if (ifp->scope == IFA_LINK &&
1387 !(ifp->flags & banned_flags)) { 1250 !(ifp->flags & banned_flags)) {
1388 *addr = ifp->addr; 1251 ipv6_addr_copy(addr, &ifp->addr);
1389 err = 0; 1252 err = 0;
1390 break; 1253 break;
1391 } 1254 }
@@ -1413,7 +1276,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
1413{ 1276{
1414 struct inet6_ifaddr *ifp; 1277 struct inet6_ifaddr *ifp;
1415 struct hlist_node *node; 1278 struct hlist_node *node;
1416 unsigned int hash = inet6_addr_hash(addr); 1279 unsigned int hash = ipv6_addr_hash(addr);
1417 1280
1418 rcu_read_lock_bh(); 1281 rcu_read_lock_bh();
1419 hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { 1282 hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
@@ -1436,7 +1299,7 @@ EXPORT_SYMBOL(ipv6_chk_addr);
1436static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, 1299static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1437 struct net_device *dev) 1300 struct net_device *dev)
1438{ 1301{
1439 unsigned int hash = inet6_addr_hash(addr); 1302 unsigned int hash = ipv6_addr_hash(addr);
1440 struct inet6_ifaddr *ifp; 1303 struct inet6_ifaddr *ifp;
1441 struct hlist_node *node; 1304 struct hlist_node *node;
1442 1305
@@ -1473,13 +1336,14 @@ int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
1473 rcu_read_unlock(); 1336 rcu_read_unlock();
1474 return onlink; 1337 return onlink;
1475} 1338}
1339
1476EXPORT_SYMBOL(ipv6_chk_prefix); 1340EXPORT_SYMBOL(ipv6_chk_prefix);
1477 1341
1478struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, 1342struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
1479 struct net_device *dev, int strict) 1343 struct net_device *dev, int strict)
1480{ 1344{
1481 struct inet6_ifaddr *ifp, *result = NULL; 1345 struct inet6_ifaddr *ifp, *result = NULL;
1482 unsigned int hash = inet6_addr_hash(addr); 1346 unsigned int hash = ipv6_addr_hash(addr);
1483 struct hlist_node *node; 1347 struct hlist_node *node;
1484 1348
1485 rcu_read_lock_bh(); 1349 rcu_read_lock_bh();
@@ -1556,8 +1420,9 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1556 return; 1420 return;
1557 } 1421 }
1558 1422
1559 net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", 1423 if (net_ratelimit())
1560 ifp->idev->dev->name, &ifp->addr); 1424 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
1425 ifp->idev->dev->name, &ifp->addr);
1561 1426
1562 if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { 1427 if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
1563 struct in6_addr addr; 1428 struct in6_addr addr;
@@ -1570,7 +1435,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1570 /* DAD failed for link-local based on MAC address */ 1435 /* DAD failed for link-local based on MAC address */
1571 idev->cnf.disable_ipv6 = 1; 1436 idev->cnf.disable_ipv6 = 1;
1572 1437
1573 pr_info("%s: IPv6 being disabled!\n", 1438 printk(KERN_INFO "%s: IPv6 being disabled!\n",
1574 ifp->idev->dev->name); 1439 ifp->idev->dev->name);
1575 } 1440 }
1576 } 1441 }
@@ -1655,21 +1520,13 @@ static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
1655 return 0; 1520 return 0;
1656} 1521}
1657 1522
1658static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
1659{
1660 if (dev->addr_len != IEEE802154_ADDR_LEN)
1661 return -1;
1662 memcpy(eui, dev->dev_addr, 8);
1663 return 0;
1664}
1665
1666static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) 1523static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
1667{ 1524{
1668 /* XXX: inherit EUI-64 from other interface -- yoshfuji */ 1525 /* XXX: inherit EUI-64 from other interface -- yoshfuji */
1669 if (dev->addr_len != ARCNET_ALEN) 1526 if (dev->addr_len != ARCNET_ALEN)
1670 return -1; 1527 return -1;
1671 memset(eui, 0, 7); 1528 memset(eui, 0, 7);
1672 eui[7] = *(u8 *)dev->dev_addr; 1529 eui[7] = *(u8*)dev->dev_addr;
1673 return 0; 1530 return 0;
1674} 1531}
1675 1532
@@ -1716,6 +1573,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1716 switch (dev->type) { 1573 switch (dev->type) {
1717 case ARPHRD_ETHER: 1574 case ARPHRD_ETHER:
1718 case ARPHRD_FDDI: 1575 case ARPHRD_FDDI:
1576 case ARPHRD_IEEE802_TR:
1719 return addrconf_ifid_eui48(eui, dev); 1577 return addrconf_ifid_eui48(eui, dev);
1720 case ARPHRD_ARCNET: 1578 case ARPHRD_ARCNET:
1721 return addrconf_ifid_arcnet(eui, dev); 1579 return addrconf_ifid_arcnet(eui, dev);
@@ -1725,8 +1583,6 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
1725 return addrconf_ifid_sit(eui, dev); 1583 return addrconf_ifid_sit(eui, dev);
1726 case ARPHRD_IPGRE: 1584 case ARPHRD_IPGRE:
1727 return addrconf_ifid_gre(eui, dev); 1585 return addrconf_ifid_gre(eui, dev);
1728 case ARPHRD_IEEE802154:
1729 return addrconf_ifid_eui64(eui, dev);
1730 } 1586 }
1731 return -1; 1587 return -1;
1732} 1588}
@@ -1750,7 +1606,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
1750 1606
1751#ifdef CONFIG_IPV6_PRIVACY 1607#ifdef CONFIG_IPV6_PRIVACY
1752/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ 1608/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
1753static void __ipv6_regen_rndid(struct inet6_dev *idev) 1609static int __ipv6_regen_rndid(struct inet6_dev *idev)
1754{ 1610{
1755regen: 1611regen:
1756 get_random_bytes(idev->rndid, sizeof(idev->rndid)); 1612 get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -1777,6 +1633,8 @@ regen:
1777 if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) 1633 if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
1778 goto regen; 1634 goto regen;
1779 } 1635 }
1636
1637 return 0;
1780} 1638}
1781 1639
1782static void ipv6_regen_rndid(unsigned long data) 1640static void ipv6_regen_rndid(unsigned long data)
@@ -1790,15 +1648,17 @@ static void ipv6_regen_rndid(unsigned long data)
1790 if (idev->dead) 1648 if (idev->dead)
1791 goto out; 1649 goto out;
1792 1650
1793 __ipv6_regen_rndid(idev); 1651 if (__ipv6_regen_rndid(idev) < 0)
1652 goto out;
1794 1653
1795 expires = jiffies + 1654 expires = jiffies +
1796 idev->cnf.temp_prefered_lft * HZ - 1655 idev->cnf.temp_prefered_lft * HZ -
1797 idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - 1656 idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
1798 idev->cnf.max_desync_factor * HZ; 1657 idev->cnf.max_desync_factor * HZ;
1799 if (time_before(expires, jiffies)) { 1658 if (time_before(expires, jiffies)) {
1800 pr_warn("%s: too short regeneration interval; timer disabled for %s\n", 1659 printk(KERN_WARNING
1801 __func__, idev->dev->name); 1660 "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
1661 idev->dev->name);
1802 goto out; 1662 goto out;
1803 } 1663 }
1804 1664
@@ -1811,10 +1671,12 @@ out:
1811 in6_dev_put(idev); 1671 in6_dev_put(idev);
1812} 1672}
1813 1673
1814static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) 1674static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
1815{ 1675 int ret = 0;
1676
1816 if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) 1677 if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
1817 __ipv6_regen_rndid(idev); 1678 ret = __ipv6_regen_rndid(idev);
1679 return ret;
1818} 1680}
1819#endif 1681#endif
1820 1682
@@ -1837,13 +1699,13 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1837 .fc_protocol = RTPROT_KERNEL, 1699 .fc_protocol = RTPROT_KERNEL,
1838 }; 1700 };
1839 1701
1840 cfg.fc_dst = *pfx; 1702 ipv6_addr_copy(&cfg.fc_dst, pfx);
1841 1703
1842 /* Prevent useless cloning on PtP SIT. 1704 /* Prevent useless cloning on PtP SIT.
1843 This thing is done here expecting that the whole 1705 This thing is done here expecting that the whole
1844 class of non-broadcast devices need not cloning. 1706 class of non-broadcast devices need not cloning.
1845 */ 1707 */
1846#if IS_ENABLED(CONFIG_IPV6_SIT) 1708#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
1847 if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) 1709 if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
1848 cfg.fc_flags |= RTF_NONEXTHOP; 1710 cfg.fc_flags |= RTF_NONEXTHOP;
1849#endif 1711#endif
@@ -1851,40 +1713,6 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1851 ip6_route_add(&cfg); 1713 ip6_route_add(&cfg);
1852} 1714}
1853 1715
1854
1855static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
1856 int plen,
1857 const struct net_device *dev,
1858 u32 flags, u32 noflags)
1859{
1860 struct fib6_node *fn;
1861 struct rt6_info *rt = NULL;
1862 struct fib6_table *table;
1863
1864 table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
1865 if (table == NULL)
1866 return NULL;
1867
1868 read_lock_bh(&table->tb6_lock);
1869 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
1870 if (!fn)
1871 goto out;
1872 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1873 if (rt->dst.dev->ifindex != dev->ifindex)
1874 continue;
1875 if ((rt->rt6i_flags & flags) != flags)
1876 continue;
1877 if ((rt->rt6i_flags & noflags) != 0)
1878 continue;
1879 dst_hold(&rt->dst);
1880 break;
1881 }
1882out:
1883 read_unlock_bh(&table->tb6_lock);
1884 return rt;
1885}
1886
1887
1888/* Create "default" multicast route to the interface */ 1716/* Create "default" multicast route to the interface */
1889 1717
1890static void addrconf_add_mroute(struct net_device *dev) 1718static void addrconf_add_mroute(struct net_device *dev)
@@ -1903,7 +1731,7 @@ static void addrconf_add_mroute(struct net_device *dev)
1903 ip6_route_add(&cfg); 1731 ip6_route_add(&cfg);
1904} 1732}
1905 1733
1906#if IS_ENABLED(CONFIG_IPV6_SIT) 1734#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
1907static void sit_route_add(struct net_device *dev) 1735static void sit_route_add(struct net_device *dev)
1908{ 1736{
1909 struct fib6_config cfg = { 1737 struct fib6_config cfg = {
@@ -1920,6 +1748,14 @@ static void sit_route_add(struct net_device *dev)
1920} 1748}
1921#endif 1749#endif
1922 1750
1751static void addrconf_add_lroute(struct net_device *dev)
1752{
1753 struct in6_addr addr;
1754
1755 ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
1756 addrconf_prefix_route(&addr, 64, dev, 0, 0);
1757}
1758
1923static struct inet6_dev *addrconf_add_dev(struct net_device *dev) 1759static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
1924{ 1760{
1925 struct inet6_dev *idev; 1761 struct inet6_dev *idev;
@@ -1934,13 +1770,14 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
1934 return ERR_PTR(-EACCES); 1770 return ERR_PTR(-EACCES);
1935 1771
1936 /* Add default multicast route */ 1772 /* Add default multicast route */
1937 if (!(dev->flags & IFF_LOOPBACK)) 1773 addrconf_add_mroute(dev);
1938 addrconf_add_mroute(dev);
1939 1774
1775 /* Add link local route */
1776 addrconf_add_lroute(dev);
1940 return idev; 1777 return idev;
1941} 1778}
1942 1779
1943void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) 1780void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1944{ 1781{
1945 struct prefix_info *pinfo; 1782 struct prefix_info *pinfo;
1946 __u32 valid_lft; 1783 __u32 valid_lft;
@@ -1969,15 +1806,16 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
1969 prefered_lft = ntohl(pinfo->prefered); 1806 prefered_lft = ntohl(pinfo->prefered);
1970 1807
1971 if (prefered_lft > valid_lft) { 1808 if (prefered_lft > valid_lft) {
1972 net_warn_ratelimited("addrconf: prefix option has invalid lifetime\n"); 1809 if (net_ratelimit())
1810 printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
1973 return; 1811 return;
1974 } 1812 }
1975 1813
1976 in6_dev = in6_dev_get(dev); 1814 in6_dev = in6_dev_get(dev);
1977 1815
1978 if (in6_dev == NULL) { 1816 if (in6_dev == NULL) {
1979 net_dbg_ratelimited("addrconf: device %s not configured\n", 1817 if (net_ratelimit())
1980 dev->name); 1818 printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
1981 return; 1819 return;
1982 } 1820 }
1983 1821
@@ -2004,22 +1842,21 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
2004 if (addrconf_finite_timeout(rt_expires)) 1842 if (addrconf_finite_timeout(rt_expires))
2005 rt_expires *= HZ; 1843 rt_expires *= HZ;
2006 1844
2007 rt = addrconf_get_prefix_route(&pinfo->prefix, 1845 rt = rt6_lookup(net, &pinfo->prefix, NULL,
2008 pinfo->prefix_len, 1846 dev->ifindex, 1);
2009 dev,
2010 RTF_ADDRCONF | RTF_PREFIX_RT,
2011 RTF_GATEWAY | RTF_DEFAULT);
2012 1847
2013 if (rt) { 1848 if (rt && addrconf_is_prefix_route(rt)) {
2014 /* Autoconf prefix route */ 1849 /* Autoconf prefix route */
2015 if (valid_lft == 0) { 1850 if (valid_lft == 0) {
2016 ip6_del_rt(rt); 1851 ip6_del_rt(rt);
2017 rt = NULL; 1852 rt = NULL;
2018 } else if (addrconf_finite_timeout(rt_expires)) { 1853 } else if (addrconf_finite_timeout(rt_expires)) {
2019 /* not infinity */ 1854 /* not infinity */
2020 rt6_set_expires(rt, jiffies + rt_expires); 1855 rt->rt6i_expires = jiffies + rt_expires;
1856 rt->rt6i_flags |= RTF_EXPIRES;
2021 } else { 1857 } else {
2022 rt6_clean_expires(rt); 1858 rt->rt6i_flags &= ~RTF_EXPIRES;
1859 rt->rt6i_expires = 0;
2023 } 1860 }
2024 } else if (valid_lft) { 1861 } else if (valid_lft) {
2025 clock_t expires = 0; 1862 clock_t expires = 0;
@@ -2032,13 +1869,14 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
2032 addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, 1869 addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
2033 dev, expires, flags); 1870 dev, expires, flags);
2034 } 1871 }
2035 ip6_rt_put(rt); 1872 if (rt)
1873 dst_release(&rt->dst);
2036 } 1874 }
2037 1875
2038 /* Try to figure out our local address for this prefix */ 1876 /* Try to figure out our local address for this prefix */
2039 1877
2040 if (pinfo->autoconf && in6_dev->cnf.autoconf) { 1878 if (pinfo->autoconf && in6_dev->cnf.autoconf) {
2041 struct inet6_ifaddr *ifp; 1879 struct inet6_ifaddr * ifp;
2042 struct in6_addr addr; 1880 struct in6_addr addr;
2043 int create = 0, update_lft = 0; 1881 int create = 0, update_lft = 0;
2044 1882
@@ -2051,8 +1889,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
2051 } 1889 }
2052 goto ok; 1890 goto ok;
2053 } 1891 }
2054 net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n", 1892 if (net_ratelimit())
2055 pinfo->prefix_len); 1893 printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
1894 pinfo->prefix_len);
2056 in6_dev_put(in6_dev); 1895 in6_dev_put(in6_dev);
2057 return; 1896 return;
2058 1897
@@ -2066,7 +1905,7 @@ ok:
2066 1905
2067#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 1906#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
2068 if (in6_dev->cnf.optimistic_dad && 1907 if (in6_dev->cnf.optimistic_dad &&
2069 !net->ipv6.devconf_all->forwarding && sllao) 1908 !net->ipv6.devconf_all->forwarding)
2070 addr_flags = IFA_F_OPTIMISTIC; 1909 addr_flags = IFA_F_OPTIMISTIC;
2071#endif 1910#endif
2072 1911
@@ -2086,7 +1925,7 @@ ok:
2086 1925
2087 update_lft = create = 1; 1926 update_lft = create = 1;
2088 ifp->cstamp = jiffies; 1927 ifp->cstamp = jiffies;
2089 addrconf_dad_start(ifp); 1928 addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
2090 } 1929 }
2091 1930
2092 if (ifp) { 1931 if (ifp) {
@@ -2254,7 +2093,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
2254 if (dev == NULL) 2093 if (dev == NULL)
2255 goto err_exit; 2094 goto err_exit;
2256 2095
2257#if IS_ENABLED(CONFIG_IPV6_SIT) 2096#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2258 if (dev->type == ARPHRD_SIT) { 2097 if (dev->type == ARPHRD_SIT) {
2259 const struct net_device_ops *ops = dev->netdev_ops; 2098 const struct net_device_ops *ops = dev->netdev_ops;
2260 struct ifreq ifr; 2099 struct ifreq ifr;
@@ -2365,7 +2204,7 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p
2365 * that the Optimistic flag should not be set for 2204 * that the Optimistic flag should not be set for
2366 * manually configured addresses 2205 * manually configured addresses
2367 */ 2206 */
2368 addrconf_dad_start(ifp); 2207 addrconf_dad_start(ifp, 0);
2369 in6_ifa_put(ifp); 2208 in6_ifa_put(ifp);
2370 addrconf_verify(0); 2209 addrconf_verify(0);
2371 return 0; 2210 return 0;
@@ -2418,7 +2257,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
2418 struct in6_ifreq ireq; 2257 struct in6_ifreq ireq;
2419 int err; 2258 int err;
2420 2259
2421 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 2260 if (!capable(CAP_NET_ADMIN))
2422 return -EPERM; 2261 return -EPERM;
2423 2262
2424 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) 2263 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2437,7 +2276,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
2437 struct in6_ifreq ireq; 2276 struct in6_ifreq ireq;
2438 int err; 2277 int err;
2439 2278
2440 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 2279 if (!capable(CAP_NET_ADMIN))
2441 return -EPERM; 2280 return -EPERM;
2442 2281
2443 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) 2282 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
@@ -2465,7 +2304,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
2465 } 2304 }
2466} 2305}
2467 2306
2468#if IS_ENABLED(CONFIG_IPV6_SIT) 2307#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2469static void sit_add_v4_addrs(struct inet6_dev *idev) 2308static void sit_add_v4_addrs(struct inet6_dev *idev)
2470{ 2309{
2471 struct in6_addr addr; 2310 struct in6_addr addr;
@@ -2491,9 +2330,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
2491 } 2330 }
2492 2331
2493 for_each_netdev(net, dev) { 2332 for_each_netdev(net, dev) {
2494 struct in_device *in_dev = __in_dev_get_rtnl(dev); 2333 struct in_device * in_dev = __in_dev_get_rtnl(dev);
2495 if (in_dev && (dev->flags & IFF_UP)) { 2334 if (in_dev && (dev->flags & IFF_UP)) {
2496 struct in_ifaddr *ifa; 2335 struct in_ifaddr * ifa;
2497 2336
2498 int flag = scope; 2337 int flag = scope;
2499 2338
@@ -2530,7 +2369,7 @@ static void init_loopback(struct net_device *dev)
2530 ASSERT_RTNL(); 2369 ASSERT_RTNL();
2531 2370
2532 if ((idev = ipv6_find_idev(dev)) == NULL) { 2371 if ((idev = ipv6_find_idev(dev)) == NULL) {
2533 pr_debug("%s: add_dev failed\n", __func__); 2372 printk(KERN_DEBUG "init loopback: add_dev failed\n");
2534 return; 2373 return;
2535 } 2374 }
2536 2375
@@ -2539,7 +2378,7 @@ static void init_loopback(struct net_device *dev)
2539 2378
2540static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) 2379static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
2541{ 2380{
2542 struct inet6_ifaddr *ifp; 2381 struct inet6_ifaddr * ifp;
2543 u32 addr_flags = IFA_F_PERMANENT; 2382 u32 addr_flags = IFA_F_PERMANENT;
2544 2383
2545#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 2384#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -2552,7 +2391,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
2552 ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); 2391 ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
2553 if (!IS_ERR(ifp)) { 2392 if (!IS_ERR(ifp)) {
2554 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); 2393 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
2555 addrconf_dad_start(ifp); 2394 addrconf_dad_start(ifp, 0);
2556 in6_ifa_put(ifp); 2395 in6_ifa_put(ifp);
2557 } 2396 }
2558} 2397}
@@ -2560,15 +2399,15 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
2560static void addrconf_dev_config(struct net_device *dev) 2399static void addrconf_dev_config(struct net_device *dev)
2561{ 2400{
2562 struct in6_addr addr; 2401 struct in6_addr addr;
2563 struct inet6_dev *idev; 2402 struct inet6_dev * idev;
2564 2403
2565 ASSERT_RTNL(); 2404 ASSERT_RTNL();
2566 2405
2567 if ((dev->type != ARPHRD_ETHER) && 2406 if ((dev->type != ARPHRD_ETHER) &&
2568 (dev->type != ARPHRD_FDDI) && 2407 (dev->type != ARPHRD_FDDI) &&
2408 (dev->type != ARPHRD_IEEE802_TR) &&
2569 (dev->type != ARPHRD_ARCNET) && 2409 (dev->type != ARPHRD_ARCNET) &&
2570 (dev->type != ARPHRD_INFINIBAND) && 2410 (dev->type != ARPHRD_INFINIBAND)) {
2571 (dev->type != ARPHRD_IEEE802154)) {
2572 /* Alas, we support only Ethernet autoconfiguration. */ 2411 /* Alas, we support only Ethernet autoconfiguration. */
2573 return; 2412 return;
2574 } 2413 }
@@ -2584,7 +2423,7 @@ static void addrconf_dev_config(struct net_device *dev)
2584 addrconf_add_linklocal(idev, &addr); 2423 addrconf_add_linklocal(idev, &addr);
2585} 2424}
2586 2425
2587#if IS_ENABLED(CONFIG_IPV6_SIT) 2426#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2588static void addrconf_sit_config(struct net_device *dev) 2427static void addrconf_sit_config(struct net_device *dev)
2589{ 2428{
2590 struct inet6_dev *idev; 2429 struct inet6_dev *idev;
@@ -2598,7 +2437,7 @@ static void addrconf_sit_config(struct net_device *dev)
2598 */ 2437 */
2599 2438
2600 if ((idev = ipv6_find_idev(dev)) == NULL) { 2439 if ((idev = ipv6_find_idev(dev)) == NULL) {
2601 pr_debug("%s: add_dev failed\n", __func__); 2440 printk(KERN_DEBUG "init sit: add_dev failed\n");
2602 return; 2441 return;
2603 } 2442 }
2604 2443
@@ -2614,25 +2453,26 @@ static void addrconf_sit_config(struct net_device *dev)
2614 2453
2615 sit_add_v4_addrs(idev); 2454 sit_add_v4_addrs(idev);
2616 2455
2617 if (dev->flags&IFF_POINTOPOINT) 2456 if (dev->flags&IFF_POINTOPOINT) {
2618 addrconf_add_mroute(dev); 2457 addrconf_add_mroute(dev);
2619 else 2458 addrconf_add_lroute(dev);
2459 } else
2620 sit_route_add(dev); 2460 sit_route_add(dev);
2621} 2461}
2622#endif 2462#endif
2623 2463
2624#if IS_ENABLED(CONFIG_NET_IPGRE) 2464#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
2625static void addrconf_gre_config(struct net_device *dev) 2465static void addrconf_gre_config(struct net_device *dev)
2626{ 2466{
2627 struct inet6_dev *idev; 2467 struct inet6_dev *idev;
2628 struct in6_addr addr; 2468 struct in6_addr addr;
2629 2469
2630 pr_info("%s(%s)\n", __func__, dev->name); 2470 pr_info("ipv6: addrconf_gre_config(%s)\n", dev->name);
2631 2471
2632 ASSERT_RTNL(); 2472 ASSERT_RTNL();
2633 2473
2634 if ((idev = ipv6_find_idev(dev)) == NULL) { 2474 if ((idev = ipv6_find_idev(dev)) == NULL) {
2635 pr_debug("%s: add_dev failed\n", __func__); 2475 printk(KERN_DEBUG "init gre: add_dev failed\n");
2636 return; 2476 return;
2637 } 2477 }
2638 2478
@@ -2672,7 +2512,7 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
2672 if (!ipv6_inherit_linklocal(idev, link_dev)) 2512 if (!ipv6_inherit_linklocal(idev, link_dev))
2673 return; 2513 return;
2674 } 2514 }
2675 pr_debug("init ip6-ip6: add_linklocal failed\n"); 2515 printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
2676} 2516}
2677 2517
2678/* 2518/*
@@ -2688,14 +2528,14 @@ static void addrconf_ip6_tnl_config(struct net_device *dev)
2688 2528
2689 idev = addrconf_add_dev(dev); 2529 idev = addrconf_add_dev(dev);
2690 if (IS_ERR(idev)) { 2530 if (IS_ERR(idev)) {
2691 pr_debug("init ip6-ip6: add_dev failed\n"); 2531 printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
2692 return; 2532 return;
2693 } 2533 }
2694 ip6_tnl_add_linklocal(idev); 2534 ip6_tnl_add_linklocal(idev);
2695} 2535}
2696 2536
2697static int addrconf_notify(struct notifier_block *this, unsigned long event, 2537static int addrconf_notify(struct notifier_block *this, unsigned long event,
2698 void *data) 2538 void * data)
2699{ 2539{
2700 struct net_device *dev = (struct net_device *) data; 2540 struct net_device *dev = (struct net_device *) data;
2701 struct inet6_dev *idev = __in6_dev_get(dev); 2541 struct inet6_dev *idev = __in6_dev_get(dev);
@@ -2719,7 +2559,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2719 if (event == NETDEV_UP) { 2559 if (event == NETDEV_UP) {
2720 if (!addrconf_qdisc_ok(dev)) { 2560 if (!addrconf_qdisc_ok(dev)) {
2721 /* device is not ready yet. */ 2561 /* device is not ready yet. */
2722 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", 2562 printk(KERN_INFO
2563 "ADDRCONF(NETDEV_UP): %s: "
2564 "link is not ready\n",
2723 dev->name); 2565 dev->name);
2724 break; 2566 break;
2725 } 2567 }
@@ -2744,19 +2586,21 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2744 idev->if_flags |= IF_READY; 2586 idev->if_flags |= IF_READY;
2745 } 2587 }
2746 2588
2747 pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n", 2589 printk(KERN_INFO
2748 dev->name); 2590 "ADDRCONF(NETDEV_CHANGE): %s: "
2591 "link becomes ready\n",
2592 dev->name);
2749 2593
2750 run_pending = 1; 2594 run_pending = 1;
2751 } 2595 }
2752 2596
2753 switch (dev->type) { 2597 switch (dev->type) {
2754#if IS_ENABLED(CONFIG_IPV6_SIT) 2598#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2755 case ARPHRD_SIT: 2599 case ARPHRD_SIT:
2756 addrconf_sit_config(dev); 2600 addrconf_sit_config(dev);
2757 break; 2601 break;
2758#endif 2602#endif
2759#if IS_ENABLED(CONFIG_NET_IPGRE) 2603#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE)
2760 case ARPHRD_IPGRE: 2604 case ARPHRD_IPGRE:
2761 addrconf_gre_config(dev); 2605 addrconf_gre_config(dev);
2762 break; 2606 break;
@@ -2889,7 +2733,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2889 idev->dead = 1; 2733 idev->dead = 1;
2890 2734
2891 /* protected by rtnl_lock */ 2735 /* protected by rtnl_lock */
2892 RCU_INIT_POINTER(dev->ip6_ptr, NULL); 2736 rcu_assign_pointer(dev->ip6_ptr, NULL);
2893 2737
2894 /* Step 1.5: remove snmp6 entry */ 2738 /* Step 1.5: remove snmp6 entry */
2895 snmp6_unregister_dev(idev); 2739 snmp6_unregister_dev(idev);
@@ -2993,7 +2837,7 @@ static void addrconf_rs_timer(unsigned long data)
2993 if (idev->dead || !(idev->if_flags & IF_READY)) 2837 if (idev->dead || !(idev->if_flags & IF_READY))
2994 goto out; 2838 goto out;
2995 2839
2996 if (!ipv6_accept_ra(idev)) 2840 if (idev->cnf.forwarding)
2997 goto out; 2841 goto out;
2998 2842
2999 /* Announcement received after solicitation was sent */ 2843 /* Announcement received after solicitation was sent */
@@ -3016,7 +2860,8 @@ static void addrconf_rs_timer(unsigned long data)
3016 * Note: we do not support deprecated "all on-link" 2860 * Note: we do not support deprecated "all on-link"
3017 * assumption any longer. 2861 * assumption any longer.
3018 */ 2862 */
3019 pr_debug("%s: no IPv6 routers present\n", idev->dev->name); 2863 printk(KERN_DEBUG "%s: no IPv6 routers present\n",
2864 idev->dev->name);
3020 } 2865 }
3021 2866
3022out: 2867out:
@@ -3041,7 +2886,7 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
3041 addrconf_mod_timer(ifp, AC_DAD, rand_num); 2886 addrconf_mod_timer(ifp, AC_DAD, rand_num);
3042} 2887}
3043 2888
3044static void addrconf_dad_start(struct inet6_ifaddr *ifp) 2889static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
3045{ 2890{
3046 struct inet6_dev *idev = ifp->idev; 2891 struct inet6_dev *idev = ifp->idev;
3047 struct net_device *dev = idev->dev; 2892 struct net_device *dev = idev->dev;
@@ -3151,11 +2996,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
3151 2996
3152 ipv6_ifa_notify(RTM_NEWADDR, ifp); 2997 ipv6_ifa_notify(RTM_NEWADDR, ifp);
3153 2998
3154 /* If added prefix is link local and we are prepared to process 2999 /* If added prefix is link local and forwarding is off,
3155 router advertisements, start sending router solicitations. 3000 start sending router solicitations.
3156 */ 3001 */
3157 3002
3158 if (ipv6_accept_ra(ifp->idev) && 3003 if ((ifp->idev->cnf.forwarding == 0 ||
3004 ifp->idev->cnf.forwarding == 2) &&
3159 ifp->idev->cnf.rtr_solicits > 0 && 3005 ifp->idev->cnf.rtr_solicits > 0 &&
3160 (dev->flags&IFF_LOOPBACK) == 0 && 3006 (dev->flags&IFF_LOOPBACK) == 0 &&
3161 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { 3007 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
@@ -3193,40 +3039,20 @@ static void addrconf_dad_run(struct inet6_dev *idev)
3193struct if6_iter_state { 3039struct if6_iter_state {
3194 struct seq_net_private p; 3040 struct seq_net_private p;
3195 int bucket; 3041 int bucket;
3196 int offset;
3197}; 3042};
3198 3043
3199static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) 3044static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
3200{ 3045{
3201 struct inet6_ifaddr *ifa = NULL; 3046 struct inet6_ifaddr *ifa = NULL;
3202 struct if6_iter_state *state = seq->private; 3047 struct if6_iter_state *state = seq->private;
3203 struct net *net = seq_file_net(seq); 3048 struct net *net = seq_file_net(seq);
3204 int p = 0;
3205 3049
3206 /* initial bucket if pos is 0 */ 3050 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
3207 if (pos == 0) {
3208 state->bucket = 0;
3209 state->offset = 0;
3210 }
3211
3212 for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
3213 struct hlist_node *n; 3051 struct hlist_node *n;
3214 hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], 3052 hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
3215 addr_lst) { 3053 addr_lst)
3216 if (!net_eq(dev_net(ifa->idev->dev), net)) 3054 if (net_eq(dev_net(ifa->idev->dev), net))
3217 continue; 3055 return ifa;
3218 /* sync with offset */
3219 if (p < state->offset) {
3220 p++;
3221 continue;
3222 }
3223 state->offset++;
3224 return ifa;
3225 }
3226
3227 /* prepare for next bucket */
3228 state->offset = 0;
3229 p = 0;
3230 } 3056 }
3231 return NULL; 3057 return NULL;
3232} 3058}
@@ -3238,32 +3064,36 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
3238 struct net *net = seq_file_net(seq); 3064 struct net *net = seq_file_net(seq);
3239 struct hlist_node *n = &ifa->addr_lst; 3065 struct hlist_node *n = &ifa->addr_lst;
3240 3066
3241 hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) { 3067 hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst)
3242 if (!net_eq(dev_net(ifa->idev->dev), net)) 3068 if (net_eq(dev_net(ifa->idev->dev), net))
3243 continue; 3069 return ifa;
3244 state->offset++;
3245 return ifa;
3246 }
3247 3070
3248 while (++state->bucket < IN6_ADDR_HSIZE) { 3071 while (++state->bucket < IN6_ADDR_HSIZE) {
3249 state->offset = 0;
3250 hlist_for_each_entry_rcu_bh(ifa, n, 3072 hlist_for_each_entry_rcu_bh(ifa, n,
3251 &inet6_addr_lst[state->bucket], addr_lst) { 3073 &inet6_addr_lst[state->bucket], addr_lst) {
3252 if (!net_eq(dev_net(ifa->idev->dev), net)) 3074 if (net_eq(dev_net(ifa->idev->dev), net))
3253 continue; 3075 return ifa;
3254 state->offset++;
3255 return ifa;
3256 } 3076 }
3257 } 3077 }
3258 3078
3259 return NULL; 3079 return NULL;
3260} 3080}
3261 3081
3082static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
3083{
3084 struct inet6_ifaddr *ifa = if6_get_first(seq);
3085
3086 if (ifa)
3087 while (pos && (ifa = if6_get_next(seq, ifa)) != NULL)
3088 --pos;
3089 return pos ? NULL : ifa;
3090}
3091
3262static void *if6_seq_start(struct seq_file *seq, loff_t *pos) 3092static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
3263 __acquires(rcu_bh) 3093 __acquires(rcu_bh)
3264{ 3094{
3265 rcu_read_lock_bh(); 3095 rcu_read_lock_bh();
3266 return if6_get_first(seq, *pos); 3096 return if6_get_idx(seq, *pos);
3267} 3097}
3268 3098
3269static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3099static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
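[Illustrative note] The hunk above swaps the /proc/net/if_inet6 iterator: upstream resumes a dump from a remembered (bucket, offset) cursor, while the reverted code rescans the hash table from bucket 0 and counts off `pos` entries on every restart. A standalone C sketch of the two strategies follows; the types and names are illustrative only, not kernel code.

/*
 * Standalone sketch (not kernel code) of the two iteration strategies in the
 * hunk above: the older style restarts from bucket 0 and skips "pos" entries
 * on every ->start, while the offset-based style remembers (bucket, offset)
 * so a resume does not rescan earlier buckets.
 */
#include <stdio.h>
#include <stdlib.h>

#define HSIZE 4

struct entry { int value; struct entry *next; };

static struct entry *table[HSIZE];

struct iter_state { int bucket; long offset; };

/* Older style: linear rescan, O(pos) work per ->start call. */
static struct entry *get_idx(long pos)
{
	for (int b = 0; b < HSIZE; b++)
		for (struct entry *e = table[b]; e; e = e->next)
			if (pos-- == 0)
				return e;
	return NULL;
}

/* Offset-based style: resume from the remembered bucket/offset. */
static struct entry *get_first(struct iter_state *st, long pos)
{
	if (pos == 0) {			/* fresh walk: reset the cursor */
		st->bucket = 0;
		st->offset = 0;
	}
	for (; st->bucket < HSIZE; st->bucket++, st->offset = 0) {
		long p = 0;
		for (struct entry *e = table[st->bucket]; e; e = e->next) {
			if (p++ < st->offset)
				continue;	/* already reported earlier */
			st->offset++;
			return e;
		}
	}
	return NULL;
}

int main(void)
{
	/* Build a tiny table: value i goes into bucket i % HSIZE. */
	for (int i = 0; i < 10; i++) {
		struct entry *e = malloc(sizeof(*e));
		e->value = i;
		e->next = table[i % HSIZE];
		table[i % HSIZE] = e;
	}

	struct iter_state st = { 0, 0 };
	printf("linear rescan at pos 5: %d\n", get_idx(5)->value);
	printf("resumable walk, first entry: %d\n", get_first(&st, 0)->value);
	return 0;
}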
@@ -3343,14 +3173,14 @@ void if6_proc_exit(void)
3343} 3173}
3344#endif /* CONFIG_PROC_FS */ 3174#endif /* CONFIG_PROC_FS */
3345 3175
3346#if IS_ENABLED(CONFIG_IPV6_MIP6) 3176#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
3347/* Check if address is a home address configured on any interface. */ 3177/* Check if address is a home address configured on any interface. */
3348int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) 3178int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
3349{ 3179{
3350 int ret = 0; 3180 int ret = 0;
3351 struct inet6_ifaddr *ifp = NULL; 3181 struct inet6_ifaddr *ifp = NULL;
3352 struct hlist_node *n; 3182 struct hlist_node *n;
3353 unsigned int hash = inet6_addr_hash(addr); 3183 unsigned int hash = ipv6_addr_hash(addr);
3354 3184
3355 rcu_read_lock_bh(); 3185 rcu_read_lock_bh();
3356 hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { 3186 hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
@@ -3683,12 +3513,12 @@ static inline int inet6_ifaddr_msgsize(void)
3683} 3513}
3684 3514
3685static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, 3515static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3686 u32 portid, u32 seq, int event, unsigned int flags) 3516 u32 pid, u32 seq, int event, unsigned int flags)
3687{ 3517{
3688 struct nlmsghdr *nlh; 3518 struct nlmsghdr *nlh;
3689 u32 preferred, valid; 3519 u32 preferred, valid;
3690 3520
3691 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); 3521 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
3692 if (nlh == NULL) 3522 if (nlh == NULL)
3693 return -EMSGSIZE; 3523 return -EMSGSIZE;
3694 3524
@@ -3726,7 +3556,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3726} 3556}
3727 3557
3728static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 3558static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3729 u32 portid, u32 seq, int event, u16 flags) 3559 u32 pid, u32 seq, int event, u16 flags)
3730{ 3560{
3731 struct nlmsghdr *nlh; 3561 struct nlmsghdr *nlh;
3732 u8 scope = RT_SCOPE_UNIVERSE; 3562 u8 scope = RT_SCOPE_UNIVERSE;
@@ -3735,7 +3565,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3735 if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) 3565 if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
3736 scope = RT_SCOPE_SITE; 3566 scope = RT_SCOPE_SITE;
3737 3567
3738 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); 3568 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
3739 if (nlh == NULL) 3569 if (nlh == NULL)
3740 return -EMSGSIZE; 3570 return -EMSGSIZE;
3741 3571
@@ -3751,7 +3581,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3751} 3581}
3752 3582
3753static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, 3583static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3754 u32 portid, u32 seq, int event, unsigned int flags) 3584 u32 pid, u32 seq, int event, unsigned int flags)
3755{ 3585{
3756 struct nlmsghdr *nlh; 3586 struct nlmsghdr *nlh;
3757 u8 scope = RT_SCOPE_UNIVERSE; 3587 u8 scope = RT_SCOPE_UNIVERSE;
@@ -3760,7 +3590,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3760 if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) 3590 if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
3761 scope = RT_SCOPE_SITE; 3591 scope = RT_SCOPE_SITE;
3762 3592
3763 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); 3593 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
3764 if (nlh == NULL) 3594 if (nlh == NULL)
3765 return -EMSGSIZE; 3595 return -EMSGSIZE;
3766 3596
@@ -3801,7 +3631,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3801 if (++ip_idx < s_ip_idx) 3631 if (++ip_idx < s_ip_idx)
3802 continue; 3632 continue;
3803 err = inet6_fill_ifaddr(skb, ifa, 3633 err = inet6_fill_ifaddr(skb, ifa,
3804 NETLINK_CB(cb->skb).portid, 3634 NETLINK_CB(cb->skb).pid,
3805 cb->nlh->nlmsg_seq, 3635 cb->nlh->nlmsg_seq,
3806 RTM_NEWADDR, 3636 RTM_NEWADDR,
3807 NLM_F_MULTI); 3637 NLM_F_MULTI);
@@ -3817,7 +3647,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3817 if (ip_idx < s_ip_idx) 3647 if (ip_idx < s_ip_idx)
3818 continue; 3648 continue;
3819 err = inet6_fill_ifmcaddr(skb, ifmca, 3649 err = inet6_fill_ifmcaddr(skb, ifmca,
3820 NETLINK_CB(cb->skb).portid, 3650 NETLINK_CB(cb->skb).pid,
3821 cb->nlh->nlmsg_seq, 3651 cb->nlh->nlmsg_seq,
3822 RTM_GETMULTICAST, 3652 RTM_GETMULTICAST,
3823 NLM_F_MULTI); 3653 NLM_F_MULTI);
@@ -3832,7 +3662,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3832 if (ip_idx < s_ip_idx) 3662 if (ip_idx < s_ip_idx)
3833 continue; 3663 continue;
3834 err = inet6_fill_ifacaddr(skb, ifaca, 3664 err = inet6_fill_ifacaddr(skb, ifaca,
3835 NETLINK_CB(cb->skb).portid, 3665 NETLINK_CB(cb->skb).pid,
3836 cb->nlh->nlmsg_seq, 3666 cb->nlh->nlmsg_seq,
3837 RTM_GETANYCAST, 3667 RTM_GETANYCAST,
3838 NLM_F_MULTI); 3668 NLM_F_MULTI);
@@ -3916,7 +3746,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
3916 return inet6_dump_addr(skb, cb, type); 3746 return inet6_dump_addr(skb, cb, type);
3917} 3747}
3918 3748
3919static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, 3749static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3920 void *arg) 3750 void *arg)
3921{ 3751{
3922 struct net *net = sock_net(in_skb->sk); 3752 struct net *net = sock_net(in_skb->sk);
@@ -3954,7 +3784,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3954 goto errout_ifa; 3784 goto errout_ifa;
3955 } 3785 }
3956 3786
3957 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid, 3787 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
3958 nlh->nlmsg_seq, RTM_NEWADDR, 0); 3788 nlh->nlmsg_seq, RTM_NEWADDR, 0);
3959 if (err < 0) { 3789 if (err < 0) {
3960 /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ 3790 /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
@@ -3962,7 +3792,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3962 kfree_skb(skb); 3792 kfree_skb(skb);
3963 goto errout_ifa; 3793 goto errout_ifa;
3964 } 3794 }
3965 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 3795 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
3966errout_ifa: 3796errout_ifa:
3967 in6_ifa_put(ifa); 3797 in6_ifa_put(ifa);
3968errout: 3798errout:
@@ -4041,7 +3871,6 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4041 array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; 3871 array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
4042 array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; 3872 array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
4043 array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; 3873 array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
4044 array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
4045} 3874}
4046 3875
4047static inline size_t inet6_ifla6_size(void) 3876static inline size_t inet6_ifla6_size(void)
@@ -4112,14 +3941,14 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
4112 struct nlattr *nla; 3941 struct nlattr *nla;
4113 struct ifla_cacheinfo ci; 3942 struct ifla_cacheinfo ci;
4114 3943
4115 if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags)) 3944 NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
4116 goto nla_put_failure; 3945
4117 ci.max_reasm_len = IPV6_MAXPLEN; 3946 ci.max_reasm_len = IPV6_MAXPLEN;
4118 ci.tstamp = cstamp_delta(idev->tstamp); 3947 ci.tstamp = cstamp_delta(idev->tstamp);
4119 ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); 3948 ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
4120 ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); 3949 ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
4121 if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) 3950 NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
4122 goto nla_put_failure; 3951
4123 nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); 3952 nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
4124 if (nla == NULL) 3953 if (nla == NULL)
4125 goto nla_put_failure; 3954 goto nla_put_failure;
@@ -4165,14 +3994,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
4165} 3994}
4166 3995
4167static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 3996static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
4168 u32 portid, u32 seq, int event, unsigned int flags) 3997 u32 pid, u32 seq, int event, unsigned int flags)
4169{ 3998{
4170 struct net_device *dev = idev->dev; 3999 struct net_device *dev = idev->dev;
4171 struct ifinfomsg *hdr; 4000 struct ifinfomsg *hdr;
4172 struct nlmsghdr *nlh; 4001 struct nlmsghdr *nlh;
4173 void *protoinfo; 4002 void *protoinfo;
4174 4003
4175 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); 4004 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
4176 if (nlh == NULL) 4005 if (nlh == NULL)
4177 return -EMSGSIZE; 4006 return -EMSGSIZE;
4178 4007
@@ -4184,13 +4013,15 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
4184 hdr->ifi_flags = dev_get_flags(dev); 4013 hdr->ifi_flags = dev_get_flags(dev);
4185 hdr->ifi_change = 0; 4014 hdr->ifi_change = 0;
4186 4015
4187 if (nla_put_string(skb, IFLA_IFNAME, dev->name) || 4016 NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
4188 (dev->addr_len && 4017
4189 nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || 4018 if (dev->addr_len)
4190 nla_put_u32(skb, IFLA_MTU, dev->mtu) || 4019 NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
4191 (dev->ifindex != dev->iflink && 4020
4192 nla_put_u32(skb, IFLA_LINK, dev->iflink))) 4021 NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
4193 goto nla_put_failure; 4022 if (dev->ifindex != dev->iflink)
4023 NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
4024
4194 protoinfo = nla_nest_start(skb, IFLA_PROTINFO); 4025 protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
4195 if (protoinfo == NULL) 4026 if (protoinfo == NULL)
4196 goto nla_put_failure; 4027 goto nla_put_failure;
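[Illustrative note] These two hunks trade the explicit `if (nla_put...()) goto nla_put_failure;` checks back for the older NLA_PUT*() macros, which hide the same goto inside the macro body. A minimal standalone sketch of the two calling styles follows (plain C, not the real netlink API; the buffer layout and names are made up).

/*
 * Standalone sketch of the two attribute-put styles this hunk switches
 * between: a PUT macro that hides a goto on failure versus explicit
 * "if (put(...)) goto fail;" checks. Not the netlink API.
 */
#include <stdio.h>
#include <string.h>

struct msg { char buf[16]; size_t used; };

/* Returns non-zero when the attribute does not fit (like nla_put()). */
static int put_attr(struct msg *m, const void *data, size_t len)
{
	if (m->used + len > sizeof(m->buf))
		return -1;
	memcpy(m->buf + m->used, data, len);
	m->used += len;
	return 0;
}

/* Older macro style: error handling is hidden behind a goto. */
#define PUT_ATTR(m, data, len)				\
	do {						\
		if (put_attr((m), (data), (len)))	\
			goto put_failure;		\
	} while (0)

static int fill_old_style(struct msg *m, const int *vals, int n)
{
	for (int i = 0; i < n; i++)
		PUT_ATTR(m, &vals[i], sizeof(vals[i]));
	return 0;
put_failure:
	return -1;
}

/* Newer style: every call site checks the return value explicitly. */
static int fill_new_style(struct msg *m, const int *vals, int n)
{
	for (int i = 0; i < n; i++)
		if (put_attr(m, &vals[i], sizeof(vals[i])))
			goto put_failure;
	return 0;
put_failure:
	return -1;
}

int main(void)
{
	int vals[8] = { 0 };
	struct msg a = { .used = 0 }, b = { .used = 0 };

	/* 8 ints do not fit in 16 bytes, so both fillers report failure. */
	printf("old style: %d, new style: %d\n",
	       fill_old_style(&a, vals, 8), fill_new_style(&b, vals, 8));
	return 0;
}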
@@ -4230,7 +4061,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
4230 if (!idev) 4061 if (!idev)
4231 goto cont; 4062 goto cont;
4232 if (inet6_fill_ifinfo(skb, idev, 4063 if (inet6_fill_ifinfo(skb, idev,
4233 NETLINK_CB(cb->skb).portid, 4064 NETLINK_CB(cb->skb).pid,
4234 cb->nlh->nlmsg_seq, 4065 cb->nlh->nlmsg_seq,
4235 RTM_NEWLINK, NLM_F_MULTI) <= 0) 4066 RTM_NEWLINK, NLM_F_MULTI) <= 0)
4236 goto out; 4067 goto out;
@@ -4278,14 +4109,14 @@ static inline size_t inet6_prefix_nlmsg_size(void)
4278} 4109}
4279 4110
4280static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, 4111static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
4281 struct prefix_info *pinfo, u32 portid, u32 seq, 4112 struct prefix_info *pinfo, u32 pid, u32 seq,
4282 int event, unsigned int flags) 4113 int event, unsigned int flags)
4283{ 4114{
4284 struct prefixmsg *pmsg; 4115 struct prefixmsg *pmsg;
4285 struct nlmsghdr *nlh; 4116 struct nlmsghdr *nlh;
4286 struct prefix_cacheinfo ci; 4117 struct prefix_cacheinfo ci;
4287 4118
4288 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); 4119 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
4289 if (nlh == NULL) 4120 if (nlh == NULL)
4290 return -EMSGSIZE; 4121 return -EMSGSIZE;
4291 4122
@@ -4303,12 +4134,12 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
4303 if (pinfo->autoconf) 4134 if (pinfo->autoconf)
4304 pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; 4135 pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
4305 4136
4306 if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix)) 4137 NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
4307 goto nla_put_failure; 4138
4308 ci.preferred_time = ntohl(pinfo->prefered); 4139 ci.preferred_time = ntohl(pinfo->prefered);
4309 ci.valid_time = ntohl(pinfo->valid); 4140 ci.valid_time = ntohl(pinfo->valid);
4310 if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) 4141 NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
4311 goto nla_put_failure; 4142
4312 return nlmsg_end(skb, nlh); 4143 return nlmsg_end(skb, nlh);
4313 4144
4314nla_put_failure: 4145nla_put_failure:
@@ -4387,17 +4218,9 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write,
4387 int *valp = ctl->data; 4218 int *valp = ctl->data;
4388 int val = *valp; 4219 int val = *valp;
4389 loff_t pos = *ppos; 4220 loff_t pos = *ppos;
4390 ctl_table lctl;
4391 int ret; 4221 int ret;
4392 4222
4393 /* 4223 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
4394 * ctl->data points to idev->cnf.forwarding, we should
4395 * not modify it until we get the rtnl lock.
4396 */
4397 lctl = *ctl;
4398 lctl.data = &val;
4399
4400 ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
4401 4224
4402 if (write) 4225 if (write)
4403 ret = addrconf_fixup_forwarding(ctl, valp, val); 4226 ret = addrconf_fixup_forwarding(ctl, valp, val);
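[Illustrative note] The lines removed here implemented a snapshot pattern: `ctl->data` points at the live `idev->cnf.forwarding`, so upstream copies the ctl_table, points the copy's `.data` at a stack variable, lets proc_dointvec() write that local value, and only applies it to the live setting once the rtnl lock is held. The reverted code lets proc_dointvec() write the live value directly. A standalone sketch of the snapshot pattern follows; all names are illustrative, and a pthread mutex stands in for the rtnl lock.

/*
 * Standalone sketch (not the kernel sysctl API) of the snapshot pattern the
 * hunk above removes: parse the new value into a stack copy first, then apply
 * it to the live setting only after taking the lock, instead of letting the
 * generic parser write the live setting directly.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct knob { int *data; };		/* stand-in for ctl_table.data */

static int live_forwarding;		/* the live setting */
static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;

/* Generic parser: writes whatever it parsed straight into knob->data. */
static int parse_int(struct knob *k, const char *buf)
{
	*k->data = atoi(buf);
	return 0;
}

static int handle_write(struct knob *k, const char *buf)
{
	int val = *k->data;		/* current value */
	struct knob local = *k;		/* shallow copy of the descriptor */
	int ret;

	local.data = &val;		/* parser writes the stack copy ... */
	ret = parse_int(&local, buf);
	if (ret)
		return ret;

	pthread_mutex_lock(&cfg_lock);	/* ... live value changes under lock */
	*k->data = val;
	pthread_mutex_unlock(&cfg_lock);
	return 0;
}

int main(void)
{
	struct knob forwarding = { .data = &live_forwarding };

	handle_write(&forwarding, "1");
	printf("forwarding = %d\n", live_forwarding);
	return 0;
}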
@@ -4435,27 +4258,26 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
4435 rcu_read_unlock(); 4258 rcu_read_unlock();
4436} 4259}
4437 4260
4438static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) 4261static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
4439{ 4262{
4440 struct net *net; 4263 struct net *net;
4441 int old;
4442
4443 if (!rtnl_trylock())
4444 return restart_syscall();
4445 4264
4446 net = (struct net *)table->extra2; 4265 net = (struct net *)table->extra2;
4447 old = *p;
4448 *p = newf;
4449 4266
4450 if (p == &net->ipv6.devconf_dflt->disable_ipv6) { 4267 if (p == &net->ipv6.devconf_dflt->disable_ipv6)
4451 rtnl_unlock();
4452 return 0; 4268 return 0;
4269
4270 if (!rtnl_trylock()) {
4271 /* Restore the original values before restarting */
4272 *p = old;
4273 return restart_syscall();
4453 } 4274 }
4454 4275
4455 if (p == &net->ipv6.devconf_all->disable_ipv6) { 4276 if (p == &net->ipv6.devconf_all->disable_ipv6) {
4277 __s32 newf = net->ipv6.devconf_all->disable_ipv6;
4456 net->ipv6.devconf_dflt->disable_ipv6 = newf; 4278 net->ipv6.devconf_dflt->disable_ipv6 = newf;
4457 addrconf_disable_change(net, newf); 4279 addrconf_disable_change(net, newf);
4458 } else if ((!newf) ^ (!old)) 4280 } else if ((!*p) ^ (!old))
4459 dev_disable_change((struct inet6_dev *)table->extra1); 4281 dev_disable_change((struct inet6_dev *)table->extra1);
4460 4282
4461 rtnl_unlock(); 4283 rtnl_unlock();
@@ -4469,17 +4291,9 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write,
4469 int *valp = ctl->data; 4291 int *valp = ctl->data;
4470 int val = *valp; 4292 int val = *valp;
4471 loff_t pos = *ppos; 4293 loff_t pos = *ppos;
4472 ctl_table lctl;
4473 int ret; 4294 int ret;
4474 4295
4475 /* 4296 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
4476 * ctl->data points to idev->cnf.disable_ipv6, we should
4477 * not modify it until we get the rtnl lock.
4478 */
4479 lctl = *ctl;
4480 lctl.data = &val;
4481
4482 ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
4483 4297
4484 if (write) 4298 if (write)
4485 ret = addrconf_disable_ipv6(ctl, valp, val); 4299 ret = addrconf_disable_ipv6(ctl, valp, val);
@@ -4492,6 +4306,7 @@ static struct addrconf_sysctl_table
4492{ 4306{
4493 struct ctl_table_header *sysctl_header; 4307 struct ctl_table_header *sysctl_header;
4494 ctl_table addrconf_vars[DEVCONF_MAX+1]; 4308 ctl_table addrconf_vars[DEVCONF_MAX+1];
4309 char *dev_name;
4495} addrconf_sysctl __read_mostly = { 4310} addrconf_sysctl __read_mostly = {
4496 .sysctl_header = NULL, 4311 .sysctl_header = NULL,
4497 .addrconf_vars = { 4312 .addrconf_vars = {
@@ -4710,13 +4525,6 @@ static struct addrconf_sysctl_table
4710 .proc_handler = proc_dointvec 4525 .proc_handler = proc_dointvec
4711 }, 4526 },
4712 { 4527 {
4713 .procname = "ndisc_notify",
4714 .data = &ipv6_devconf.ndisc_notify,
4715 .maxlen = sizeof(int),
4716 .mode = 0644,
4717 .proc_handler = proc_dointvec
4718 },
4719 {
4720 /* sentinel */ 4528 /* sentinel */
4721 } 4529 }
4722 }, 4530 },
@@ -4727,7 +4535,17 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
4727{ 4535{
4728 int i; 4536 int i;
4729 struct addrconf_sysctl_table *t; 4537 struct addrconf_sysctl_table *t;
4730 char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; 4538
4539#define ADDRCONF_CTL_PATH_DEV 3
4540
4541 struct ctl_path addrconf_ctl_path[] = {
4542 { .procname = "net", },
4543 { .procname = "ipv6", },
4544 { .procname = "conf", },
4545 { /* to be set */ },
4546 { },
4547 };
4548
4731 4549
4732 t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); 4550 t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
4733 if (t == NULL) 4551 if (t == NULL)
@@ -4739,15 +4557,27 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
4739 t->addrconf_vars[i].extra2 = net; 4557 t->addrconf_vars[i].extra2 = net;
4740 } 4558 }
4741 4559
4742 snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); 4560 /*
4561 * Make a copy of dev_name, because '.procname' is regarded as const
4562 * by sysctl and we wouldn't want anyone to change it under our feet
4563 * (see SIOCSIFNAME).
4564 */
4565 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
4566 if (!t->dev_name)
4567 goto free;
4568
4569 addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name;
4743 4570
4744 t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); 4571 t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path,
4572 t->addrconf_vars);
4745 if (t->sysctl_header == NULL) 4573 if (t->sysctl_header == NULL)
4746 goto free; 4574 goto free_procname;
4747 4575
4748 p->sysctl = t; 4576 p->sysctl = t;
4749 return 0; 4577 return 0;
4750 4578
4579free_procname:
4580 kfree(t->dev_name);
4751free: 4581free:
4752 kfree(t); 4582 kfree(t);
4753out: 4583out:
@@ -4764,6 +4594,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
4764 t = p->sysctl; 4594 t = p->sysctl;
4765 p->sysctl = NULL; 4595 p->sysctl = NULL;
4766 unregister_net_sysctl_table(t->sysctl_header); 4596 unregister_net_sysctl_table(t->sysctl_header);
4597 kfree(t->dev_name);
4767 kfree(t); 4598 kfree(t);
4768} 4599}
4769 4600
@@ -4882,8 +4713,8 @@ int __init addrconf_init(void)
4882 4713
4883 err = ipv6_addr_label_init(); 4714 err = ipv6_addr_label_init();
4884 if (err < 0) { 4715 if (err < 0) {
4885 pr_crit("%s: cannot initialize default policy table: %d\n", 4716 printk(KERN_CRIT "IPv6 Addrconf:"
4886 __func__, err); 4717 " cannot initialize default policy table: %d.\n", err);
4887 goto out; 4718 goto out;
4888 } 4719 }
4889 4720
@@ -4941,8 +4772,6 @@ int __init addrconf_init(void)
4941 inet6_dump_ifmcaddr, NULL); 4772 inet6_dump_ifmcaddr, NULL);
4942 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, 4773 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
4943 inet6_dump_ifacaddr, NULL); 4774 inet6_dump_ifacaddr, NULL);
4944 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
4945 NULL, NULL);
4946 4775
4947 ipv6_addr_label_rtnl_register(); 4776 ipv6_addr_label_rtnl_register();
4948 4777
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d051e5f4bf3..6b03826552e 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -3,14 +3,13 @@
3 * not configured or static. 3 * not configured or static.
4 */ 4 */
5 5
6#include <linux/export.h>
7#include <net/ipv6.h> 6#include <net/ipv6.h>
8 7
9#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) 8#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
10 9
11static inline unsigned int ipv6_addr_scope2type(unsigned int scope) 10static inline unsigned ipv6_addr_scope2type(unsigned scope)
12{ 11{
13 switch (scope) { 12 switch(scope) {
14 case IPV6_ADDR_SCOPE_NODELOCAL: 13 case IPV6_ADDR_SCOPE_NODELOCAL:
15 return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) | 14 return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) |
16 IPV6_ADDR_LOOPBACK); 15 IPV6_ADDR_LOOPBACK);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ff76eecfd62..2d8ddba9ee5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
57} 57}
58 58
59/* 59/*
60 * Default policy table (RFC6724 + extensions) 60 * Default policy table (RFC3484 + extensions)
61 * 61 *
62 * prefix addr_type label 62 * prefix addr_type label
63 * ------------------------------------------------------------------------- 63 * -------------------------------------------------------------------------
@@ -69,17 +69,13 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
69 * fc00::/7 N/A 5 ULA (RFC 4193) 69 * fc00::/7 N/A 5 ULA (RFC 4193)
70 * 2001::/32 N/A 6 Teredo (RFC 4380) 70 * 2001::/32 N/A 6 Teredo (RFC 4380)
71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843)
72 * fec0::/10 N/A 11 Site-local
73 * (deprecated by RFC3879)
74 * 3ffe::/16 N/A 12 6bone
75 * 72 *
76 * Note: 0xffffffff is used if we do not have any policies. 73 * Note: 0xffffffff is used if we do not have any policies.
77 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
78 */ 74 */
79 75
80#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 76#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
81 77
82static const __net_initconst struct ip6addrlbl_init_table 78static const __net_initdata struct ip6addrlbl_init_table
83{ 79{
84 const struct in6_addr *prefix; 80 const struct in6_addr *prefix;
85 int prefixlen; 81 int prefixlen;
@@ -92,18 +88,10 @@ static const __net_initconst struct ip6addrlbl_init_table
92 .prefix = &(struct in6_addr){{{ 0xfc }}}, 88 .prefix = &(struct in6_addr){{{ 0xfc }}},
93 .prefixlen = 7, 89 .prefixlen = 7,
94 .label = 5, 90 .label = 5,
95 },{ /* fec0::/10 */
96 .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
97 .prefixlen = 10,
98 .label = 11,
99 },{ /* 2002::/16 */ 91 },{ /* 2002::/16 */
100 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 92 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
101 .prefixlen = 16, 93 .prefixlen = 16,
102 .label = 2, 94 .label = 2,
103 },{ /* 3ffe::/16 */
104 .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
105 .prefixlen = 16,
106 .label = 12,
107 },{ /* 2001::/32 */ 95 },{ /* 2001::/32 */
108 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 96 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
109 .prefixlen = 32, 97 .prefixlen = 32,
@@ -141,7 +129,7 @@ static void ip6addrlbl_free_rcu(struct rcu_head *h)
141 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 129 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
142} 130}
143 131
144static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 132static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
145{ 133{
146 return atomic_inc_not_zero(&p->refcnt); 134 return atomic_inc_not_zero(&p->refcnt);
147} 135}
@@ -153,20 +141,20 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
153} 141}
154 142
155/* Find label */ 143/* Find label */
156static bool __ip6addrlbl_match(struct net *net, 144static int __ip6addrlbl_match(struct net *net,
157 const struct ip6addrlbl_entry *p, 145 struct ip6addrlbl_entry *p,
158 const struct in6_addr *addr, 146 const struct in6_addr *addr,
159 int addrtype, int ifindex) 147 int addrtype, int ifindex)
160{ 148{
161 if (!net_eq(ip6addrlbl_net(p), net)) 149 if (!net_eq(ip6addrlbl_net(p), net))
162 return false; 150 return 0;
163 if (p->ifindex && p->ifindex != ifindex) 151 if (p->ifindex && p->ifindex != ifindex)
164 return false; 152 return 0;
165 if (p->addrtype && p->addrtype != addrtype) 153 if (p->addrtype && p->addrtype != addrtype)
166 return false; 154 return 0;
167 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 155 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
168 return false; 156 return 0;
169 return true; 157 return 1;
170} 158}
171 159
172static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 160static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
@@ -362,7 +350,7 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
362 int err = 0; 350 int err = 0;
363 int i; 351 int i;
364 352
365 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 353 ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
366 354
367 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 355 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
368 int ret = ip6addrlbl_add(net, 356 int ret = ip6addrlbl_add(net,
@@ -468,8 +456,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
468 return err; 456 return err;
469} 457}
470 458
471static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 459static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
472 int prefixlen, int ifindex, u32 lseq) 460 int prefixlen, int ifindex, u32 lseq)
473{ 461{
474 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 462 struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
475 ifal->ifal_family = AF_INET6; 463 ifal->ifal_family = AF_INET6;
@@ -482,10 +470,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
482static int ip6addrlbl_fill(struct sk_buff *skb, 470static int ip6addrlbl_fill(struct sk_buff *skb,
483 struct ip6addrlbl_entry *p, 471 struct ip6addrlbl_entry *p,
484 u32 lseq, 472 u32 lseq,
485 u32 portid, u32 seq, int event, 473 u32 pid, u32 seq, int event,
486 unsigned int flags) 474 unsigned int flags)
487{ 475{
488 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 476 struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
489 sizeof(struct ifaddrlblmsg), flags); 477 sizeof(struct ifaddrlblmsg), flags);
490 if (!nlh) 478 if (!nlh)
491 return -EMSGSIZE; 479 return -EMSGSIZE;
@@ -515,7 +503,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
515 net_eq(ip6addrlbl_net(p), net)) { 503 net_eq(ip6addrlbl_net(p), net)) {
516 if ((err = ip6addrlbl_fill(skb, p, 504 if ((err = ip6addrlbl_fill(skb, p,
517 ip6addrlbl_table.seq, 505 ip6addrlbl_table.seq,
518 NETLINK_CB(cb->skb).portid, 506 NETLINK_CB(cb->skb).pid,
519 cb->nlh->nlmsg_seq, 507 cb->nlh->nlmsg_seq,
520 RTM_NEWADDRLABEL, 508 RTM_NEWADDRLABEL,
521 NLM_F_MULTI)) <= 0) 509 NLM_F_MULTI)) <= 0)
@@ -586,7 +574,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
586 } 574 }
587 575
588 err = ip6addrlbl_fill(skb, p, lseq, 576 err = ip6addrlbl_fill(skb, p, lseq,
589 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 577 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
590 RTM_NEWADDRLABEL, 0); 578 RTM_NEWADDRLABEL, 0);
591 579
592 ip6addrlbl_put(p); 580 ip6addrlbl_put(p);
@@ -597,7 +585,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
597 goto out; 585 goto out;
598 } 586 }
599 587
600 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 588 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
601out: 589out:
602 return err; 590 return err;
603} 591}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b043c60429b..4252b3cc183 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -18,7 +18,6 @@
18 * 2 of the License, or (at your option) any later version. 18 * 2 of the License, or (at your option) any later version.
19 */ 19 */
20 20
21#define pr_fmt(fmt) "IPv6: " fmt
22 21
23#include <linux/module.h> 22#include <linux/module.h>
24#include <linux/capability.h> 23#include <linux/capability.h>
@@ -61,8 +60,23 @@
61#endif 60#endif
62 61
63#include <asm/uaccess.h> 62#include <asm/uaccess.h>
63#include <asm/system.h>
64#include <linux/mroute6.h> 64#include <linux/mroute6.h>
65 65
66#ifdef CONFIG_ANDROID_PARANOID_NETWORK
67#include <linux/android_aid.h>
68
69static inline int current_has_network(void)
70{
71 return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
72}
73#else
74static inline int current_has_network(void)
75{
76 return 1;
77}
78#endif
79
66MODULE_AUTHOR("Cast of dozens"); 80MODULE_AUTHOR("Cast of dozens");
67MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); 81MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
68MODULE_LICENSE("GPL"); 82MODULE_LICENSE("GPL");
@@ -78,7 +92,7 @@ struct ipv6_params ipv6_defaults = {
78 .autoconf = 1, 92 .autoconf = 1,
79}; 93};
80 94
81static int disable_ipv6_mod; 95static int disable_ipv6_mod = 0;
82 96
83module_param_named(disable, disable_ipv6_mod, int, 0444); 97module_param_named(disable, disable_ipv6_mod, int, 0444);
84MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); 98MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
@@ -109,6 +123,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
109 int try_loading_module = 0; 123 int try_loading_module = 0;
110 int err; 124 int err;
111 125
126 if (!current_has_network())
127 return -EACCES;
128
112 if (sock->type != SOCK_RAW && 129 if (sock->type != SOCK_RAW &&
113 sock->type != SOCK_DGRAM && 130 sock->type != SOCK_DGRAM &&
114 !inet_ehash_secret) 131 !inet_ehash_secret)
@@ -160,8 +177,7 @@ lookup_protocol:
160 } 177 }
161 178
162 err = -EPERM; 179 err = -EPERM;
163 if (sock->type == SOCK_RAW && !kern && 180 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
164 !ns_capable(net->user_ns, CAP_NET_RAW))
165 goto out_rcu_unlock; 181 goto out_rcu_unlock;
166 182
167 sock->ops = answer->ops; 183 sock->ops = answer->ops;
@@ -182,7 +198,7 @@ lookup_protocol:
182 err = 0; 198 err = 0;
183 sk->sk_no_check = answer_no_check; 199 sk->sk_no_check = answer_no_check;
184 if (INET_PROTOSW_REUSE & answer_flags) 200 if (INET_PROTOSW_REUSE & answer_flags)
185 sk->sk_reuse = SK_CAN_REUSE; 201 sk->sk_reuse = 1;
186 202
187 inet = inet_sk(sk); 203 inet = inet_sk(sk);
188 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; 204 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
@@ -215,7 +231,6 @@ lookup_protocol:
215 inet->mc_ttl = 1; 231 inet->mc_ttl = 1;
216 inet->mc_index = 0; 232 inet->mc_index = 0;
217 inet->mc_list = NULL; 233 inet->mc_list = NULL;
218 inet->rcv_tos = 0;
219 234
220 if (ipv4_config.no_pmtu_disc) 235 if (ipv4_config.no_pmtu_disc)
221 inet->pmtudisc = IP_PMTUDISC_DONT; 236 inet->pmtudisc = IP_PMTUDISC_DONT;
@@ -258,7 +273,7 @@ out_rcu_unlock:
258/* bind for INET6 API */ 273/* bind for INET6 API */
259int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 274int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
260{ 275{
261 struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; 276 struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
262 struct sock *sk = sock->sk; 277 struct sock *sk = sock->sk;
263 struct inet_sock *inet = inet_sk(sk); 278 struct inet_sock *inet = inet_sk(sk);
264 struct ipv6_pinfo *np = inet6_sk(sk); 279 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -283,7 +298,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
283 return -EINVAL; 298 return -EINVAL;
284 299
285 snum = ntohs(addr->sin6_port); 300 snum = ntohs(addr->sin6_port);
286 if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) 301 if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
287 return -EACCES; 302 return -EACCES;
288 303
289 lock_sock(sk); 304 lock_sock(sk);
@@ -349,7 +364,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
349 */ 364 */
350 v4addr = LOOPBACK4_IPV6; 365 v4addr = LOOPBACK4_IPV6;
351 if (!(addr_type & IPV6_ADDR_MULTICAST)) { 366 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
352 if (!(inet->freebind || inet->transparent) && 367 if (!inet->transparent &&
353 !ipv6_chk_addr(net, &addr->sin6_addr, 368 !ipv6_chk_addr(net, &addr->sin6_addr,
354 dev, 0)) { 369 dev, 0)) {
355 err = -EADDRNOTAVAIL; 370 err = -EADDRNOTAVAIL;
@@ -363,10 +378,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
363 inet->inet_rcv_saddr = v4addr; 378 inet->inet_rcv_saddr = v4addr;
364 inet->inet_saddr = v4addr; 379 inet->inet_saddr = v4addr;
365 380
366 np->rcv_saddr = addr->sin6_addr; 381 ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
367 382
368 if (!(addr_type & IPV6_ADDR_MULTICAST)) 383 if (!(addr_type & IPV6_ADDR_MULTICAST))
369 np->saddr = addr->sin6_addr; 384 ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
370 385
371 /* Make sure we are allowed to bind here. */ 386 /* Make sure we are allowed to bind here. */
372 if (sk->sk_prot->get_port(sk, snum)) { 387 if (sk->sk_prot->get_port(sk, snum)) {
@@ -392,6 +407,7 @@ out_unlock:
392 rcu_read_unlock(); 407 rcu_read_unlock();
393 goto out; 408 goto out;
394} 409}
410
395EXPORT_SYMBOL(inet6_bind); 411EXPORT_SYMBOL(inet6_bind);
396 412
397int inet6_release(struct socket *sock) 413int inet6_release(struct socket *sock)
@@ -409,6 +425,7 @@ int inet6_release(struct socket *sock)
409 425
410 return inet_release(sock); 426 return inet_release(sock);
411} 427}
428
412EXPORT_SYMBOL(inet6_release); 429EXPORT_SYMBOL(inet6_release);
413 430
414void inet6_destroy_sock(struct sock *sk) 431void inet6_destroy_sock(struct sock *sk)
@@ -419,12 +436,10 @@ void inet6_destroy_sock(struct sock *sk)
419 436
420 /* Release rx options */ 437 /* Release rx options */
421 438
422 skb = xchg(&np->pktoptions, NULL); 439 if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
423 if (skb != NULL)
424 kfree_skb(skb); 440 kfree_skb(skb);
425 441
426 skb = xchg(&np->rxpmtu, NULL); 442 if ((skb = xchg(&np->rxpmtu, NULL)) != NULL)
427 if (skb != NULL)
428 kfree_skb(skb); 443 kfree_skb(skb);
429 444
430 /* Free flowlabels */ 445 /* Free flowlabels */
@@ -432,10 +447,10 @@ void inet6_destroy_sock(struct sock *sk)
432 447
433 /* Free tx options */ 448 /* Free tx options */
434 449
435 opt = xchg(&np->opt, NULL); 450 if ((opt = xchg(&np->opt, NULL)) != NULL)
436 if (opt != NULL)
437 sock_kfree_s(sk, opt, opt->tot_len); 451 sock_kfree_s(sk, opt, opt->tot_len);
438} 452}
453
439EXPORT_SYMBOL_GPL(inet6_destroy_sock); 454EXPORT_SYMBOL_GPL(inet6_destroy_sock);
440 455
441/* 456/*
@@ -445,7 +460,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
445int inet6_getname(struct socket *sock, struct sockaddr *uaddr, 460int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
446 int *uaddr_len, int peer) 461 int *uaddr_len, int peer)
447{ 462{
448 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; 463 struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr;
449 struct sock *sk = sock->sk; 464 struct sock *sk = sock->sk;
450 struct inet_sock *inet = inet_sk(sk); 465 struct inet_sock *inet = inet_sk(sk);
451 struct ipv6_pinfo *np = inet6_sk(sk); 466 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -460,14 +475,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
460 peer == 1) 475 peer == 1)
461 return -ENOTCONN; 476 return -ENOTCONN;
462 sin->sin6_port = inet->inet_dport; 477 sin->sin6_port = inet->inet_dport;
463 sin->sin6_addr = np->daddr; 478 ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
464 if (np->sndflow) 479 if (np->sndflow)
465 sin->sin6_flowinfo = np->flow_label; 480 sin->sin6_flowinfo = np->flow_label;
466 } else { 481 } else {
467 if (ipv6_addr_any(&np->rcv_saddr)) 482 if (ipv6_addr_any(&np->rcv_saddr))
468 sin->sin6_addr = np->saddr; 483 ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
469 else 484 else
470 sin->sin6_addr = np->rcv_saddr; 485 ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
471 486
472 sin->sin6_port = inet->inet_sport; 487 sin->sin6_port = inet->inet_sport;
473 } 488 }
@@ -476,14 +491,31 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
476 *uaddr_len = sizeof(*sin); 491 *uaddr_len = sizeof(*sin);
477 return 0; 492 return 0;
478} 493}
494
479EXPORT_SYMBOL(inet6_getname); 495EXPORT_SYMBOL(inet6_getname);
480 496
497int inet6_killaddr_ioctl(struct net *net, void __user *arg) {
498 struct in6_ifreq ireq;
499 struct sockaddr_in6 sin6;
500
501 if (!capable(CAP_NET_ADMIN))
502 return -EACCES;
503
504 if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
505 return -EFAULT;
506
507 sin6.sin6_family = AF_INET6;
508 ipv6_addr_copy(&sin6.sin6_addr, &ireq.ifr6_addr);
509 return tcp_nuke_addr(net, (struct sockaddr *) &sin6);
510}
511
481int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 512int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
482{ 513{
483 struct sock *sk = sock->sk; 514 struct sock *sk = sock->sk;
484 struct net *net = sock_net(sk); 515 struct net *net = sock_net(sk);
485 516
486 switch (cmd) { 517 switch(cmd)
518 {
487 case SIOCGSTAMP: 519 case SIOCGSTAMP:
488 return sock_get_timestamp(sk, (struct timeval __user *)arg); 520 return sock_get_timestamp(sk, (struct timeval __user *)arg);
489 521
@@ -501,6 +533,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
501 return addrconf_del_ifaddr(net, (void __user *) arg); 533 return addrconf_del_ifaddr(net, (void __user *) arg);
502 case SIOCSIFDSTADDR: 534 case SIOCSIFDSTADDR:
503 return addrconf_set_dstaddr(net, (void __user *) arg); 535 return addrconf_set_dstaddr(net, (void __user *) arg);
536 case SIOCKILLADDR:
537 return inet6_killaddr_ioctl(net, (void __user *) arg);
504 default: 538 default:
505 if (!sk->sk_prot->ioctl) 539 if (!sk->sk_prot->ioctl)
506 return -ENOIOCTLCMD; 540 return -ENOIOCTLCMD;
@@ -509,6 +543,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
509 /*NOTREACHED*/ 543 /*NOTREACHED*/
510 return 0; 544 return 0;
511} 545}
546
512EXPORT_SYMBOL(inet6_ioctl); 547EXPORT_SYMBOL(inet6_ioctl);
513 548
514const struct proto_ops inet6_stream_ops = { 549const struct proto_ops inet6_stream_ops = {
@@ -614,21 +649,25 @@ out:
614 return ret; 649 return ret;
615 650
616out_permanent: 651out_permanent:
617 pr_err("Attempt to override permanent protocol %d\n", protocol); 652 printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
653 protocol);
618 goto out; 654 goto out;
619 655
620out_illegal: 656out_illegal:
621 pr_err("Ignoring attempt to register invalid socket type %d\n", 657 printk(KERN_ERR
658 "Ignoring attempt to register invalid socket type %d.\n",
622 p->type); 659 p->type);
623 goto out; 660 goto out;
624} 661}
662
625EXPORT_SYMBOL(inet6_register_protosw); 663EXPORT_SYMBOL(inet6_register_protosw);
626 664
627void 665void
628inet6_unregister_protosw(struct inet_protosw *p) 666inet6_unregister_protosw(struct inet_protosw *p)
629{ 667{
630 if (INET_PROTOSW_PERMANENT & p->flags) { 668 if (INET_PROTOSW_PERMANENT & p->flags) {
631 pr_err("Attempt to unregister permanent protocol %d\n", 669 printk(KERN_ERR
670 "Attempt to unregister permanent protocol %d.\n",
632 p->protocol); 671 p->protocol);
633 } else { 672 } else {
634 spin_lock_bh(&inetsw6_lock); 673 spin_lock_bh(&inetsw6_lock);
@@ -638,6 +677,7 @@ inet6_unregister_protosw(struct inet_protosw *p)
638 synchronize_net(); 677 synchronize_net();
639 } 678 }
640} 679}
680
641EXPORT_SYMBOL(inet6_unregister_protosw); 681EXPORT_SYMBOL(inet6_unregister_protosw);
642 682
643int inet6_sk_rebuild_header(struct sock *sk) 683int inet6_sk_rebuild_header(struct sock *sk)
@@ -654,8 +694,8 @@ int inet6_sk_rebuild_header(struct sock *sk)
654 694
655 memset(&fl6, 0, sizeof(fl6)); 695 memset(&fl6, 0, sizeof(fl6));
656 fl6.flowi6_proto = sk->sk_protocol; 696 fl6.flowi6_proto = sk->sk_protocol;
657 fl6.daddr = np->daddr; 697 ipv6_addr_copy(&fl6.daddr, &np->daddr);
658 fl6.saddr = np->saddr; 698 ipv6_addr_copy(&fl6.saddr, &np->saddr);
659 fl6.flowlabel = np->flow_label; 699 fl6.flowlabel = np->flow_label;
660 fl6.flowi6_oif = sk->sk_bound_dev_if; 700 fl6.flowi6_oif = sk->sk_bound_dev_if;
661 fl6.flowi6_mark = sk->sk_mark; 701 fl6.flowi6_mark = sk->sk_mark;
@@ -677,12 +717,13 @@ int inet6_sk_rebuild_header(struct sock *sk)
677 717
678 return 0; 718 return 0;
679} 719}
720
680EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); 721EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
681 722
682bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) 723int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
683{ 724{
684 const struct ipv6_pinfo *np = inet6_sk(sk); 725 struct ipv6_pinfo *np = inet6_sk(sk);
685 const struct inet6_skb_parm *opt = IP6CB(skb); 726 struct inet6_skb_parm *opt = IP6CB(skb);
686 727
687 if (np->rxopt.all) { 728 if (np->rxopt.all) {
688 if ((opt->hop && (np->rxopt.bits.hopopts || 729 if ((opt->hop && (np->rxopt.bits.hopopts ||
@@ -694,15 +735,259 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)
694 np->rxopt.bits.osrcrt)) || 735 np->rxopt.bits.osrcrt)) ||
695 ((opt->dst1 || opt->dst0) && 736 ((opt->dst1 || opt->dst0) &&
696 (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) 737 (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
697 return true; 738 return 1;
698 } 739 }
699 return false; 740 return 0;
700} 741}
742
701EXPORT_SYMBOL_GPL(ipv6_opt_accepted); 743EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
702 744
745static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
746{
747 const struct inet6_protocol *ops = NULL;
748
749 for (;;) {
750 struct ipv6_opt_hdr *opth;
751 int len;
752
753 if (proto != NEXTHDR_HOP) {
754 ops = rcu_dereference(inet6_protos[proto]);
755
756 if (unlikely(!ops))
757 break;
758
759 if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
760 break;
761 }
762
763 if (unlikely(!pskb_may_pull(skb, 8)))
764 break;
765
766 opth = (void *)skb->data;
767 len = ipv6_optlen(opth);
768
769 if (unlikely(!pskb_may_pull(skb, len)))
770 break;
771
772 proto = opth->nexthdr;
773 __skb_pull(skb, len);
774 }
775
776 return proto;
777}
778
779static int ipv6_gso_send_check(struct sk_buff *skb)
780{
781 const struct ipv6hdr *ipv6h;
782 const struct inet6_protocol *ops;
783 int err = -EINVAL;
784
785 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
786 goto out;
787
788 ipv6h = ipv6_hdr(skb);
789 __skb_pull(skb, sizeof(*ipv6h));
790 err = -EPROTONOSUPPORT;
791
792 rcu_read_lock();
793 ops = rcu_dereference(inet6_protos[
794 ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
795
796 if (likely(ops && ops->gso_send_check)) {
797 skb_reset_transport_header(skb);
798 err = ops->gso_send_check(skb);
799 }
800 rcu_read_unlock();
801
802out:
803 return err;
804}
805
806static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
807{
808 struct sk_buff *segs = ERR_PTR(-EINVAL);
809 struct ipv6hdr *ipv6h;
810 const struct inet6_protocol *ops;
811 int proto;
812 struct frag_hdr *fptr;
813 unsigned int unfrag_ip6hlen;
814 u8 *prevhdr;
815 int offset = 0;
816
817 if (!(features & NETIF_F_V6_CSUM))
818 features &= ~NETIF_F_SG;
819
820 if (unlikely(skb_shinfo(skb)->gso_type &
821 ~(SKB_GSO_UDP |
822 SKB_GSO_DODGY |
823 SKB_GSO_TCP_ECN |
824 SKB_GSO_TCPV6 |
825 0)))
826 goto out;
827
828 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
829 goto out;
830
831 ipv6h = ipv6_hdr(skb);
832 __skb_pull(skb, sizeof(*ipv6h));
833 segs = ERR_PTR(-EPROTONOSUPPORT);
834
835 proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
836 rcu_read_lock();
837 ops = rcu_dereference(inet6_protos[proto]);
838 if (likely(ops && ops->gso_segment)) {
839 skb_reset_transport_header(skb);
840 segs = ops->gso_segment(skb, features);
841 }
842 rcu_read_unlock();
843
844 if (IS_ERR(segs))
845 goto out;
846
847 for (skb = segs; skb; skb = skb->next) {
848 ipv6h = ipv6_hdr(skb);
849 ipv6h->payload_len = htons(skb->len - skb->mac_len -
850 sizeof(*ipv6h));
851 if (proto == IPPROTO_UDP) {
852 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
853 fptr = (struct frag_hdr *)(skb_network_header(skb) +
854 unfrag_ip6hlen);
855 fptr->frag_off = htons(offset);
856 if (skb->next != NULL)
857 fptr->frag_off |= htons(IP6_MF);
858 offset += (ntohs(ipv6h->payload_len) -
859 sizeof(struct frag_hdr));
860 }
861 }
862
863out:
864 return segs;
865}
866
867struct ipv6_gro_cb {
868 struct napi_gro_cb napi;
869 int proto;
870};
871
872#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb)
873
874static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
875 struct sk_buff *skb)
876{
877 const struct inet6_protocol *ops;
878 struct sk_buff **pp = NULL;
879 struct sk_buff *p;
880 struct ipv6hdr *iph;
881 unsigned int nlen;
882 unsigned int hlen;
883 unsigned int off;
884 int flush = 1;
885 int proto;
886 __wsum csum;
887
888 off = skb_gro_offset(skb);
889 hlen = off + sizeof(*iph);
890 iph = skb_gro_header_fast(skb, off);
891 if (skb_gro_header_hard(skb, hlen)) {
892 iph = skb_gro_header_slow(skb, hlen, off);
893 if (unlikely(!iph))
894 goto out;
895 }
896
897 skb_gro_pull(skb, sizeof(*iph));
898 skb_set_transport_header(skb, skb_gro_offset(skb));
899
900 flush += ntohs(iph->payload_len) != skb_gro_len(skb);
901
902 rcu_read_lock();
903 proto = iph->nexthdr;
904 ops = rcu_dereference(inet6_protos[proto]);
905 if (!ops || !ops->gro_receive) {
906 __pskb_pull(skb, skb_gro_offset(skb));
907 proto = ipv6_gso_pull_exthdrs(skb, proto);
908 skb_gro_pull(skb, -skb_transport_offset(skb));
909 skb_reset_transport_header(skb);
910 __skb_push(skb, skb_gro_offset(skb));
911
912 ops = rcu_dereference(inet6_protos[proto]);
913 if (!ops || !ops->gro_receive)
914 goto out_unlock;
915
916 iph = ipv6_hdr(skb);
917 }
918
919 IPV6_GRO_CB(skb)->proto = proto;
920
921 flush--;
922 nlen = skb_network_header_len(skb);
923
924 for (p = *head; p; p = p->next) {
925 struct ipv6hdr *iph2;
926
927 if (!NAPI_GRO_CB(p)->same_flow)
928 continue;
929
930 iph2 = ipv6_hdr(p);
931
932 /* All fields must match except length. */
933 if (nlen != skb_network_header_len(p) ||
934 memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) ||
935 memcmp(&iph->nexthdr, &iph2->nexthdr,
936 nlen - offsetof(struct ipv6hdr, nexthdr))) {
937 NAPI_GRO_CB(p)->same_flow = 0;
938 continue;
939 }
940
941 NAPI_GRO_CB(p)->flush |= flush;
942 }
943
944 NAPI_GRO_CB(skb)->flush |= flush;
945
946 csum = skb->csum;
947 skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
948
949 pp = ops->gro_receive(head, skb);
950
951 skb->csum = csum;
952
953out_unlock:
954 rcu_read_unlock();
955
956out:
957 NAPI_GRO_CB(skb)->flush |= flush;
958
959 return pp;
960}
961
962static int ipv6_gro_complete(struct sk_buff *skb)
963{
964 const struct inet6_protocol *ops;
965 struct ipv6hdr *iph = ipv6_hdr(skb);
966 int err = -ENOSYS;
967
968 iph->payload_len = htons(skb->len - skb_network_offset(skb) -
969 sizeof(*iph));
970
971 rcu_read_lock();
972 ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]);
973 if (WARN_ON(!ops || !ops->gro_complete))
974 goto out_unlock;
975
976 err = ops->gro_complete(skb);
977
978out_unlock:
979 rcu_read_unlock();
980
981 return err;
982}
983
703static struct packet_type ipv6_packet_type __read_mostly = { 984static struct packet_type ipv6_packet_type __read_mostly = {
704 .type = cpu_to_be16(ETH_P_IPV6), 985 .type = cpu_to_be16(ETH_P_IPV6),
705 .func = ipv6_rcv, 986 .func = ipv6_rcv,
987 .gso_send_check = ipv6_gso_send_check,
988 .gso_segment = ipv6_gso_segment,
989 .gro_receive = ipv6_gro_receive,
990 .gro_complete = ipv6_gro_complete,
706}; 991};
707 992
708static int __init ipv6_packet_init(void) 993static int __init ipv6_packet_init(void)
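[Illustrative note] Among the code restored above, ipv6_gso_pull_exthdrs() walks the IPv6 extension-header chain: starting from the fixed header's nexthdr value, it keeps skipping option headers, each of which announces its own length and the protocol that follows, until it reaches an upper-layer protocol or runs out of data. A standalone, simplified sketch of that walk follows; the constants and packet layout are illustrative, not the kernel structures.

/*
 * Standalone sketch (not the kernel implementation) of an extension-header
 * walk in the spirit of ipv6_gso_pull_exthdrs(): skip hop-by-hop and
 * destination-option headers until an upper-layer protocol is reached.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define PROTO_HOPOPTS	0
#define PROTO_TCP	6
#define PROTO_DSTOPTS	60

/* Simplified option header: next protocol + length in 8-byte units minus 1. */
struct opt_hdr { uint8_t nexthdr; uint8_t hdrlen; };

static size_t opt_len(const struct opt_hdr *h)
{
	return (h->hdrlen + 1) * 8;
}

/* Walk extension headers; return the final protocol and the payload offset. */
static int pull_exthdrs(const uint8_t *pkt, size_t len, int proto, size_t *off)
{
	*off = 0;
	while (proto == PROTO_HOPOPTS || proto == PROTO_DSTOPTS) {
		const struct opt_hdr *h;

		if (*off + sizeof(*h) > len)
			break;			/* truncated packet */
		h = (const struct opt_hdr *)(pkt + *off);
		if (*off + opt_len(h) > len)
			break;
		proto = h->nexthdr;		/* protocol after this header */
		*off += opt_len(h);		/* skip the whole option block */
	}
	return proto;
}

int main(void)
{
	/* Hop-by-hop options (8 bytes) -> destination options (8 bytes) -> TCP. */
	uint8_t pkt[64] = { 0 };
	pkt[0] = PROTO_DSTOPTS;  pkt[1] = 0;	/* hop-by-hop: next = dstopts */
	pkt[8] = PROTO_TCP;      pkt[9] = 0;	/* dstopts: next = tcp */

	size_t off;
	int proto = pull_exthdrs(pkt, sizeof(pkt), PROTO_HOPOPTS, &off);
	printf("final protocol %d at offset %zu\n", proto, off);	/* 6 at 16 */
	return 0;
}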
@@ -734,9 +1019,9 @@ static int __net_init ipv6_init_mibs(struct net *net)
734 sizeof(struct icmpv6_mib), 1019 sizeof(struct icmpv6_mib),
735 __alignof__(struct icmpv6_mib)) < 0) 1020 __alignof__(struct icmpv6_mib)) < 0)
736 goto err_icmp_mib; 1021 goto err_icmp_mib;
737 net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), 1022 if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
738 GFP_KERNEL); 1023 sizeof(struct icmpv6msg_mib),
739 if (!net->mib.icmpv6msg_statistics) 1024 __alignof__(struct icmpv6msg_mib)) < 0)
740 goto err_icmpmsg_mib; 1025 goto err_icmpmsg_mib;
741 return 0; 1026 return 0;
742 1027
@@ -757,7 +1042,7 @@ static void ipv6_cleanup_mibs(struct net *net)
757 snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); 1042 snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
758 snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); 1043 snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
759 snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); 1044 snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
760 kfree(net->mib.icmpv6msg_statistics); 1045 snmp_mib_free((void __percpu **)net->mib.icmpv6msg_statistics);
761} 1046}
762 1047
763static int __net_init inet6_net_init(struct net *net) 1048static int __net_init inet6_net_init(struct net *net)
@@ -818,11 +1103,13 @@ static int __init inet6_init(void)
818 BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); 1103 BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
819 1104
820 /* Register the socket-side information for inet6_create. */ 1105 /* Register the socket-side information for inet6_create. */
821 for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) 1106 for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
822 INIT_LIST_HEAD(r); 1107 INIT_LIST_HEAD(r);
823 1108
824 if (disable_ipv6_mod) { 1109 if (disable_ipv6_mod) {
825 pr_info("Loaded, but administratively disabled, reboot required to enable\n"); 1110 printk(KERN_INFO
1111 "IPv6: Loaded, but administratively disabled, "
1112 "reboot required to enable\n");
826 goto out; 1113 goto out;
827 } 1114 }
828 1115
@@ -857,8 +1144,11 @@ static int __init inet6_init(void)
857 if (err) 1144 if (err)
858 goto out_sock_register_fail; 1145 goto out_sock_register_fail;
859 1146
860 tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; 1147#ifdef CONFIG_SYSCTL
861 1148 err = ipv6_static_sysctl_register();
1149 if (err)
1150 goto static_sysctl_fail;
1151#endif
862 /* 1152 /*
863 * ipngwg API draft makes clear that the correct semantics 1153 * ipngwg API draft makes clear that the correct semantics
864 * for TCP and UDP is to consider one TCP and UDP instance 1154 * for TCP and UDP is to consider one TCP and UDP instance
@@ -983,6 +1273,10 @@ ipmr_fail:
983icmp_fail: 1273icmp_fail:
984 unregister_pernet_subsys(&inet6_net_ops); 1274 unregister_pernet_subsys(&inet6_net_ops);
985register_pernet_fail: 1275register_pernet_fail:
1276#ifdef CONFIG_SYSCTL
1277 ipv6_static_sysctl_unregister();
1278static_sysctl_fail:
1279#endif
986 sock_unregister(PF_INET6); 1280 sock_unregister(PF_INET6);
987 rtnl_unregister_all(PF_INET6); 1281 rtnl_unregister_all(PF_INET6);
988out_sock_register_fail: 1282out_sock_register_fail:
@@ -1009,6 +1303,9 @@ static void __exit inet6_exit(void)
1009 /* Disallow any further netlink messages */ 1303 /* Disallow any further netlink messages */
1010 rtnl_unregister_all(PF_INET6); 1304 rtnl_unregister_all(PF_INET6);
1011 1305
1306#ifdef CONFIG_SYSCTL
1307 ipv6_sysctl_unregister();
1308#endif
1012 udpv6_exit(); 1309 udpv6_exit();
1013 udplitev6_exit(); 1310 udplitev6_exit();
1014 tcpv6_exit(); 1311 tcpv6_exit();
@@ -1036,6 +1333,9 @@ static void __exit inet6_exit(void)
1036 rawv6_exit(); 1333 rawv6_exit();
1037 1334
1038 unregister_pernet_subsys(&inet6_net_ops); 1335 unregister_pernet_subsys(&inet6_net_ops);
1336#ifdef CONFIG_SYSCTL
1337 ipv6_static_sysctl_unregister();
1338#endif
1039 proto_unregister(&rawv6_prot); 1339 proto_unregister(&rawv6_prot);
1040 proto_unregister(&udplitev6_prot); 1340 proto_unregister(&udplitev6_prot);
1041 proto_unregister(&udpv6_prot); 1341 proto_unregister(&udpv6_prot);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ecc35b93314..4c0f894d084 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -24,8 +24,6 @@
24 * This file is derived from net/ipv4/ah.c. 24 * This file is derived from net/ipv4/ah.c.
25 */ 25 */
26 26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <crypto/hash.h> 27#include <crypto/hash.h>
30#include <linux/module.h> 28#include <linux/module.h>
31#include <linux/slab.h> 29#include <linux/slab.h>
@@ -35,7 +33,6 @@
35#include <linux/pfkeyv2.h> 33#include <linux/pfkeyv2.h>
36#include <linux/string.h> 34#include <linux/string.h>
37#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
38#include <net/ip6_route.h>
39#include <net/icmp.h> 36#include <net/icmp.h>
40#include <net/ipv6.h> 37#include <net/ipv6.h>
41#include <net/protocol.h> 38#include <net/protocol.h>
@@ -44,7 +41,7 @@
44#define IPV6HDR_BASELEN 8 41#define IPV6HDR_BASELEN 8
45 42
46struct tmp_ext { 43struct tmp_ext {
47#if IS_ENABLED(CONFIG_IPV6_MIP6) 44#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
48 struct in6_addr saddr; 45 struct in6_addr saddr;
49#endif 46#endif
50 struct in6_addr daddr; 47 struct in6_addr daddr;
@@ -114,7 +111,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
114 __alignof__(struct scatterlist)); 111 __alignof__(struct scatterlist));
115} 112}
116 113
117static bool zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) 114static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
118{ 115{
119 u8 *opt = (u8 *)opthdr; 116 u8 *opt = (u8 *)opthdr;
120 int len = ipv6_optlen(opthdr); 117 int len = ipv6_optlen(opthdr);
@@ -128,7 +125,7 @@ static bool zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
128 125
129 switch (opt[off]) { 126 switch (opt[off]) {
130 127
131 case IPV6_TLV_PAD1: 128 case IPV6_TLV_PAD0:
132 optlen = 1; 129 optlen = 1;
133 break; 130 break;
134 default: 131 default:
@@ -146,13 +143,13 @@ static bool zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
146 len -= optlen; 143 len -= optlen;
147 } 144 }
148 if (len == 0) 145 if (len == 0)
149 return true; 146 return 1;
150 147
151bad: 148bad:
152 return false; 149 return 0;
153} 150}
154 151
155#if IS_ENABLED(CONFIG_IPV6_MIP6) 152#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
156/** 153/**
157 * ipv6_rearrange_destopt - rearrange IPv6 destination options header 154 * ipv6_rearrange_destopt - rearrange IPv6 destination options header
158 * @iph: IPv6 header 155 * @iph: IPv6 header
@@ -172,7 +169,7 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
172 169
173 switch (opt[off]) { 170 switch (opt[off]) {
174 171
175 case IPV6_TLV_PAD1: 172 case IPV6_TLV_PAD0:
176 optlen = 1; 173 optlen = 1;
177 break; 174 break;
178 default: 175 default:
@@ -192,13 +189,13 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
192 189
193 hao = (struct ipv6_destopt_hao *)&opt[off]; 190 hao = (struct ipv6_destopt_hao *)&opt[off];
194 if (hao->length != sizeof(hao->addr)) { 191 if (hao->length != sizeof(hao->addr)) {
195 net_warn_ratelimited("destopt hao: invalid header length: %u\n", 192 if (net_ratelimit())
196 hao->length); 193 printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
197 goto bad; 194 goto bad;
198 } 195 }
199 final_addr = hao->addr; 196 ipv6_addr_copy(&final_addr, &hao->addr);
200 hao->addr = iph->saddr; 197 ipv6_addr_copy(&hao->addr, &iph->saddr);
201 iph->saddr = final_addr; 198 ipv6_addr_copy(&iph->saddr, &final_addr);
202 } 199 }
203 break; 200 break;
204 } 201 }
@@ -244,13 +241,13 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
244 segments = rthdr->hdrlen >> 1; 241 segments = rthdr->hdrlen >> 1;
245 242
246 addrs = ((struct rt0_hdr *)rthdr)->addr; 243 addrs = ((struct rt0_hdr *)rthdr)->addr;
247 final_addr = addrs[segments - 1]; 244 ipv6_addr_copy(&final_addr, addrs + segments - 1);
248 245
249 addrs += segments - segments_left; 246 addrs += segments - segments_left;
250 memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs)); 247 memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
251 248
252 addrs[0] = iph->daddr; 249 ipv6_addr_copy(addrs, &iph->daddr);
253 iph->daddr = final_addr; 250 ipv6_addr_copy(&iph->daddr, &final_addr);
254} 251}
255 252
256static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) 253static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
@@ -320,7 +317,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err)
320 memcpy(top_iph, iph_base, IPV6HDR_BASELEN); 317 memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
321 318
322 if (extlen) { 319 if (extlen) {
323#if IS_ENABLED(CONFIG_IPV6_MIP6) 320#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
324 memcpy(&top_iph->saddr, iph_ext, extlen); 321 memcpy(&top_iph->saddr, iph_ext, extlen);
325#else 322#else
326 memcpy(&top_iph->daddr, iph_ext, extlen); 323 memcpy(&top_iph->daddr, iph_ext, extlen);
@@ -385,7 +382,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
385 memcpy(iph_base, top_iph, IPV6HDR_BASELEN); 382 memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
386 383
387 if (extlen) { 384 if (extlen) {
388#if IS_ENABLED(CONFIG_IPV6_MIP6) 385#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
389 memcpy(iph_ext, &top_iph->saddr, extlen); 386 memcpy(iph_ext, &top_iph->saddr, extlen);
390#else 387#else
391 memcpy(iph_ext, &top_iph->daddr, extlen); 388 memcpy(iph_ext, &top_iph->daddr, extlen);
@@ -434,7 +431,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
434 memcpy(top_iph, iph_base, IPV6HDR_BASELEN); 431 memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
435 432
436 if (extlen) { 433 if (extlen) {
437#if IS_ENABLED(CONFIG_IPV6_MIP6) 434#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
438 memcpy(&top_iph->saddr, iph_ext, extlen); 435 memcpy(&top_iph->saddr, iph_ext, extlen);
439#else 436#else
440 memcpy(&top_iph->daddr, iph_ext, extlen); 437 memcpy(&top_iph->daddr, iph_ext, extlen);
@@ -613,18 +610,16 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
613 struct xfrm_state *x; 610 struct xfrm_state *x;
614 611
615 if (type != ICMPV6_DEST_UNREACH && 612 if (type != ICMPV6_DEST_UNREACH &&
616 type != ICMPV6_PKT_TOOBIG && 613 type != ICMPV6_PKT_TOOBIG)
617 type != NDISC_REDIRECT)
618 return; 614 return;
619 615
620 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); 616 x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
621 if (!x) 617 if (!x)
622 return; 618 return;
623 619
624 if (type == NDISC_REDIRECT) 620 NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n",
625 ip6_redirect(skb, net, 0, 0); 621 ntohl(ah->spi), &iph->daddr);
626 else 622
627 ip6_update_pmtu(skb, net, info, 0, 0);
628 xfrm_state_put(x); 623 xfrm_state_put(x);
629} 624}
630 625
@@ -664,9 +659,9 @@ static int ah6_init_state(struct xfrm_state *x)
664 659
665 if (aalg_desc->uinfo.auth.icv_fullbits/8 != 660 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
666 crypto_ahash_digestsize(ahash)) { 661 crypto_ahash_digestsize(ahash)) {
667 pr_info("AH: %s digestsize %u != %hu\n", 662 printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
668 x->aalg->alg_name, crypto_ahash_digestsize(ahash), 663 x->aalg->alg_name, crypto_ahash_digestsize(ahash),
669 aalg_desc->uinfo.auth.icv_fullbits/8); 664 aalg_desc->uinfo.auth.icv_fullbits/8);
670 goto error; 665 goto error;
671 } 666 }
672 667
@@ -732,12 +727,12 @@ static const struct inet6_protocol ah6_protocol = {
732static int __init ah6_init(void) 727static int __init ah6_init(void)
733{ 728{
734 if (xfrm_register_type(&ah6_type, AF_INET6) < 0) { 729 if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
735 pr_info("%s: can't add xfrm type\n", __func__); 730 printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
736 return -EAGAIN; 731 return -EAGAIN;
737 } 732 }
738 733
739 if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) { 734 if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
740 pr_info("%s: can't add protocol\n", __func__); 735 printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
741 xfrm_unregister_type(&ah6_type, AF_INET6); 736 xfrm_unregister_type(&ah6_type, AF_INET6);
742 return -EAGAIN; 737 return -EAGAIN;
743 } 738 }
@@ -748,10 +743,10 @@ static int __init ah6_init(void)
748static void __exit ah6_fini(void) 743static void __exit ah6_fini(void)
749{ 744{
750 if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0) 745 if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
751 pr_info("%s: can't remove protocol\n", __func__); 746 printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
752 747
753 if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0) 748 if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
754 pr_info("%s: can't remove xfrm type\n", __func__); 749 printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
755 750
756} 751}
757 752
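
Most of the ah6.c churn above is mechanical: pr_fmt()/pr_info() versus raw printk(KERN_INFO ...), IS_ENABLED() versus the older defined(CONFIG_...) pair, and plain struct assignment of struct in6_addr versus the older ipv6_addr_copy() helper. The two copy forms produce identical bytes, as this small standalone check illustrates (the ipv6_addr_copy() below is a userspace stand-in, not the kernel's inline):

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Userspace stand-in for the kernel's ipv6_addr_copy() helper. */
static void ipv6_addr_copy(struct in6_addr *dst, const struct in6_addr *src)
{
	memcpy(dst, src, sizeof(*dst));
}

int main(void)
{
	struct in6_addr src, by_assign, by_copy;

	inet_pton(AF_INET6, "2001:db8::1", &src);

	by_assign = src;                 /* plain struct assignment */
	ipv6_addr_copy(&by_copy, &src);  /* explicit helper */

	printf("identical: %s\n",
	       memcmp(&by_assign, &by_copy, sizeof(src)) == 0 ? "yes" : "no");
	return 0;
}
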
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 757a810d8f1..674255f5e6b 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -64,7 +64,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
64 int ishost = !net->ipv6.devconf_all->forwarding; 64 int ishost = !net->ipv6.devconf_all->forwarding;
65 int err = 0; 65 int err = 0;
66 66
67 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 67 if (!capable(CAP_NET_ADMIN))
68 return -EPERM; 68 return -EPERM;
69 if (ipv6_addr_is_multicast(addr)) 69 if (ipv6_addr_is_multicast(addr))
70 return -EINVAL; 70 return -EINVAL;
@@ -75,7 +75,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
75 if (pac == NULL) 75 if (pac == NULL)
76 return -ENOMEM; 76 return -ENOMEM;
77 pac->acl_next = NULL; 77 pac->acl_next = NULL;
78 pac->acl_addr = *addr; 78 ipv6_addr_copy(&pac->acl_addr, addr);
79 79
80 rcu_read_lock(); 80 rcu_read_lock();
81 if (ifindex == 0) { 81 if (ifindex == 0) {
@@ -83,8 +83,8 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
83 83
84 rt = rt6_lookup(net, addr, NULL, 0, 0); 84 rt = rt6_lookup(net, addr, NULL, 0, 0);
85 if (rt) { 85 if (rt) {
86 dev = rt->dst.dev; 86 dev = rt->rt6i_dev;
87 ip6_rt_put(rt); 87 dst_release(&rt->dst);
88 } else if (ishost) { 88 } else if (ishost) {
89 err = -EADDRNOTAVAIL; 89 err = -EADDRNOTAVAIL;
90 goto error; 90 goto error;
@@ -189,9 +189,6 @@ void ipv6_sock_ac_close(struct sock *sk)
189 struct net *net = sock_net(sk); 189 struct net *net = sock_net(sk);
190 int prev_index; 190 int prev_index;
191 191
192 if (!np->ipv6_ac_list)
193 return;
194
195 write_lock_bh(&ipv6_sk_ac_lock); 192 write_lock_bh(&ipv6_sk_ac_lock);
196 pac = np->ipv6_ac_list; 193 pac = np->ipv6_ac_list;
197 np->ipv6_ac_list = NULL; 194 np->ipv6_ac_list = NULL;
@@ -214,6 +211,35 @@ void ipv6_sock_ac_close(struct sock *sk)
214 rcu_read_unlock(); 211 rcu_read_unlock();
215} 212}
216 213
214#if 0
215/* The function is not used, which is funny. Apparently, author
216 * supposed to use it to filter out datagrams inside udp/raw but forgot.
217 *
218 * It is OK, anycasts are not special comparing to delivery to unicasts.
219 */
220
221int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
222{
223 struct ipv6_ac_socklist *pac;
224 struct ipv6_pinfo *np = inet6_sk(sk);
225 int found;
226
227 found = 0;
228 read_lock(&ipv6_sk_ac_lock);
229 for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) {
230 if (ifindex && pac->acl_ifindex != ifindex)
231 continue;
232 found = ipv6_addr_equal(&pac->acl_addr, addr);
233 if (found)
234 break;
235 }
236 read_unlock(&ipv6_sk_ac_lock);
237
238 return found;
239}
240
241#endif
242
217static void aca_put(struct ifacaddr6 *ac) 243static void aca_put(struct ifacaddr6 *ac)
218{ 244{
219 if (atomic_dec_and_test(&ac->aca_refcnt)) { 245 if (atomic_dec_and_test(&ac->aca_refcnt)) {
@@ -263,14 +289,14 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
263 goto out; 289 goto out;
264 } 290 }
265 291
266 rt = addrconf_dst_alloc(idev, addr, true); 292 rt = addrconf_dst_alloc(idev, addr, 1);
267 if (IS_ERR(rt)) { 293 if (IS_ERR(rt)) {
268 kfree(aca); 294 kfree(aca);
269 err = PTR_ERR(rt); 295 err = PTR_ERR(rt);
270 goto out; 296 goto out;
271 } 297 }
272 298
273 aca->aca_addr = *addr; 299 ipv6_addr_copy(&aca->aca_addr, addr);
274 aca->aca_idev = idev; 300 aca->aca_idev = idev;
275 aca->aca_rt = rt; 301 aca->aca_rt = rt;
276 aca->aca_users = 1; 302 aca->aca_users = 1;
@@ -345,7 +371,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
345 * check if the interface has this anycast address 371 * check if the interface has this anycast address
346 * called with rcu_read_lock() 372 * called with rcu_read_lock()
347 */ 373 */
348static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr) 374static int ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
349{ 375{
350 struct inet6_dev *idev; 376 struct inet6_dev *idev;
351 struct ifacaddr6 *aca; 377 struct ifacaddr6 *aca;
@@ -359,16 +385,16 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
359 read_unlock_bh(&idev->lock); 385 read_unlock_bh(&idev->lock);
360 return aca != NULL; 386 return aca != NULL;
361 } 387 }
362 return false; 388 return 0;
363} 389}
364 390
365/* 391/*
366 * check if given interface (or any, if dev==0) has this anycast address 392 * check if given interface (or any, if dev==0) has this anycast address
367 */ 393 */
368bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, 394int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
369 const struct in6_addr *addr) 395 const struct in6_addr *addr)
370{ 396{
371 bool found = false; 397 int found = 0;
372 398
373 rcu_read_lock(); 399 rcu_read_lock();
374 if (dev) 400 if (dev)
@@ -376,7 +402,7 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
376 else 402 else
377 for_each_netdev_rcu(net, dev) 403 for_each_netdev_rcu(net, dev)
378 if (ipv6_chk_acast_dev(dev, addr)) { 404 if (ipv6_chk_acast_dev(dev, addr)) {
379 found = true; 405 found = 1;
380 break; 406 break;
381 } 407 }
382 rcu_read_unlock(); 408 rcu_read_unlock();
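
The anycast.c hunk also carries back the long-dead #if 0 inet6_ac_check(), which simply walks the socket's anycast list comparing addresses and, optionally, interface indexes. A userspace sketch of that lookup, with a hypothetical acl_entry type mirroring struct ipv6_ac_socklist and made-up sample data:

#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Hypothetical userspace mirror of struct ipv6_ac_socklist. */
struct acl_entry {
	struct in6_addr   addr;
	int               ifindex;
	struct acl_entry *next;
};

/* Walk the list; ifindex == 0 matches any interface, as in the kernel code. */
static int acl_check(const struct acl_entry *list,
		     const struct in6_addr *addr, int ifindex)
{
	for (; list; list = list->next) {
		if (ifindex && list->ifindex != ifindex)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&list->addr, addr))
			return 1;
	}
	return 0;
}

int main(void)
{
	struct acl_entry e = { .ifindex = 2, .next = NULL };
	struct in6_addr q;

	inet_pton(AF_INET6, "2001:db8::42", &e.addr);   /* sample address */
	inet_pton(AF_INET6, "2001:db8::42", &q);

	printf("match on ifindex 2: %d\n", acl_check(&e, &q, 2));
	printf("match on ifindex 3: %d\n", acl_check(&e, &q, 3));
	return 0;
}
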
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 8edf2601065..b46e9f88ce3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -22,7 +22,6 @@
22#include <linux/ipv6.h> 22#include <linux/ipv6.h>
23#include <linux/route.h> 23#include <linux/route.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/export.h>
26 25
27#include <net/ipv6.h> 26#include <net/ipv6.h>
28#include <net/ndisc.h> 27#include <net/ndisc.h>
@@ -34,9 +33,9 @@
34#include <linux/errqueue.h> 33#include <linux/errqueue.h>
35#include <asm/uaccess.h> 34#include <asm/uaccess.h>
36 35
37static bool ipv6_mapped_addr_any(const struct in6_addr *a) 36static inline int ipv6_mapped_addr_any(const struct in6_addr *a)
38{ 37{
39 return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); 38 return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0));
40} 39}
41 40
42int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 41int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -72,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
72 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 71 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
73 if (flowlabel == NULL) 72 if (flowlabel == NULL)
74 return -EINVAL; 73 return -EINVAL;
75 usin->sin6_addr = flowlabel->dst; 74 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
76 } 75 }
77 } 76 }
78 77
@@ -99,7 +98,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
99 sin.sin_port = usin->sin6_port; 98 sin.sin_port = usin->sin6_port;
100 99
101 err = ip4_datagram_connect(sk, 100 err = ip4_datagram_connect(sk,
102 (struct sockaddr *) &sin, 101 (struct sockaddr*) &sin,
103 sizeof(sin)); 102 sizeof(sin));
104 103
105ipv4_connected: 104ipv4_connected:
@@ -144,7 +143,7 @@ ipv4_connected:
144 } 143 }
145 } 144 }
146 145
147 np->daddr = *daddr; 146 ipv6_addr_copy(&np->daddr, daddr);
148 np->flow_label = fl6.flowlabel; 147 np->flow_label = fl6.flowlabel;
149 148
150 inet->inet_dport = usin->sin6_port; 149 inet->inet_dport = usin->sin6_port;
@@ -155,8 +154,8 @@ ipv4_connected:
155 */ 154 */
156 155
157 fl6.flowi6_proto = sk->sk_protocol; 156 fl6.flowi6_proto = sk->sk_protocol;
158 fl6.daddr = np->daddr; 157 ipv6_addr_copy(&fl6.daddr, &np->daddr);
159 fl6.saddr = np->saddr; 158 ipv6_addr_copy(&fl6.saddr, &np->saddr);
160 fl6.flowi6_oif = sk->sk_bound_dev_if; 159 fl6.flowi6_oif = sk->sk_bound_dev_if;
161 fl6.flowi6_mark = sk->sk_mark; 160 fl6.flowi6_mark = sk->sk_mark;
162 fl6.fl6_dport = inet->inet_dport; 161 fl6.fl6_dport = inet->inet_dport;
@@ -180,10 +179,10 @@ ipv4_connected:
180 /* source address lookup done in ip6_dst_lookup */ 179 /* source address lookup done in ip6_dst_lookup */
181 180
182 if (ipv6_addr_any(&np->saddr)) 181 if (ipv6_addr_any(&np->saddr))
183 np->saddr = fl6.saddr; 182 ipv6_addr_copy(&np->saddr, &fl6.saddr);
184 183
185 if (ipv6_addr_any(&np->rcv_saddr)) { 184 if (ipv6_addr_any(&np->rcv_saddr)) {
186 np->rcv_saddr = fl6.saddr; 185 ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr);
187 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 186 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
188 if (sk->sk_prot->rehash) 187 if (sk->sk_prot->rehash)
189 sk->sk_prot->rehash(sk); 188 sk->sk_prot->rehash(sk);
@@ -203,7 +202,6 @@ out:
203 fl6_sock_release(flowlabel); 202 fl6_sock_release(flowlabel);
204 return err; 203 return err;
205} 204}
206EXPORT_SYMBOL_GPL(ip6_datagram_connect);
207 205
208void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 206void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
209 __be16 port, u32 info, u8 *payload) 207 __be16 port, u32 info, u8 *payload)
@@ -259,7 +257,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
259 skb_put(skb, sizeof(struct ipv6hdr)); 257 skb_put(skb, sizeof(struct ipv6hdr));
260 skb_reset_network_header(skb); 258 skb_reset_network_header(skb);
261 iph = ipv6_hdr(skb); 259 iph = ipv6_hdr(skb);
262 iph->daddr = fl6->daddr; 260 ipv6_addr_copy(&iph->daddr, &fl6->daddr);
263 261
264 serr = SKB_EXT_ERR(skb); 262 serr = SKB_EXT_ERR(skb);
265 serr->ee.ee_errno = err; 263 serr->ee.ee_errno = err;
@@ -296,16 +294,20 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
296 skb_put(skb, sizeof(struct ipv6hdr)); 294 skb_put(skb, sizeof(struct ipv6hdr));
297 skb_reset_network_header(skb); 295 skb_reset_network_header(skb);
298 iph = ipv6_hdr(skb); 296 iph = ipv6_hdr(skb);
299 iph->daddr = fl6->daddr; 297 ipv6_addr_copy(&iph->daddr, &fl6->daddr);
300 298
301 mtu_info = IP6CBMTU(skb); 299 mtu_info = IP6CBMTU(skb);
300 if (!mtu_info) {
301 kfree_skb(skb);
302 return;
303 }
302 304
303 mtu_info->ip6m_mtu = mtu; 305 mtu_info->ip6m_mtu = mtu;
304 mtu_info->ip6m_addr.sin6_family = AF_INET6; 306 mtu_info->ip6m_addr.sin6_family = AF_INET6;
305 mtu_info->ip6m_addr.sin6_port = 0; 307 mtu_info->ip6m_addr.sin6_port = 0;
306 mtu_info->ip6m_addr.sin6_flowinfo = 0; 308 mtu_info->ip6m_addr.sin6_flowinfo = 0;
307 mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif; 309 mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
308 mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr; 310 ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
309 311
310 __skb_pull(skb, skb_tail_pointer(skb) - skb->data); 312 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
311 skb_reset_transport_header(skb); 313 skb_reset_transport_header(skb);
@@ -356,8 +358,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
356 sin->sin6_port = serr->port; 358 sin->sin6_port = serr->port;
357 sin->sin6_scope_id = 0; 359 sin->sin6_scope_id = 0;
358 if (skb->protocol == htons(ETH_P_IPV6)) { 360 if (skb->protocol == htons(ETH_P_IPV6)) {
359 sin->sin6_addr = 361 ipv6_addr_copy(&sin->sin6_addr,
360 *(struct in6_addr *)(nh + serr->addr_offset); 362 (struct in6_addr *)(nh + serr->addr_offset));
361 if (np->sndflow) 363 if (np->sndflow)
362 sin->sin6_flowinfo = 364 sin->sin6_flowinfo =
363 (*(__be32 *)(nh + serr->addr_offset - 24) & 365 (*(__be32 *)(nh + serr->addr_offset - 24) &
@@ -378,7 +380,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
378 sin->sin6_flowinfo = 0; 380 sin->sin6_flowinfo = 0;
379 sin->sin6_scope_id = 0; 381 sin->sin6_scope_id = 0;
380 if (skb->protocol == htons(ETH_P_IPV6)) { 382 if (skb->protocol == htons(ETH_P_IPV6)) {
381 sin->sin6_addr = ipv6_hdr(skb)->saddr; 383 ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
382 if (np->rxopt.all) 384 if (np->rxopt.all)
383 datagram_recv_ctl(sk, msg, skb); 385 datagram_recv_ctl(sk, msg, skb);
384 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) 386 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -416,7 +418,6 @@ out_free_skb:
416out: 418out:
417 return err; 419 return err;
418} 420}
419EXPORT_SYMBOL_GPL(ipv6_recv_error);
420 421
421/* 422/*
422 * Handle IPV6_RECVPATHMTU 423 * Handle IPV6_RECVPATHMTU
@@ -454,7 +455,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
454 sin->sin6_flowinfo = 0; 455 sin->sin6_flowinfo = 0;
455 sin->sin6_port = 0; 456 sin->sin6_port = 0;
456 sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; 457 sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
457 sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; 458 ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
458 } 459 }
459 460
460 put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); 461 put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
@@ -478,7 +479,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
478 struct in6_pktinfo src_info; 479 struct in6_pktinfo src_info;
479 480
480 src_info.ipi6_ifindex = opt->iif; 481 src_info.ipi6_ifindex = opt->iif;
481 src_info.ipi6_addr = ipv6_hdr(skb)->daddr; 482 ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
482 put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); 483 put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
483 } 484 }
484 485
@@ -488,7 +489,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
488 } 489 }
489 490
490 if (np->rxopt.bits.rxtclass) { 491 if (np->rxopt.bits.rxtclass) {
491 int tclass = ipv6_tclass(ipv6_hdr(skb)); 492 int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
492 put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); 493 put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
493 } 494 }
494 495
@@ -518,10 +519,10 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
518 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 519 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
519 520
520 while (off <= opt->lastopt) { 521 while (off <= opt->lastopt) {
521 unsigned int len; 522 unsigned len;
522 u8 *ptr = nh + off; 523 u8 *ptr = nh + off;
523 524
524 switch (nexthdr) { 525 switch(nexthdr) {
525 case IPPROTO_DSTOPTS: 526 case IPPROTO_DSTOPTS:
526 nexthdr = ptr[0]; 527 nexthdr = ptr[0];
527 len = (ptr[1] + 1) << 3; 528 len = (ptr[1] + 1) << 3;
@@ -553,7 +554,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
553 struct in6_pktinfo src_info; 554 struct in6_pktinfo src_info;
554 555
555 src_info.ipi6_ifindex = opt->iif; 556 src_info.ipi6_ifindex = opt->iif;
556 src_info.ipi6_addr = ipv6_hdr(skb)->daddr; 557 ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
557 put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); 558 put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
558 } 559 }
559 if (np->rxopt.bits.rxohlim) { 560 if (np->rxopt.bits.rxohlim) {
@@ -578,7 +579,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
578 } 579 }
579 if (np->rxopt.bits.rxorigdstaddr) { 580 if (np->rxopt.bits.rxorigdstaddr) {
580 struct sockaddr_in6 sin6; 581 struct sockaddr_in6 sin6;
581 __be16 *ports = (__be16 *) skb_transport_header(skb); 582 u16 *ports = (u16 *) skb_transport_header(skb);
582 583
583 if (skb_transport_offset(skb) + 4 <= skb->len) { 584 if (skb_transport_offset(skb) + 4 <= skb->len) {
584 /* All current transport protocols have the port numbers in the 585 /* All current transport protocols have the port numbers in the
@@ -587,7 +588,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
587 */ 588 */
588 589
589 sin6.sin6_family = AF_INET6; 590 sin6.sin6_family = AF_INET6;
590 sin6.sin6_addr = ipv6_hdr(skb)->daddr; 591 ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
591 sin6.sin6_port = ports[1]; 592 sin6.sin6_port = ports[1];
592 sin6.sin6_flowinfo = 0; 593 sin6.sin6_flowinfo = 0;
593 sin6.sin6_scope_id = 0; 594 sin6.sin6_scope_id = 0;
@@ -657,12 +658,12 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
657 658
658 if (addr_type != IPV6_ADDR_ANY) { 659 if (addr_type != IPV6_ADDR_ANY) {
659 int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; 660 int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
660 if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) && 661 if (!inet_sk(sk)->transparent &&
661 !ipv6_chk_addr(net, &src_info->ipi6_addr, 662 !ipv6_chk_addr(net, &src_info->ipi6_addr,
662 strict ? dev : NULL, 0)) 663 strict ? dev : NULL, 0))
663 err = -EINVAL; 664 err = -EINVAL;
664 else 665 else
665 fl6->saddr = src_info->ipi6_addr; 666 ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr);
666 } 667 }
667 668
668 rcu_read_unlock(); 669 rcu_read_unlock();
@@ -701,7 +702,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
701 err = -EINVAL; 702 err = -EINVAL;
702 goto exit_f; 703 goto exit_f;
703 } 704 }
704 if (!ns_capable(net->user_ns, CAP_NET_RAW)) { 705 if (!capable(CAP_NET_RAW)) {
705 err = -EPERM; 706 err = -EPERM;
706 goto exit_f; 707 goto exit_f;
707 } 708 }
@@ -721,7 +722,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
721 err = -EINVAL; 722 err = -EINVAL;
722 goto exit_f; 723 goto exit_f;
723 } 724 }
724 if (!ns_capable(net->user_ns, CAP_NET_RAW)) { 725 if (!capable(CAP_NET_RAW)) {
725 err = -EPERM; 726 err = -EPERM;
726 goto exit_f; 727 goto exit_f;
727 } 728 }
@@ -746,7 +747,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
746 err = -EINVAL; 747 err = -EINVAL;
747 goto exit_f; 748 goto exit_f;
748 } 749 }
749 if (!ns_capable(net->user_ns, CAP_NET_RAW)) { 750 if (!capable(CAP_NET_RAW)) {
750 err = -EPERM; 751 err = -EPERM;
751 goto exit_f; 752 goto exit_f;
752 } 753 }
@@ -769,7 +770,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
769 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); 770 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
770 771
771 switch (rthdr->type) { 772 switch (rthdr->type) {
772#if IS_ENABLED(CONFIG_IPV6_MIP6) 773#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
773 case IPV6_SRCRT_TYPE_2: 774 case IPV6_SRCRT_TYPE_2:
774 if (rthdr->hdrlen != 2 || 775 if (rthdr->hdrlen != 2 ||
775 rthdr->segments_left != 1) { 776 rthdr->segments_left != 1) {
@@ -830,8 +831,9 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
830 int tc; 831 int tc;
831 832
832 err = -EINVAL; 833 err = -EINVAL;
833 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) 834 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
834 goto exit_f; 835 goto exit_f;
836 }
835 837
836 tc = *(int *)CMSG_DATA(cmsg); 838 tc = *(int *)CMSG_DATA(cmsg);
837 if (tc < -1 || tc > 0xff) 839 if (tc < -1 || tc > 0xff)
@@ -848,8 +850,9 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
848 int df; 850 int df;
849 851
850 err = -EINVAL; 852 err = -EINVAL;
851 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) 853 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
852 goto exit_f; 854 goto exit_f;
855 }
853 856
854 df = *(int *)CMSG_DATA(cmsg); 857 df = *(int *)CMSG_DATA(cmsg);
855 if (df < 0 || df > 1) 858 if (df < 0 || df > 1)
@@ -871,4 +874,3 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
871exit_f: 874exit_f:
872 return err; 875 return err;
873} 876}
874EXPORT_SYMBOL_GPL(datagram_send_ctl);
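
In datagram_recv_ctl() above, the IPV6_TCLASS branch trades the ipv6_tclass() helper for an open-coded read of the first 32 bits of the IPv6 header (4 bits version, 8 bits Traffic Class, 20 bits flow label); shifting the host-order word right by 20 and masking with 0xff isolates the Traffic Class either way. A self-contained check of that arithmetic (the sample header word is made up):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	/* First 32 bits of an IPv6 header: version 6, tclass 0xb8,
	 * flow label 0x12345 -- stored in network byte order on the wire. */
	uint32_t word_host = (6u << 28) | (0xb8u << 20) | 0x12345u;
	uint32_t word_net  = htonl(word_host);

	/* The open-coded form from the hunk above. */
	int tclass = (ntohl(word_net) >> 20) & 0xff;

	printf("traffic class = 0x%02x\n", tclass);   /* prints 0xb8 */
	return 0;
}
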
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 282f3723ee1..1ac7938dd9e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -24,8 +24,6 @@
24 * This file is derived from net/ipv4/esp.c 24 * This file is derived from net/ipv4/esp.c
25 */ 25 */
26 26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <crypto/aead.h> 27#include <crypto/aead.h>
30#include <crypto/authenc.h> 28#include <crypto/authenc.h>
31#include <linux/err.h> 29#include <linux/err.h>
@@ -39,7 +37,6 @@
39#include <linux/random.h> 37#include <linux/random.h>
40#include <linux/slab.h> 38#include <linux/slab.h>
41#include <linux/spinlock.h> 39#include <linux/spinlock.h>
42#include <net/ip6_route.h>
43#include <net/icmp.h> 40#include <net/icmp.h>
44#include <net/ipv6.h> 41#include <net/ipv6.h>
45#include <net/protocol.h> 42#include <net/protocol.h>
@@ -167,6 +164,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
167 struct esp_data *esp = x->data; 164 struct esp_data *esp = x->data;
168 165
169 /* skb is pure payload to encrypt */ 166 /* skb is pure payload to encrypt */
167 err = -ENOMEM;
168
170 aead = esp->aead; 169 aead = esp->aead;
171 alen = crypto_aead_authsize(aead); 170 alen = crypto_aead_authsize(aead);
172 171
@@ -201,10 +200,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
201 } 200 }
202 201
203 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); 202 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
204 if (!tmp) { 203 if (!tmp)
205 err = -ENOMEM;
206 goto error; 204 goto error;
207 }
208 205
209 seqhi = esp_tmp_seqhi(tmp); 206 seqhi = esp_tmp_seqhi(tmp);
210 iv = esp_tmp_iv(aead, tmp, seqhilen); 207 iv = esp_tmp_iv(aead, tmp, seqhilen);
@@ -414,15 +411,19 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
414 struct esp_data *esp = x->data; 411 struct esp_data *esp = x->data;
415 u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); 412 u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
416 u32 align = max_t(u32, blksize, esp->padlen); 413 u32 align = max_t(u32, blksize, esp->padlen);
417 unsigned int net_adj; 414 u32 rem;
418 415
419 if (x->props.mode != XFRM_MODE_TUNNEL) 416 mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
420 net_adj = sizeof(struct ipv6hdr); 417 rem = mtu & (align - 1);
421 else 418 mtu &= ~(align - 1);
422 net_adj = 0; 419
420 if (x->props.mode != XFRM_MODE_TUNNEL) {
421 u32 padsize = ((blksize - 1) & 7) + 1;
422 mtu -= blksize - padsize;
423 mtu += min_t(u32, blksize - padsize, rem);
424 }
423 425
424 return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - 426 return mtu - 2;
425 net_adj) & ~(align - 1)) + (net_adj - 2);
426} 427}
427 428
428static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 429static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -434,19 +435,15 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
434 struct xfrm_state *x; 435 struct xfrm_state *x;
435 436
436 if (type != ICMPV6_DEST_UNREACH && 437 if (type != ICMPV6_DEST_UNREACH &&
437 type != ICMPV6_PKT_TOOBIG && 438 type != ICMPV6_PKT_TOOBIG)
438 type != NDISC_REDIRECT)
439 return; 439 return;
440 440
441 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, 441 x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
442 esph->spi, IPPROTO_ESP, AF_INET6); 442 esph->spi, IPPROTO_ESP, AF_INET6);
443 if (!x) 443 if (!x)
444 return; 444 return;
445 445 printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n",
446 if (type == NDISC_REDIRECT) 446 ntohl(esph->spi), &iph->daddr);
447 ip6_redirect(skb, net, 0, 0);
448 else
449 ip6_update_pmtu(skb, net, info, 0, 0);
450 xfrm_state_put(x); 447 xfrm_state_put(x);
451} 448}
452 449
@@ -654,11 +651,11 @@ static const struct inet6_protocol esp6_protocol = {
654static int __init esp6_init(void) 651static int __init esp6_init(void)
655{ 652{
656 if (xfrm_register_type(&esp6_type, AF_INET6) < 0) { 653 if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
657 pr_info("%s: can't add xfrm type\n", __func__); 654 printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
658 return -EAGAIN; 655 return -EAGAIN;
659 } 656 }
660 if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) { 657 if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
661 pr_info("%s: can't add protocol\n", __func__); 658 printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
662 xfrm_unregister_type(&esp6_type, AF_INET6); 659 xfrm_unregister_type(&esp6_type, AF_INET6);
663 return -EAGAIN; 660 return -EAGAIN;
664 } 661 }
@@ -669,9 +666,9 @@ static int __init esp6_init(void)
669static void __exit esp6_fini(void) 666static void __exit esp6_fini(void)
670{ 667{
671 if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0) 668 if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
672 pr_info("%s: can't remove protocol\n", __func__); 669 printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
673 if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0) 670 if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
674 pr_info("%s: can't remove xfrm type\n", __func__); 671 printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
675} 672}
676 673
677module_init(esp6_init); 674module_init(esp6_init);
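
The esp6_get_mtu() hunk swaps one payload-MTU formula for another: both subtract the ESP header and ICV, round down to the cipher's alignment, and in transport mode hand part of the remainder back. A standalone sketch of the right-hand arithmetic with made-up sample sizes (the 1500/24/12/16 values below are illustrative, not taken from any real SA):

#include <stdio.h>
#include <stdint.h>

static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

/* Mirrors the right-hand esp6_get_mtu(); blksize is assumed to be the
 * AEAD block size already rounded up to a multiple of 4. */
static uint32_t esp6_mtu_sketch(uint32_t mtu, uint32_t header_len,
				uint32_t icv_len, uint32_t blksize,
				uint32_t padlen, int tunnel_mode)
{
	uint32_t align = blksize > padlen ? blksize : padlen;
	uint32_t rem;

	mtu -= header_len + icv_len;
	rem  = mtu & (align - 1);
	mtu &= ~(align - 1);

	if (!tunnel_mode) {
		uint32_t padsize = ((blksize - 1) & 7) + 1;

		mtu -= blksize - padsize;
		mtu += min_u32(blksize - padsize, rem);
	}
	return mtu - 2;
}

int main(void)
{
	/* 1500-byte link MTU, 24-byte ESP header+IV, 12-byte ICV,
	 * 16-byte block size, no extra padding, transport mode. */
	printf("payload mtu = %u\n", esp6_mtu_sketch(1500, 24, 12, 16, 0, 0));
	return 0;
}
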
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 473f628f9f2..79a485e8a70 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -30,7 +30,6 @@
30#include <linux/in6.h> 30#include <linux/in6.h>
31#include <linux/icmpv6.h> 31#include <linux/icmpv6.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/export.h>
34 33
35#include <net/dst.h> 34#include <net/dst.h>
36#include <net/sock.h> 35#include <net/sock.h>
@@ -43,23 +42,67 @@
43#include <net/ndisc.h> 42#include <net/ndisc.h>
44#include <net/ip6_route.h> 43#include <net/ip6_route.h>
45#include <net/addrconf.h> 44#include <net/addrconf.h>
46#if IS_ENABLED(CONFIG_IPV6_MIP6) 45#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
47#include <net/xfrm.h> 46#include <net/xfrm.h>
48#endif 47#endif
49 48
50#include <asm/uaccess.h> 49#include <asm/uaccess.h>
51 50
51int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
52{
53 const unsigned char *nh = skb_network_header(skb);
54 int packet_len = skb->tail - skb->network_header;
55 struct ipv6_opt_hdr *hdr;
56 int len;
57
58 if (offset + 2 > packet_len)
59 goto bad;
60 hdr = (struct ipv6_opt_hdr *)(nh + offset);
61 len = ((hdr->hdrlen + 1) << 3);
62
63 if (offset + len > packet_len)
64 goto bad;
65
66 offset += 2;
67 len -= 2;
68
69 while (len > 0) {
70 int opttype = nh[offset];
71 int optlen;
72
73 if (opttype == type)
74 return offset;
75
76 switch (opttype) {
77 case IPV6_TLV_PAD0:
78 optlen = 1;
79 break;
80 default:
81 optlen = nh[offset + 1] + 2;
82 if (optlen > len)
83 goto bad;
84 break;
85 }
86 offset += optlen;
87 len -= optlen;
88 }
89 /* not_found */
90 bad:
91 return -1;
92}
93EXPORT_SYMBOL_GPL(ipv6_find_tlv);
94
52/* 95/*
53 * Parsing tlv encoded headers. 96 * Parsing tlv encoded headers.
54 * 97 *
55 * Parsing function "func" returns true, if parsing succeed 98 * Parsing function "func" returns 1, if parsing succeed
56 * and false, if it failed. 99 * and 0, if it failed.
57 * It MUST NOT touch skb->h. 100 * It MUST NOT touch skb->h.
58 */ 101 */
59 102
60struct tlvtype_proc { 103struct tlvtype_proc {
61 int type; 104 int type;
62 bool (*func)(struct sk_buff *skb, int offset); 105 int (*func)(struct sk_buff *skb, int offset);
63}; 106};
64 107
65/********************* 108/*********************
@@ -68,11 +111,11 @@ struct tlvtype_proc {
68 111
69/* An unknown option is detected, decide what to do */ 112/* An unknown option is detected, decide what to do */
70 113
71static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) 114static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
72{ 115{
73 switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { 116 switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
74 case 0: /* ignore */ 117 case 0: /* ignore */
75 return true; 118 return 1;
76 119
77 case 1: /* drop packet */ 120 case 1: /* drop packet */
78 break; 121 break;
@@ -85,22 +128,21 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
85 break; 128 break;
86 case 2: /* send ICMP PARM PROB regardless and drop packet */ 129 case 2: /* send ICMP PARM PROB regardless and drop packet */
87 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); 130 icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
88 return false; 131 return 0;
89 } 132 }
90 133
91 kfree_skb(skb); 134 kfree_skb(skb);
92 return false; 135 return 0;
93} 136}
94 137
95/* Parse tlv encoded option header (hop-by-hop or destination) */ 138/* Parse tlv encoded option header (hop-by-hop or destination) */
96 139
97static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) 140static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
98{ 141{
99 const struct tlvtype_proc *curr; 142 struct tlvtype_proc *curr;
100 const unsigned char *nh = skb_network_header(skb); 143 const unsigned char *nh = skb_network_header(skb);
101 int off = skb_network_header_len(skb); 144 int off = skb_network_header_len(skb);
102 int len = (skb_transport_header(skb)[1] + 1) << 3; 145 int len = (skb_transport_header(skb)[1] + 1) << 3;
103 int padlen = 0;
104 146
105 if (skb_transport_offset(skb) + len > skb_headlen(skb)) 147 if (skb_transport_offset(skb) + len > skb_headlen(skb))
106 goto bad; 148 goto bad;
@@ -110,33 +152,13 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
110 152
111 while (len > 0) { 153 while (len > 0) {
112 int optlen = nh[off + 1] + 2; 154 int optlen = nh[off + 1] + 2;
113 int i;
114 155
115 switch (nh[off]) { 156 switch (nh[off]) {
116 case IPV6_TLV_PAD1: 157 case IPV6_TLV_PAD0:
117 optlen = 1; 158 optlen = 1;
118 padlen++;
119 if (padlen > 7)
120 goto bad;
121 break; 159 break;
122 160
123 case IPV6_TLV_PADN: 161 case IPV6_TLV_PADN:
124 /* RFC 2460 states that the purpose of PadN is
125 * to align the containing header to multiples
126 * of 8. 7 is therefore the highest valid value.
127 * See also RFC 4942, Section 2.1.9.5.
128 */
129 padlen += optlen;
130 if (padlen > 7)
131 goto bad;
132 /* RFC 4942 recommends receiving hosts to
133 * actively check PadN payload to contain
134 * only zeroes.
135 */
136 for (i = 2; i < optlen; i++) {
137 if (nh[off + i] != 0)
138 goto bad;
139 }
140 break; 162 break;
141 163
142 default: /* Other TLV code so scan list */ 164 default: /* Other TLV code so scan list */
@@ -147,41 +169,33 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
147 /* type specific length/alignment 169 /* type specific length/alignment
148 checks will be performed in the 170 checks will be performed in the
149 func(). */ 171 func(). */
150 if (curr->func(skb, off) == false) 172 if (curr->func(skb, off) == 0)
151 return false; 173 return 0;
152 break; 174 break;
153 } 175 }
154 } 176 }
155 if (curr->type < 0) { 177 if (curr->type < 0) {
156 if (ip6_tlvopt_unknown(skb, off) == 0) 178 if (ip6_tlvopt_unknown(skb, off) == 0)
157 return false; 179 return 0;
158 } 180 }
159 padlen = 0;
160 break; 181 break;
161 } 182 }
162 off += optlen; 183 off += optlen;
163 len -= optlen; 184 len -= optlen;
164 } 185 }
165 /* This case will not be caught by above check since its padding
166 * length is smaller than 7:
167 * 1 byte NH + 1 byte Length + 6 bytes Padding
168 */
169 if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
170 goto bad;
171
172 if (len == 0) 186 if (len == 0)
173 return true; 187 return 1;
174bad: 188bad:
175 kfree_skb(skb); 189 kfree_skb(skb);
176 return false; 190 return 0;
177} 191}
178 192
179/***************************** 193/*****************************
180 Destination options header. 194 Destination options header.
181 *****************************/ 195 *****************************/
182 196
183#if IS_ENABLED(CONFIG_IPV6_MIP6) 197#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
184static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) 198static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
185{ 199{
186 struct ipv6_destopt_hao *hao; 200 struct ipv6_destopt_hao *hao;
187 struct inet6_skb_parm *opt = IP6CB(skb); 201 struct inet6_skb_parm *opt = IP6CB(skb);
@@ -228,23 +242,23 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
228 if (skb->ip_summed == CHECKSUM_COMPLETE) 242 if (skb->ip_summed == CHECKSUM_COMPLETE)
229 skb->ip_summed = CHECKSUM_NONE; 243 skb->ip_summed = CHECKSUM_NONE;
230 244
231 tmp_addr = ipv6h->saddr; 245 ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
232 ipv6h->saddr = hao->addr; 246 ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
233 hao->addr = tmp_addr; 247 ipv6_addr_copy(&hao->addr, &tmp_addr);
234 248
235 if (skb->tstamp.tv64 == 0) 249 if (skb->tstamp.tv64 == 0)
236 __net_timestamp(skb); 250 __net_timestamp(skb);
237 251
238 return true; 252 return 1;
239 253
240 discard: 254 discard:
241 kfree_skb(skb); 255 kfree_skb(skb);
242 return false; 256 return 0;
243} 257}
244#endif 258#endif
245 259
246static const struct tlvtype_proc tlvprocdestopt_lst[] = { 260static struct tlvtype_proc tlvprocdestopt_lst[] = {
247#if IS_ENABLED(CONFIG_IPV6_MIP6) 261#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
248 { 262 {
249 .type = IPV6_TLV_HAO, 263 .type = IPV6_TLV_HAO,
250 .func = ipv6_dest_hao, 264 .func = ipv6_dest_hao,
@@ -256,29 +270,31 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = {
256static int ipv6_destopt_rcv(struct sk_buff *skb) 270static int ipv6_destopt_rcv(struct sk_buff *skb)
257{ 271{
258 struct inet6_skb_parm *opt = IP6CB(skb); 272 struct inet6_skb_parm *opt = IP6CB(skb);
259#if IS_ENABLED(CONFIG_IPV6_MIP6) 273#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
260 __u16 dstbuf; 274 __u16 dstbuf;
261#endif 275#endif
262 struct dst_entry *dst = skb_dst(skb); 276 struct dst_entry *dst;
263 277
264 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 278 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
265 !pskb_may_pull(skb, (skb_transport_offset(skb) + 279 !pskb_may_pull(skb, (skb_transport_offset(skb) +
266 ((skb_transport_header(skb)[1] + 1) << 3)))) { 280 ((skb_transport_header(skb)[1] + 1) << 3)))) {
267 IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), 281 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
268 IPSTATS_MIB_INHDRERRORS); 282 IPSTATS_MIB_INHDRERRORS);
269 kfree_skb(skb); 283 kfree_skb(skb);
270 return -1; 284 return -1;
271 } 285 }
272 286
273 opt->lastopt = opt->dst1 = skb_network_header_len(skb); 287 opt->lastopt = opt->dst1 = skb_network_header_len(skb);
274#if IS_ENABLED(CONFIG_IPV6_MIP6) 288#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
275 dstbuf = opt->dst1; 289 dstbuf = opt->dst1;
276#endif 290#endif
277 291
292 dst = dst_clone(skb_dst(skb));
278 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 293 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
294 dst_release(dst);
279 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; 295 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
280 opt = IP6CB(skb); 296 opt = IP6CB(skb);
281#if IS_ENABLED(CONFIG_IPV6_MIP6) 297#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
282 opt->nhoff = dstbuf; 298 opt->nhoff = dstbuf;
283#else 299#else
284 opt->nhoff = opt->dst1; 300 opt->nhoff = opt->dst1;
@@ -288,6 +304,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
288 304
289 IP6_INC_STATS_BH(dev_net(dst->dev), 305 IP6_INC_STATS_BH(dev_net(dst->dev),
290 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 306 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
307 dst_release(dst);
291 return -1; 308 return -1;
292} 309}
293 310
@@ -334,7 +351,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
334looped_back: 351looped_back:
335 if (hdr->segments_left == 0) { 352 if (hdr->segments_left == 0) {
336 switch (hdr->type) { 353 switch (hdr->type) {
337#if IS_ENABLED(CONFIG_IPV6_MIP6) 354#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
338 case IPV6_SRCRT_TYPE_2: 355 case IPV6_SRCRT_TYPE_2:
339 /* Silently discard type 2 header unless it was 356 /* Silently discard type 2 header unless it was
340 * processed by own 357 * processed by own
@@ -360,7 +377,7 @@ looped_back:
360 } 377 }
361 378
362 switch (hdr->type) { 379 switch (hdr->type) {
363#if IS_ENABLED(CONFIG_IPV6_MIP6) 380#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
364 case IPV6_SRCRT_TYPE_2: 381 case IPV6_SRCRT_TYPE_2:
365 if (accept_source_route < 0) 382 if (accept_source_route < 0)
366 goto unknown_rh; 383 goto unknown_rh;
@@ -417,7 +434,7 @@ looped_back:
417 addr += i - 1; 434 addr += i - 1;
418 435
419 switch (hdr->type) { 436 switch (hdr->type) {
420#if IS_ENABLED(CONFIG_IPV6_MIP6) 437#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
421 case IPV6_SRCRT_TYPE_2: 438 case IPV6_SRCRT_TYPE_2:
422 if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, 439 if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
423 (xfrm_address_t *)&ipv6_hdr(skb)->saddr, 440 (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
@@ -446,9 +463,9 @@ looped_back:
446 return -1; 463 return -1;
447 } 464 }
448 465
449 daddr = *addr; 466 ipv6_addr_copy(&daddr, addr);
450 *addr = ipv6_hdr(skb)->daddr; 467 ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
451 ipv6_hdr(skb)->daddr = daddr; 468 ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
452 469
453 skb_dst_drop(skb); 470 skb_dst_drop(skb);
454 ip6_route_input(skb); 471 ip6_route_input(skb);
@@ -484,12 +501,12 @@ unknown_rh:
484 501
485static const struct inet6_protocol rthdr_protocol = { 502static const struct inet6_protocol rthdr_protocol = {
486 .handler = ipv6_rthdr_rcv, 503 .handler = ipv6_rthdr_rcv,
487 .flags = INET6_PROTO_NOPOLICY, 504 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
488}; 505};
489 506
490static const struct inet6_protocol destopt_protocol = { 507static const struct inet6_protocol destopt_protocol = {
491 .handler = ipv6_destopt_rcv, 508 .handler = ipv6_destopt_rcv,
492 .flags = INET6_PROTO_NOPOLICY, 509 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR,
493}; 510};
494 511
495static const struct inet6_protocol nodata_protocol = { 512static const struct inet6_protocol nodata_protocol = {
@@ -515,10 +532,10 @@ int __init ipv6_exthdrs_init(void)
515 532
516out: 533out:
517 return ret; 534 return ret;
518out_destopt:
519 inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
520out_rthdr: 535out_rthdr:
521 inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); 536 inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING);
537out_destopt:
538 inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
522 goto out; 539 goto out;
523}; 540};
524 541
@@ -548,23 +565,23 @@ static inline struct net *ipv6_skb_net(struct sk_buff *skb)
548 565
549/* Router Alert as of RFC 2711 */ 566/* Router Alert as of RFC 2711 */
550 567
551static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) 568static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
552{ 569{
553 const unsigned char *nh = skb_network_header(skb); 570 const unsigned char *nh = skb_network_header(skb);
554 571
555 if (nh[optoff + 1] == 2) { 572 if (nh[optoff + 1] == 2) {
556 IP6CB(skb)->ra = optoff; 573 IP6CB(skb)->ra = optoff;
557 return true; 574 return 1;
558 } 575 }
559 LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", 576 LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
560 nh[optoff + 1]); 577 nh[optoff + 1]);
561 kfree_skb(skb); 578 kfree_skb(skb);
562 return false; 579 return 0;
563} 580}
564 581
565/* Jumbo payload */ 582/* Jumbo payload */
566 583
567static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) 584static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
568{ 585{
569 const unsigned char *nh = skb_network_header(skb); 586 const unsigned char *nh = skb_network_header(skb);
570 struct net *net = ipv6_skb_net(skb); 587 struct net *net = ipv6_skb_net(skb);
@@ -583,13 +600,13 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
583 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), 600 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
584 IPSTATS_MIB_INHDRERRORS); 601 IPSTATS_MIB_INHDRERRORS);
585 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); 602 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
586 return false; 603 return 0;
587 } 604 }
588 if (ipv6_hdr(skb)->payload_len) { 605 if (ipv6_hdr(skb)->payload_len) {
589 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), 606 IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
590 IPSTATS_MIB_INHDRERRORS); 607 IPSTATS_MIB_INHDRERRORS);
591 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); 608 icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
592 return false; 609 return 0;
593 } 610 }
594 611
595 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { 612 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
@@ -601,14 +618,14 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
601 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) 618 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
602 goto drop; 619 goto drop;
603 620
604 return true; 621 return 1;
605 622
606drop: 623drop:
607 kfree_skb(skb); 624 kfree_skb(skb);
608 return false; 625 return 0;
609} 626}
610 627
611static const struct tlvtype_proc tlvprochopopt_lst[] = { 628static struct tlvtype_proc tlvprochopopt_lst[] = {
612 { 629 {
613 .type = IPV6_TLV_ROUTERALERT, 630 .type = IPV6_TLV_ROUTERALERT,
614 .func = ipv6_hop_ra, 631 .func = ipv6_hop_ra,
@@ -675,7 +692,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
675 memcpy(phdr->addr, ihdr->addr + 1, 692 memcpy(phdr->addr, ihdr->addr + 1,
676 (hops - 1) * sizeof(struct in6_addr)); 693 (hops - 1) * sizeof(struct in6_addr));
677 694
678 phdr->addr[hops - 1] = **addr_p; 695 ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
679 *addr_p = ihdr->addr; 696 *addr_p = ihdr->addr;
680 697
681 phdr->rt_hdr.nexthdr = *proto; 698 phdr->rt_hdr.nexthdr = *proto;
@@ -707,6 +724,7 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
707 if (opt->hopopt) 724 if (opt->hopopt)
708 ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); 725 ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
709} 726}
727
710EXPORT_SYMBOL(ipv6_push_nfrag_opts); 728EXPORT_SYMBOL(ipv6_push_nfrag_opts);
711 729
712void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto) 730void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
@@ -722,19 +740,20 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
722 740
723 opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC); 741 opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
724 if (opt2) { 742 if (opt2) {
725 long dif = (char *)opt2 - (char *)opt; 743 long dif = (char*)opt2 - (char*)opt;
726 memcpy(opt2, opt, opt->tot_len); 744 memcpy(opt2, opt, opt->tot_len);
727 if (opt2->hopopt) 745 if (opt2->hopopt)
728 *((char **)&opt2->hopopt) += dif; 746 *((char**)&opt2->hopopt) += dif;
729 if (opt2->dst0opt) 747 if (opt2->dst0opt)
730 *((char **)&opt2->dst0opt) += dif; 748 *((char**)&opt2->dst0opt) += dif;
731 if (opt2->dst1opt) 749 if (opt2->dst1opt)
732 *((char **)&opt2->dst1opt) += dif; 750 *((char**)&opt2->dst1opt) += dif;
733 if (opt2->srcrt) 751 if (opt2->srcrt)
734 *((char **)&opt2->srcrt) += dif; 752 *((char**)&opt2->srcrt) += dif;
735 } 753 }
736 return opt2; 754 return opt2;
737} 755}
756
738EXPORT_SYMBOL_GPL(ipv6_dup_options); 757EXPORT_SYMBOL_GPL(ipv6_dup_options);
739 758
740static int ipv6_renew_option(void *ohdr, 759static int ipv6_renew_option(void *ohdr,
@@ -747,14 +766,14 @@ static int ipv6_renew_option(void *ohdr,
747 if (ohdr) { 766 if (ohdr) {
748 memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); 767 memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
749 *hdr = (struct ipv6_opt_hdr *)*p; 768 *hdr = (struct ipv6_opt_hdr *)*p;
750 *p += CMSG_ALIGN(ipv6_optlen(*hdr)); 769 *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr));
751 } 770 }
752 } else { 771 } else {
753 if (newopt) { 772 if (newopt) {
754 if (copy_from_user(*p, newopt, newoptlen)) 773 if (copy_from_user(*p, newopt, newoptlen))
755 return -EFAULT; 774 return -EFAULT;
756 *hdr = (struct ipv6_opt_hdr *)*p; 775 *hdr = (struct ipv6_opt_hdr *)*p;
757 if (ipv6_optlen(*hdr) > newoptlen) 776 if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen)
758 return -EINVAL; 777 return -EINVAL;
759 *p += CMSG_ALIGN(newoptlen); 778 *p += CMSG_ALIGN(newoptlen);
760 } 779 }
@@ -852,7 +871,6 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
852 871
853 return opt; 872 return opt;
854} 873}
855EXPORT_SYMBOL_GPL(ipv6_fixup_options);
856 874
857/** 875/**
858 * fl6_update_dst - update flowi destination address with info given 876 * fl6_update_dst - update flowi destination address with info given
@@ -872,8 +890,9 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
872 if (!opt || !opt->srcrt) 890 if (!opt || !opt->srcrt)
873 return NULL; 891 return NULL;
874 892
875 *orig = fl6->daddr; 893 ipv6_addr_copy(orig, &fl6->daddr);
876 fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; 894 ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr);
877 return orig; 895 return orig;
878} 896}
897
879EXPORT_SYMBOL_GPL(fl6_update_dst); 898EXPORT_SYMBOL_GPL(fl6_update_dst);
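
Both ipv6_find_tlv() (moved into exthdrs.c by the hunk above) and ip6_parse_tlv() walk the same TLV encoding used by hop-by-hop and destination option headers: a Pad1 option is a single zero byte with no length field, while every other option is a type byte, a length byte, then that many bytes of data. A minimal userspace sketch of that scan over a made-up options buffer (find_tlv() below is an illustration, not the kernel function):

#include <stdio.h>

#define TLV_PAD1  0   /* one zero byte, no length field */

/* Scan an options area for option 'type'.
 * Returns its offset, or -1 if absent or malformed. */
static int find_tlv(const unsigned char *opts, int len, int type)
{
	int off = 0;

	while (off < len) {
		int opttype = opts[off];
		int optlen;

		if (opttype == type)
			return off;

		if (opttype == TLV_PAD1) {
			optlen = 1;
		} else {
			if (off + 2 > len)
				return -1;
			optlen = opts[off + 1] + 2;
			if (off + optlen > len)
				return -1;
		}
		off += optlen;
	}
	return -1;
}

int main(void)
{
	/* Pad1, then PadN with 2 bytes of padding, then option type 5, len 2. */
	const unsigned char opts[] = { 0, 1, 2, 0, 0, 5, 2, 0xaa, 0xbb };

	printf("type 5 at offset %d\n",
	       find_tlv(opts, sizeof(opts), 5));   /* prints 5 */
	return 0;
}
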
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index c5e83fae4df..14ed0a955b5 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -2,14 +2,13 @@
2 * IPv6 library code, needed by static components when full IPv6 support is 2 * IPv6 library code, needed by static components when full IPv6 support is
3 * not configured or static. 3 * not configured or static.
4 */ 4 */
5#include <linux/export.h>
6#include <net/ipv6.h> 5#include <net/ipv6.h>
7 6
8/* 7/*
9 * find out if nexthdr is a well-known extension header or a protocol 8 * find out if nexthdr is a well-known extension header or a protocol
10 */ 9 */
11 10
12bool ipv6_ext_hdr(u8 nexthdr) 11int ipv6_ext_hdr(u8 nexthdr)
13{ 12{
14 /* 13 /*
15 * find out if nexthdr is an extension header or a protocol 14 * find out if nexthdr is an extension header or a protocol
@@ -21,7 +20,6 @@ bool ipv6_ext_hdr(u8 nexthdr)
21 (nexthdr == NEXTHDR_NONE) || 20 (nexthdr == NEXTHDR_NONE) ||
22 (nexthdr == NEXTHDR_DEST); 21 (nexthdr == NEXTHDR_DEST);
23} 22}
24EXPORT_SYMBOL(ipv6_ext_hdr);
25 23
26/* 24/*
27 * Skip any extension headers. This is used by the ICMP module. 25 * Skip any extension headers. This is used by the ICMP module.
@@ -58,9 +56,6 @@ EXPORT_SYMBOL(ipv6_ext_hdr);
58 * it returns NULL. 56 * it returns NULL.
59 * - First fragment header is skipped, not-first ones 57 * - First fragment header is skipped, not-first ones
60 * are considered as unparsable. 58 * are considered as unparsable.
61 * - Reports the offset field of the final fragment header so it is
62 * possible to tell whether this is a first fragment, later fragment,
63 * or not fragmented.
64 * - ESP is unparsable for now and considered like 59 * - ESP is unparsable for now and considered like
65 * normal payload protocol. 60 * normal payload protocol.
66 * - Note also special handling of AUTH header. Thanks to IPsec wizards. 61 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
@@ -68,13 +63,10 @@ EXPORT_SYMBOL(ipv6_ext_hdr);
68 * --ANK (980726) 63 * --ANK (980726)
69 */ 64 */
70 65
71int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, 66int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp)
72 __be16 *frag_offp)
73{ 67{
74 u8 nexthdr = *nexthdrp; 68 u8 nexthdr = *nexthdrp;
75 69
76 *frag_offp = 0;
77
78 while (ipv6_ext_hdr(nexthdr)) { 70 while (ipv6_ext_hdr(nexthdr)) {
79 struct ipv6_opt_hdr _hdr, *hp; 71 struct ipv6_opt_hdr _hdr, *hp;
80 int hdrlen; 72 int hdrlen;
@@ -94,8 +86,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
94 if (fp == NULL) 86 if (fp == NULL)
95 return -1; 87 return -1;
96 88
97 *frag_offp = *fp; 89 if (ntohs(*fp) & ~0x7)
98 if (ntohs(*frag_offp) & ~0x7)
99 break; 90 break;
100 hdrlen = 8; 91 hdrlen = 8;
101 } else if (nexthdr == NEXTHDR_AUTH) 92 } else if (nexthdr == NEXTHDR_AUTH)
@@ -110,172 +101,6 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
110 *nexthdrp = nexthdr; 101 *nexthdrp = nexthdr;
111 return start; 102 return start;
112} 103}
113EXPORT_SYMBOL(ipv6_skip_exthdr);
114
115int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
116{
117 const unsigned char *nh = skb_network_header(skb);
118 int packet_len = skb->tail - skb->network_header;
119 struct ipv6_opt_hdr *hdr;
120 int len;
121
122 if (offset + 2 > packet_len)
123 goto bad;
124 hdr = (struct ipv6_opt_hdr *)(nh + offset);
125 len = ((hdr->hdrlen + 1) << 3);
126
127 if (offset + len > packet_len)
128 goto bad;
129
130 offset += 2;
131 len -= 2;
132
133 while (len > 0) {
134 int opttype = nh[offset];
135 int optlen;
136
137 if (opttype == type)
138 return offset;
139
140 switch (opttype) {
141 case IPV6_TLV_PAD1:
142 optlen = 1;
143 break;
144 default:
145 optlen = nh[offset + 1] + 2;
146 if (optlen > len)
147 goto bad;
148 break;
149 }
150 offset += optlen;
151 len -= optlen;
152 }
153 /* not_found */
154 bad:
155 return -1;
156}
157EXPORT_SYMBOL_GPL(ipv6_find_tlv);
158
159/*
160 * find the offset to specified header or the protocol number of last header
161 * if target < 0. "last header" is transport protocol header, ESP, or
162 * "No next header".
163 *
164 * Note that *offset is used as input/output parameter, and if it is not zero,
165 * then it must be a valid offset to an inner IPv6 header. This can be used
166 * to explore inner IPv6 header, eg. ICMPv6 error messages.
167 *
168 * If target header is found, its offset is set in *offset and return protocol
169 * number. Otherwise, return -1.
170 *
171 * If the first fragment doesn't contain the final protocol header or
172 * NEXTHDR_NONE it is considered invalid.
173 *
174 * Note that non-1st fragment is special case that "the protocol number
175 * of last header" is "next header" field in Fragment header. In this case,
176 * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
177 * isn't NULL.
178 *
179 * if flags is not NULL and it's a fragment, then the frag flag
180 * IP6_FH_F_FRAG will be set. If it's an AH header, the
181 * IP6_FH_F_AUTH flag is set and target < 0, then this function will
182 * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this
183 * function will skip all those routing headers where segments_left was 0.
184 */
185int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
186 int target, unsigned short *fragoff, int *flags)
187{
188 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
189 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
190 unsigned int len;
191 bool found;
192
193 if (fragoff)
194 *fragoff = 0;
195
196 if (*offset) {
197 struct ipv6hdr _ip6, *ip6;
198
199 ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
200 if (!ip6 || (ip6->version != 6)) {
201 printk(KERN_ERR "IPv6 header not found\n");
202 return -EBADMSG;
203 }
204 start = *offset + sizeof(struct ipv6hdr);
205 nexthdr = ip6->nexthdr;
206 }
207 len = skb->len - start;
208
209 do {
210 struct ipv6_opt_hdr _hdr, *hp;
211 unsigned int hdrlen;
212 found = (nexthdr == target);
213
214 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
215 if (target < 0)
216 break;
217 return -ENOENT;
218 }
219
220 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
221 if (hp == NULL)
222 return -EBADMSG;
223
224 if (nexthdr == NEXTHDR_ROUTING) {
225 struct ipv6_rt_hdr _rh, *rh;
226
227 rh = skb_header_pointer(skb, start, sizeof(_rh),
228 &_rh);
229 if (rh == NULL)
230 return -EBADMSG;
231
232 if (flags && (*flags & IP6_FH_F_SKIP_RH) &&
233 rh->segments_left == 0)
234 found = false;
235 }
236
237 if (nexthdr == NEXTHDR_FRAGMENT) {
238 unsigned short _frag_off;
239 __be16 *fp;
240
241 if (flags) /* Indicate that this is a fragment */
242 *flags |= IP6_FH_F_FRAG;
243 fp = skb_header_pointer(skb,
244 start+offsetof(struct frag_hdr,
245 frag_off),
246 sizeof(_frag_off),
247 &_frag_off);
248 if (fp == NULL)
249 return -EBADMSG;
250
251 _frag_off = ntohs(*fp) & ~0x7;
252 if (_frag_off) {
253 if (target < 0 &&
254 ((!ipv6_ext_hdr(hp->nexthdr)) ||
255 hp->nexthdr == NEXTHDR_NONE)) {
256 if (fragoff)
257 *fragoff = _frag_off;
258 return hp->nexthdr;
259 }
260 return -ENOENT;
261 }
262 hdrlen = 8;
263 } else if (nexthdr == NEXTHDR_AUTH) {
264 if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
265 break;
266 hdrlen = (hp->hdrlen + 2) << 2;
267 } else
268 hdrlen = ipv6_optlen(hp);
269
270 if (!found) {
271 nexthdr = hp->nexthdr;
272 len -= hdrlen;
273 start += hdrlen;
274 }
275 } while (!found);
276
277 *offset = start;
278 return nexthdr;
279}
280EXPORT_SYMBOL(ipv6_find_hdr);
281 104
105EXPORT_SYMBOL(ipv6_ext_hdr);
106EXPORT_SYMBOL(ipv6_skip_exthdr);
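
The comments above describe how the extension-header walk works: the fragment header is a fixed 8 bytes, AH is measured in 4-octet units, every other extension header in 8-octet units, and a non-first fragment (nonzero offset field) stops the walk because the upper-layer header is not present in that packet. As a rough illustration only, here is a self-contained user-space sketch of that walk over a raw packet buffer; skip_exthdrs() and is_ext_hdr() are made-up names, not the kernel helpers.

/* User-space sketch of the ipv6_skip_exthdr()-style walk; buffer based,
 * not kernel code.  Returns the offset of the upper-layer header (or of
 * the fragment header for a non-first fragment) and updates *nexthdr,
 * or -1 if the chain cannot be parsed from this buffer.
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>          /* ntohs() */

#define NEXTHDR_HOP      0
#define NEXTHDR_ROUTING  43
#define NEXTHDR_FRAGMENT 44
#define NEXTHDR_AUTH     51
#define NEXTHDR_NONE     59
#define NEXTHDR_DEST     60

static int is_ext_hdr(uint8_t nexthdr)
{
        return nexthdr == NEXTHDR_HOP || nexthdr == NEXTHDR_ROUTING ||
               nexthdr == NEXTHDR_FRAGMENT || nexthdr == NEXTHDR_AUTH ||
               nexthdr == NEXTHDR_NONE || nexthdr == NEXTHDR_DEST;
}

static int skip_exthdrs(const uint8_t *pkt, size_t len, int start, uint8_t *nexthdr)
{
        while (is_ext_hdr(*nexthdr)) {
                int hdrlen;

                if (*nexthdr == NEXTHDR_NONE)
                        return -1;              /* "no next header" */
                if ((size_t)start + 8 > len)
                        return -1;              /* truncated packet */

                if (*nexthdr == NEXTHDR_FRAGMENT) {
                        uint16_t frag_off;

                        memcpy(&frag_off, pkt + start + 2, sizeof(frag_off));
                        if (ntohs(frag_off) & ~0x7)
                                break;          /* non-first fragment */
                        hdrlen = 8;
                } else if (*nexthdr == NEXTHDR_AUTH) {
                        hdrlen = (pkt[start + 1] + 2) << 2;
                } else {
                        hdrlen = (pkt[start + 1] + 1) << 3;
                }

                *nexthdr = pkt[start];          /* first byte is "next header" */
                start += hdrlen;
        }
        return start;
}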
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
deleted file mode 100644
index cf77f3abfd0..00000000000
--- a/net/ipv6/exthdrs_offload.c
+++ /dev/null
@@ -1,41 +0,0 @@
1/*
2 * IPV6 GSO/GRO offload support
3 * Linux INET6 implementation
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * IPV6 Extension Header GSO/GRO support
11 */
12#include <net/protocol.h>
13#include "ip6_offload.h"
14
15static const struct net_offload rthdr_offload = {
16 .flags = INET6_PROTO_GSO_EXTHDR,
17};
18
19static const struct net_offload dstopt_offload = {
20 .flags = INET6_PROTO_GSO_EXTHDR,
21};
22
23int __init ipv6_exthdrs_offload_init(void)
24{
25 int ret;
26
27 ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
28	if (ret)
29 goto out;
30
31 ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
32 if (!ret)
33 goto out_rt;
34
35out:
36 return ret;
37
38out_rt:
39	inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
40 goto out;
41}
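
The deleted init function above follows the usual register-then-unwind idiom: if the second registration fails, the first one is rolled back before the error is returned, so the subsystem is either fully set up or not at all. A minimal stand-alone sketch of that idiom, with placeholder register_a()/register_b()/unregister_a() functions rather than the offload API:

#include <stdio.h>

static int register_a(void)    { puts("A registered");   return 0; }
static int register_b(void)    { puts("B registered");   return 0; }
static void unregister_a(void) { puts("A unregistered"); }

static int subsystem_init(void)
{
        int ret;

        ret = register_a();
        if (ret)
                goto out;               /* nothing to undo yet */

        ret = register_b();
        if (ret)
                goto out_a;             /* undo A before reporting the error */
out:
        return ret;

out_a:
        unregister_a();
        goto out;
}

int main(void)
{
        return subsystem_init() ? 1 : 0;
}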
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2e1a432867c..34d244df907 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,7 +14,6 @@
14 */ 14 */
15 15
16#include <linux/netdevice.h> 16#include <linux/netdevice.h>
17#include <linux/export.h>
18 17
19#include <net/fib_rules.h> 18#include <net/fib_rules.h>
20#include <net/ipv6.h> 19#include <net/ipv6.h>
@@ -22,7 +21,8 @@
22#include <net/ip6_route.h> 21#include <net/ip6_route.h>
23#include <net/netlink.h> 22#include <net/netlink.h>
24 23
25struct fib6_rule { 24struct fib6_rule
25{
26 struct fib_rule common; 26 struct fib_rule common;
27 struct rt6key src; 27 struct rt6key src;
28 struct rt6key dst; 28 struct rt6key dst;
@@ -95,12 +95,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
95 if (!ipv6_prefix_equal(&saddr, &r->src.addr, 95 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
96 r->src.plen)) 96 r->src.plen))
97 goto again; 97 goto again;
98 flp6->saddr = saddr; 98 ipv6_addr_copy(&flp6->saddr, &saddr);
99 } 99 }
100 goto out; 100 goto out;
101 } 101 }
102again: 102again:
103 ip6_rt_put(rt); 103 dst_release(&rt->dst);
104 rt = NULL; 104 rt = NULL;
105 goto out; 105 goto out;
106 106
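
The rule match above relies on ipv6_prefix_equal() to compare only the leading r->src.plen bits of the candidate source address. For reference, a stand-alone sketch of that kind of prefix comparison (my own helper, not the kernel implementation):

#include <stdint.h>
#include <string.h>
#include <netinet/in.h>

/* Return 1 if the first "plen" bits of a and b are identical. */
static int prefix_equal(const struct in6_addr *a, const struct in6_addr *b,
                        unsigned int plen)
{
        unsigned int whole, rem;

        if (plen > 128)
                return 0;

        whole = plen >> 3;              /* full bytes to compare */
        rem   = plen & 7;               /* leftover bits         */

        if (whole && memcmp(a->s6_addr, b->s6_addr, whole))
                return 0;
        if (rem) {
                uint8_t mask = (uint8_t)(0xff << (8 - rem));

                if ((a->s6_addr[whole] ^ b->s6_addr[whole]) & mask)
                        return 0;
        }
        return 1;
}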
@@ -214,13 +214,14 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
214 frh->src_len = rule6->src.plen; 214 frh->src_len = rule6->src.plen;
215 frh->tos = rule6->tclass; 215 frh->tos = rule6->tclass;
216 216
217 if ((rule6->dst.plen && 217 if (rule6->dst.plen)
218 nla_put(skb, FRA_DST, sizeof(struct in6_addr), 218 NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
219 &rule6->dst.addr)) || 219 &rule6->dst.addr);
220 (rule6->src.plen && 220
221 nla_put(skb, FRA_SRC, sizeof(struct in6_addr), 221 if (rule6->src.plen)
222 &rule6->src.addr))) 222 NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
223 goto nla_put_failure; 223 &rule6->src.addr);
224
224 return 0; 225 return 0;
225 226
226nla_put_failure: 227nla_put_failure:
@@ -238,7 +239,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
238 + nla_total_size(16); /* src */ 239 + nla_total_size(16); /* src */
239} 240}
240 241
241static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { 242static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
242 .family = AF_INET6, 243 .family = AF_INET6,
243 .rule_size = sizeof(struct fib6_rule), 244 .rule_size = sizeof(struct fib6_rule),
244 .addr_size = sizeof(struct in6_addr), 245 .addr_size = sizeof(struct in6_addr),
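
The fib6_rule_fill() hunk above switches between two attribute-fill styles: the older NLA_PUT() macro, which jumps to a local nla_put_failure label when the message runs out of room, and the newer explicit nla_put() return-value checks. A toy sketch of the same pattern over a plain buffer; buf_put() and PUT_OR_FAIL() are illustrative stand-ins, not the netlink API:

#include <stddef.h>
#include <string.h>

struct msg { unsigned char data[64]; size_t used; };

static int buf_put(struct msg *m, const void *p, size_t len)
{
        if (m->used + len > sizeof(m->data))
                return -1;              /* no room left */
        memcpy(m->data + m->used, p, len);
        m->used += len;
        return 0;
}

/* Old style: the macro hides the error check and jumps to a label. */
#define PUT_OR_FAIL(m, p, len)                          \
        do {                                            \
                if (buf_put((m), (p), (len)) < 0)       \
                        goto put_failure;               \
        } while (0)

static int fill(struct msg *m)
{
        const unsigned char dst[16] = { 0x20, 0x01 };   /* fake 128-bit address */

        PUT_OR_FAIL(m, dst, sizeof(dst));               /* old style */

        if (buf_put(m, dst, sizeof(dst)) < 0)           /* new style: explicit check */
                goto put_failure;
        return 0;

put_failure:
        return -1;
}

int main(void)
{
        struct msg m = { .used = 0 };

        return fill(&m) ? 1 : 0;
}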
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b4a9fd51dae..11900417b1c 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -29,8 +29,6 @@
29 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data 29 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
30 */ 30 */
31 31
32#define pr_fmt(fmt) "IPv6: " fmt
33
34#include <linux/module.h> 32#include <linux/module.h>
35#include <linux/errno.h> 33#include <linux/errno.h>
36#include <linux/types.h> 34#include <linux/types.h>
@@ -68,6 +66,7 @@
68#include <net/inet_common.h> 66#include <net/inet_common.h>
69 67
70#include <asm/uaccess.h> 68#include <asm/uaccess.h>
69#include <asm/system.h>
71 70
72/* 71/*
73 * The ICMP socket(s). This is the most convenient way to flow control 72 * The ICMP socket(s). This is the most convenient way to flow control
@@ -131,19 +130,18 @@ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
131 * --ANK (980726) 130 * --ANK (980726)
132 */ 131 */
133 132
134static bool is_ineligible(const struct sk_buff *skb) 133static int is_ineligible(struct sk_buff *skb)
135{ 134{
136 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; 135 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
137 int len = skb->len - ptr; 136 int len = skb->len - ptr;
138 __u8 nexthdr = ipv6_hdr(skb)->nexthdr; 137 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
139 __be16 frag_off;
140 138
141 if (len < 0) 139 if (len < 0)
142 return true; 140 return 1;
143 141
144 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off); 142 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
145 if (ptr < 0) 143 if (ptr < 0)
146 return false; 144 return 0;
147 if (nexthdr == IPPROTO_ICMPV6) { 145 if (nexthdr == IPPROTO_ICMPV6) {
148 u8 _type, *tp; 146 u8 _type, *tp;
149 tp = skb_header_pointer(skb, 147 tp = skb_header_pointer(skb,
@@ -151,9 +149,9 @@ static bool is_ineligible(const struct sk_buff *skb)
151 sizeof(_type), &_type); 149 sizeof(_type), &_type);
152 if (tp == NULL || 150 if (tp == NULL ||
153 !(*tp & ICMPV6_INFOMSG_MASK)) 151 !(*tp & ICMPV6_INFOMSG_MASK))
154 return true; 152 return 1;
155 } 153 }
156 return false; 154 return 0;
157} 155}
158 156
159/* 157/*
@@ -188,16 +186,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
188 } else { 186 } else {
189 struct rt6_info *rt = (struct rt6_info *)dst; 187 struct rt6_info *rt = (struct rt6_info *)dst;
190 int tmo = net->ipv6.sysctl.icmpv6_time; 188 int tmo = net->ipv6.sysctl.icmpv6_time;
191 struct inet_peer *peer;
192 189
193 /* Give more bandwidth to wider prefixes. */ 190 /* Give more bandwidth to wider prefixes. */
194 if (rt->rt6i_dst.plen < 128) 191 if (rt->rt6i_dst.plen < 128)
195 tmo >>= ((128 - rt->rt6i_dst.plen)>>5); 192 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
196 193
197 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); 194 if (!rt->rt6i_peer)
198 res = inet_peer_xrlim_allow(peer, tmo); 195 rt6_bind_peer(rt, 1);
199 if (peer) 196 res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo);
200 inet_putpeer(peer);
201 } 197 }
202 dst_release(dst); 198 dst_release(dst);
203 return res; 199 return res;
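
Both versions of the rate-limit hunk above keep the "give more bandwidth to wider prefixes" scaling: the token interval is right-shifted once for every 32 bits the destination prefix falls short of /128, so wider prefixes get a shorter interval and hence a higher ICMPv6 rate. A tiny stand-alone demo of that arithmetic, assuming a base interval of 1000 ms purely for illustration:

#include <stdio.h>

int main(void)
{
        const int base_tmo_ms = 1000;   /* illustrative base interval */
        int plen;

        for (plen = 0; plen <= 128; plen += 32) {
                int tmo = base_tmo_ms;

                if (plen < 128)
                        tmo >>= ((128 - plen) >> 5);
                printf("plen /%-3d -> interval %4d ms\n", plen, tmo);
        }
        return 0;
}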
@@ -210,14 +206,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
210 * highest-order two bits set to 10 206 * highest-order two bits set to 10
211 */ 207 */
212 208
213static bool opt_unrec(struct sk_buff *skb, __u32 offset) 209static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
214{ 210{
215 u8 _optval, *op; 211 u8 _optval, *op;
216 212
217 offset += skb_network_offset(skb); 213 offset += skb_network_offset(skb);
218 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); 214 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
219 if (op == NULL) 215 if (op == NULL)
220 return true; 216 return 1;
221 return (*op & 0xC0) == 0x80; 217 return (*op & 0xC0) == 0x80;
222} 218}
223 219
@@ -280,7 +276,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
280 return 0; 276 return 0;
281} 277}
282 278
283#if IS_ENABLED(CONFIG_IPV6_MIP6) 279#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
284static void mip6_addr_swap(struct sk_buff *skb) 280static void mip6_addr_swap(struct sk_buff *skb)
285{ 281{
286 struct ipv6hdr *iph = ipv6_hdr(skb); 282 struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -294,9 +290,9 @@ static void mip6_addr_swap(struct sk_buff *skb)
294 if (likely(off >= 0)) { 290 if (likely(off >= 0)) {
295 hao = (struct ipv6_destopt_hao *) 291 hao = (struct ipv6_destopt_hao *)
296 (skb_network_header(skb) + off); 292 (skb_network_header(skb) + off);
297 tmp = iph->saddr; 293 ipv6_addr_copy(&tmp, &iph->saddr);
298 iph->saddr = hao->addr; 294 ipv6_addr_copy(&iph->saddr, &hao->addr);
299 hao->addr = tmp; 295 ipv6_addr_copy(&hao->addr, &tmp);
300 } 296 }
301 } 297 }
302} 298}
@@ -448,9 +444,9 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
448 444
449 memset(&fl6, 0, sizeof(fl6)); 445 memset(&fl6, 0, sizeof(fl6));
450 fl6.flowi6_proto = IPPROTO_ICMPV6; 446 fl6.flowi6_proto = IPPROTO_ICMPV6;
451 fl6.daddr = hdr->saddr; 447 ipv6_addr_copy(&fl6.daddr, &hdr->saddr);
452 if (saddr) 448 if (saddr)
453 fl6.saddr = *saddr; 449 ipv6_addr_copy(&fl6.saddr, saddr);
454 fl6.flowi6_oif = iif; 450 fl6.flowi6_oif = iif;
455 fl6.fl6_icmp_type = type; 451 fl6.fl6_icmp_type = type;
456 fl6.fl6_icmp_code = code; 452 fl6.fl6_icmp_code = code;
@@ -471,8 +467,6 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
471 467
472 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) 468 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
473 fl6.flowi6_oif = np->mcast_oif; 469 fl6.flowi6_oif = np->mcast_oif;
474 else if (!fl6.flowi6_oif)
475 fl6.flowi6_oif = np->ucast_oif;
476 470
477 dst = icmpv6_route_lookup(net, skb, sk, &fl6); 471 dst = icmpv6_route_lookup(net, skb, sk, &fl6);
478 if (IS_ERR(dst)) 472 if (IS_ERR(dst))
@@ -496,27 +490,29 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
496 goto out_dst_release; 490 goto out_dst_release;
497 } 491 }
498 492
499 rcu_read_lock(); 493 idev = in6_dev_get(skb->dev);
500 idev = __in6_dev_get(skb->dev);
501 494
502 err = ip6_append_data(sk, icmpv6_getfrag, &msg, 495 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
503 len + sizeof(struct icmp6hdr), 496 len + sizeof(struct icmp6hdr),
504 sizeof(struct icmp6hdr), hlimit, 497 sizeof(struct icmp6hdr), hlimit,
505 np->tclass, NULL, &fl6, (struct rt6_info *)dst, 498 np->tclass, NULL, &fl6, (struct rt6_info*)dst,
506 MSG_DONTWAIT, np->dontfrag); 499 MSG_DONTWAIT, np->dontfrag);
507 if (err) { 500 if (err) {
508 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); 501 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
509 ip6_flush_pending_frames(sk); 502 ip6_flush_pending_frames(sk);
510 } else { 503 goto out_put;
511 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
512 len + sizeof(struct icmp6hdr));
513 } 504 }
514 rcu_read_unlock(); 505 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr));
506
507out_put:
508 if (likely(idev != NULL))
509 in6_dev_put(idev);
515out_dst_release: 510out_dst_release:
516 dst_release(dst); 511 dst_release(dst);
517out: 512out:
518 icmpv6_xmit_unlock(sk); 513 icmpv6_xmit_unlock(sk);
519} 514}
515
520EXPORT_SYMBOL(icmpv6_send); 516EXPORT_SYMBOL(icmpv6_send);
521 517
522static void icmpv6_echo_reply(struct sk_buff *skb) 518static void icmpv6_echo_reply(struct sk_buff *skb)
@@ -544,9 +540,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
544 540
545 memset(&fl6, 0, sizeof(fl6)); 541 memset(&fl6, 0, sizeof(fl6));
546 fl6.flowi6_proto = IPPROTO_ICMPV6; 542 fl6.flowi6_proto = IPPROTO_ICMPV6;
547 fl6.daddr = ipv6_hdr(skb)->saddr; 543 ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
548 if (saddr) 544 if (saddr)
549 fl6.saddr = *saddr; 545 ipv6_addr_copy(&fl6.saddr, saddr);
550 fl6.flowi6_oif = skb->dev->ifindex; 546 fl6.flowi6_oif = skb->dev->ifindex;
551 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; 547 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
552 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 548 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -558,8 +554,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
558 554
559 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) 555 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
560 fl6.flowi6_oif = np->mcast_oif; 556 fl6.flowi6_oif = np->mcast_oif;
561 else if (!fl6.flowi6_oif)
562 fl6.flowi6_oif = np->ucast_oif;
563 557
564 err = ip6_dst_lookup(sk, &dst, &fl6); 558 err = ip6_dst_lookup(sk, &dst, &fl6);
565 if (err) 559 if (err)
@@ -575,7 +569,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
575 if (hlimit < 0) 569 if (hlimit < 0)
576 hlimit = ip6_dst_hoplimit(dst); 570 hlimit = ip6_dst_hoplimit(dst);
577 571
578 idev = __in6_dev_get(skb->dev); 572 idev = in6_dev_get(skb->dev);
579 573
580 msg.skb = skb; 574 msg.skb = skb;
581 msg.offset = 0; 575 msg.offset = 0;
@@ -583,26 +577,29 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
583 577
584 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), 578 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
585 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, 579 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6,
586 (struct rt6_info *)dst, MSG_DONTWAIT, 580 (struct rt6_info*)dst, MSG_DONTWAIT,
587 np->dontfrag); 581 np->dontfrag);
588 582
589 if (err) { 583 if (err) {
590 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); 584 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
591 ip6_flush_pending_frames(sk); 585 ip6_flush_pending_frames(sk);
592 } else { 586 goto out_put;
593 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
594 skb->len + sizeof(struct icmp6hdr));
595 } 587 }
588 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
589
590out_put:
591 if (likely(idev != NULL))
592 in6_dev_put(idev);
596 dst_release(dst); 593 dst_release(dst);
597out: 594out:
598 icmpv6_xmit_unlock(sk); 595 icmpv6_xmit_unlock(sk);
599} 596}
600 597
601void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) 598static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
602{ 599{
603 const struct inet6_protocol *ipprot; 600 const struct inet6_protocol *ipprot;
604 int inner_offset; 601 int inner_offset;
605 __be16 frag_off; 602 int hash;
606 u8 nexthdr; 603 u8 nexthdr;
607 604
608 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 605 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -611,8 +608,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
611 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; 608 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
612 if (ipv6_ext_hdr(nexthdr)) { 609 if (ipv6_ext_hdr(nexthdr)) {
613 /* now skip over extension headers */ 610 /* now skip over extension headers */
614 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), 611 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
615 &nexthdr, &frag_off);
616 if (inner_offset<0) 612 if (inner_offset<0)
617 return; 613 return;
618 } else { 614 } else {
@@ -630,8 +626,10 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
630 --ANK (980726) 626 --ANK (980726)
631 */ 627 */
632 628
629 hash = nexthdr & (MAX_INET_PROTOS - 1);
630
633 rcu_read_lock(); 631 rcu_read_lock();
634 ipprot = rcu_dereference(inet6_protos[nexthdr]); 632 ipprot = rcu_dereference(inet6_protos[hash]);
635 if (ipprot && ipprot->err_handler) 633 if (ipprot && ipprot->err_handler)
636 ipprot->err_handler(skb, NULL, type, code, inner_offset, info); 634 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
637 rcu_read_unlock(); 635 rcu_read_unlock();
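
The older dispatch above masks the next-header value with MAX_INET_PROTOS - 1 before indexing the protocol table; since that table has historically been 256 entries and nexthdr is an 8-bit value, the mask is effectively a no-op, which is why the newer code indexes the table with nexthdr directly. A small stand-alone demo of the power-of-two masking identity (not kernel code):

#include <stdio.h>
#include <stdint.h>

#define MAX_INET_PROTOS 256     /* must be a power of two for the mask trick */

int main(void)
{
        uint8_t nexthdr = 0;
        int mismatches = 0;

        for (;;) {
                if ((nexthdr & (MAX_INET_PROTOS - 1)) != nexthdr % MAX_INET_PROTOS)
                        mismatches++;
                if (nexthdr == 255)
                        break;
                nexthdr++;
        }
        printf("mismatches over all 8-bit values: %d\n", mismatches);
        return 0;
}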
@@ -648,6 +646,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
648 struct net_device *dev = skb->dev; 646 struct net_device *dev = skb->dev;
649 struct inet6_dev *idev = __in6_dev_get(dev); 647 struct inet6_dev *idev = __in6_dev_get(dev);
650 const struct in6_addr *saddr, *daddr; 648 const struct in6_addr *saddr, *daddr;
649 const struct ipv6hdr *orig_hdr;
651 struct icmp6hdr *hdr; 650 struct icmp6hdr *hdr;
652 u8 type; 651 u8 type;
653 652
@@ -659,7 +658,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
659 XFRM_STATE_ICMP)) 658 XFRM_STATE_ICMP))
660 goto drop_no_count; 659 goto drop_no_count;
661 660
662 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) 661 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
663 goto drop_no_count; 662 goto drop_no_count;
664 663
665 nh = skb_network_offset(skb); 664 nh = skb_network_offset(skb);
@@ -720,6 +719,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
720 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 719 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
721 goto discard_it; 720 goto discard_it;
722 hdr = icmp6_hdr(skb); 721 hdr = icmp6_hdr(skb);
722 orig_hdr = (struct ipv6hdr *) (hdr + 1);
723 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
724 ntohl(hdr->icmp6_mtu));
723 725
724 /* 726 /*
725 * Drop through to notify 727 * Drop through to notify
@@ -789,8 +791,8 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
789 int oif) 791 int oif)
790{ 792{
791 memset(fl6, 0, sizeof(*fl6)); 793 memset(fl6, 0, sizeof(*fl6));
792 fl6->saddr = *saddr; 794 ipv6_addr_copy(&fl6->saddr, saddr);
793 fl6->daddr = *daddr; 795 ipv6_addr_copy(&fl6->daddr, daddr);
794 fl6->flowi6_proto = IPPROTO_ICMPV6; 796 fl6->flowi6_proto = IPPROTO_ICMPV6;
795 fl6->fl6_icmp_type = type; 797 fl6->fl6_icmp_type = type;
796 fl6->fl6_icmp_code = 0; 798 fl6->fl6_icmp_code = 0;
@@ -817,7 +819,9 @@ static int __net_init icmpv6_sk_init(struct net *net)
817 err = inet_ctl_sock_create(&sk, PF_INET6, 819 err = inet_ctl_sock_create(&sk, PF_INET6,
818 SOCK_RAW, IPPROTO_ICMPV6, net); 820 SOCK_RAW, IPPROTO_ICMPV6, net);
819 if (err < 0) { 821 if (err < 0) {
820 pr_err("Failed to initialize the ICMP6 control socket (err %d)\n", 822 printk(KERN_ERR
823 "Failed to initialize the ICMP6 control socket "
824 "(err %d).\n",
821 err); 825 err);
822 goto fail; 826 goto fail;
823 } 827 }
@@ -836,7 +840,8 @@ static int __net_init icmpv6_sk_init(struct net *net)
836 /* Enough space for 2 64K ICMP packets, including 840 /* Enough space for 2 64K ICMP packets, including
837 * sk_buff struct overhead. 841 * sk_buff struct overhead.
838 */ 842 */
839 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); 843 sk->sk_sndbuf =
844 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
840 } 845 }
841 return 0; 846 return 0;
842 847
@@ -876,7 +881,7 @@ int __init icmpv6_init(void)
876 return 0; 881 return 0;
877 882
878fail: 883fail:
879 pr_err("Failed to register ICMP6 protocol\n"); 884 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
880 unregister_pernet_subsys(&icmpv6_sk_ops); 885 unregister_pernet_subsys(&icmpv6_sk_ops);
881 return err; 886 return err;
882} 887}
@@ -945,6 +950,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
945 950
946 return fatal; 951 return fatal;
947} 952}
953
948EXPORT_SYMBOL(icmpv6_err_convert); 954EXPORT_SYMBOL(icmpv6_err_convert);
949 955
950#ifdef CONFIG_SYSCTL 956#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 30647857a37..8a58e8cf664 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -28,7 +28,7 @@
28#include <net/inet6_connection_sock.h> 28#include <net/inet6_connection_sock.h>
29 29
30int inet6_csk_bind_conflict(const struct sock *sk, 30int inet6_csk_bind_conflict(const struct sock *sk,
31 const struct inet_bind_bucket *tb, bool relax) 31 const struct inet_bind_bucket *tb)
32{ 32{
33 const struct sock *sk2; 33 const struct sock *sk2;
34 const struct hlist_node *node; 34 const struct hlist_node *node;
@@ -55,26 +55,26 @@ int inet6_csk_bind_conflict(const struct sock *sk,
55EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); 55EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
56 56
57struct dst_entry *inet6_csk_route_req(struct sock *sk, 57struct dst_entry *inet6_csk_route_req(struct sock *sk,
58 struct flowi6 *fl6,
59 const struct request_sock *req) 58 const struct request_sock *req)
60{ 59{
61 struct inet6_request_sock *treq = inet6_rsk(req); 60 struct inet6_request_sock *treq = inet6_rsk(req);
62 struct ipv6_pinfo *np = inet6_sk(sk); 61 struct ipv6_pinfo *np = inet6_sk(sk);
63 struct in6_addr *final_p, final; 62 struct in6_addr *final_p, final;
64 struct dst_entry *dst; 63 struct dst_entry *dst;
64 struct flowi6 fl6;
65 65
66 memset(fl6, 0, sizeof(*fl6)); 66 memset(&fl6, 0, sizeof(fl6));
67 fl6->flowi6_proto = IPPROTO_TCP; 67 fl6.flowi6_proto = IPPROTO_TCP;
68 fl6->daddr = treq->rmt_addr; 68 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
69 final_p = fl6_update_dst(fl6, np->opt, &final); 69 final_p = fl6_update_dst(&fl6, np->opt, &final);
70 fl6->saddr = treq->loc_addr; 70 ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
71 fl6->flowi6_oif = treq->iif; 71 fl6.flowi6_oif = sk->sk_bound_dev_if;
72 fl6->flowi6_mark = sk->sk_mark; 72 fl6.flowi6_mark = sk->sk_mark;
73 fl6->fl6_dport = inet_rsk(req)->rmt_port; 73 fl6.fl6_dport = inet_rsk(req)->rmt_port;
74 fl6->fl6_sport = inet_rsk(req)->loc_port; 74 fl6.fl6_sport = inet_rsk(req)->loc_port;
75 security_req_classify_flow(req, flowi6_to_flowi(fl6)); 75 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
76 76
77 dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); 77 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
78 if (IS_ERR(dst)) 78 if (IS_ERR(dst))
79 return NULL; 79 return NULL;
80 80
@@ -85,7 +85,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
85 * request_sock (formerly open request) hash tables. 85 * request_sock (formerly open request) hash tables.
86 */ 86 */
87static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, 87static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
88 const u32 rnd, const u32 synq_hsize) 88 const u32 rnd, const u16 synq_hsize)
89{ 89{
90 u32 c; 90 u32 c;
91 91
@@ -157,7 +157,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
157 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; 157 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
158 158
159 sin6->sin6_family = AF_INET6; 159 sin6->sin6_family = AF_INET6;
160 sin6->sin6_addr = np->daddr; 160 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
161 sin6->sin6_port = inet_sk(sk)->inet_dport; 161 sin6->sin6_port = inet_sk(sk)->inet_dport;
162 /* We do not store received flowlabel for TCP */ 162 /* We do not store received flowlabel for TCP */
163 sin6->sin6_flowinfo = 0; 163 sin6->sin6_flowinfo = 0;
@@ -171,88 +171,82 @@ EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
171 171
172static inline 172static inline
173void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, 173void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
174 const struct in6_addr *daddr, 174 struct in6_addr *daddr, struct in6_addr *saddr)
175 const struct in6_addr *saddr)
176{ 175{
177 __ip6_dst_store(sk, dst, daddr, saddr); 176 __ip6_dst_store(sk, dst, daddr, saddr);
177
178#ifdef CONFIG_XFRM
179 {
180 struct rt6_info *rt = (struct rt6_info *)dst;
181 rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
182 }
183#endif
178} 184}
179 185
180static inline 186static inline
181struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) 187struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
182{ 188{
183 return __sk_dst_check(sk, cookie);
184}
185
186static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
187 struct flowi6 *fl6)
188{
189 struct inet_sock *inet = inet_sk(sk);
190 struct ipv6_pinfo *np = inet6_sk(sk);
191 struct in6_addr *final_p, final;
192 struct dst_entry *dst; 189 struct dst_entry *dst;
193 190
194 memset(fl6, 0, sizeof(*fl6)); 191 dst = __sk_dst_check(sk, cookie);
195 fl6->flowi6_proto = sk->sk_protocol;
196 fl6->daddr = np->daddr;
197 fl6->saddr = np->saddr;
198 fl6->flowlabel = np->flow_label;
199 IP6_ECN_flow_xmit(sk, fl6->flowlabel);
200 fl6->flowi6_oif = sk->sk_bound_dev_if;
201 fl6->flowi6_mark = sk->sk_mark;
202 fl6->fl6_sport = inet->inet_sport;
203 fl6->fl6_dport = inet->inet_dport;
204 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
205 192
206 final_p = fl6_update_dst(fl6, np->opt, &final); 193#ifdef CONFIG_XFRM
207 194 if (dst) {
208 dst = __inet6_csk_dst_check(sk, np->dst_cookie); 195 struct rt6_info *rt = (struct rt6_info *)dst;
209 if (!dst) { 196 if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
210 dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); 197 __sk_dst_reset(sk);
211 198 dst = NULL;
212 if (!IS_ERR(dst)) 199 }
213 __inet6_csk_dst_store(sk, dst, NULL, NULL);
214 } 200 }
201#endif
202
215 return dst; 203 return dst;
216} 204}
217 205
218int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) 206int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
219{ 207{
220 struct sock *sk = skb->sk; 208 struct sock *sk = skb->sk;
209 struct inet_sock *inet = inet_sk(sk);
221 struct ipv6_pinfo *np = inet6_sk(sk); 210 struct ipv6_pinfo *np = inet6_sk(sk);
222 struct flowi6 fl6; 211 struct flowi6 fl6;
223 struct dst_entry *dst; 212 struct dst_entry *dst;
224 int res; 213 struct in6_addr *final_p, final;
225
226 dst = inet6_csk_route_socket(sk, &fl6);
227 if (IS_ERR(dst)) {
228 sk->sk_err_soft = -PTR_ERR(dst);
229 sk->sk_route_caps = 0;
230 kfree_skb(skb);
231 return PTR_ERR(dst);
232 }
233 214
234 rcu_read_lock(); 215 memset(&fl6, 0, sizeof(fl6));
235 skb_dst_set_noref(skb, dst); 216 fl6.flowi6_proto = sk->sk_protocol;
217 ipv6_addr_copy(&fl6.daddr, &np->daddr);
218 ipv6_addr_copy(&fl6.saddr, &np->saddr);
219 fl6.flowlabel = np->flow_label;
220 IP6_ECN_flow_xmit(sk, fl6.flowlabel);
221 fl6.flowi6_oif = sk->sk_bound_dev_if;
222 fl6.flowi6_mark = sk->sk_mark;
223 fl6.fl6_sport = inet->inet_sport;
224 fl6.fl6_dport = inet->inet_dport;
225 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
236 226
237 /* Restore final destination back after routing done */ 227 final_p = fl6_update_dst(&fl6, np->opt, &final);
238 fl6.daddr = np->daddr;
239 228
240 res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); 229 dst = __inet6_csk_dst_check(sk, np->dst_cookie);
241 rcu_read_unlock();
242 return res;
243}
244EXPORT_SYMBOL_GPL(inet6_csk_xmit);
245 230
246struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) 231 if (dst == NULL) {
247{ 232 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
248 struct flowi6 fl6;
249 struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6);
250 233
251 if (IS_ERR(dst)) 234 if (IS_ERR(dst)) {
252 return NULL; 235 sk->sk_err_soft = -PTR_ERR(dst);
253 dst->ops->update_pmtu(dst, sk, NULL, mtu); 236 sk->sk_route_caps = 0;
237 kfree_skb(skb);
238 return PTR_ERR(dst);
239 }
240
241 __inet6_csk_dst_store(sk, dst, NULL, NULL);
242 }
243
244 skb_dst_set(skb, dst_clone(dst));
254 245
255 dst = inet6_csk_route_socket(sk, &fl6); 246 /* Restore final destination back after routing done */
256 return IS_ERR(dst) ? NULL : dst; 247 ipv6_addr_copy(&fl6.daddr, &np->daddr);
248
249 return ip6_xmit(sk, skb, &fl6, np->opt);
257} 250}
258EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); 251
252EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index dea17fd28e5..73f1a00a96a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -87,13 +87,11 @@ struct sock *__inet6_lookup_established(struct net *net,
87 rcu_read_lock(); 87 rcu_read_lock();
88begin: 88begin:
89 sk_nulls_for_each_rcu(sk, node, &head->chain) { 89 sk_nulls_for_each_rcu(sk, node, &head->chain) {
90 if (sk->sk_hash != hash) 90 /* For IPV6 do the cheaper port and family tests first. */
91 continue; 91 if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
92 if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
93 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 92 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
94 goto begintw; 93 goto begintw;
95 if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, 94 if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
96 ports, dif))) {
97 sock_put(sk); 95 sock_put(sk);
98 goto begin; 96 goto begin;
99 } 97 }
@@ -106,16 +104,12 @@ begin:
106begintw: 104begintw:
107 /* Must check for a TIME_WAIT'er before going to listener hash. */ 105 /* Must check for a TIME_WAIT'er before going to listener hash. */
108 sk_nulls_for_each_rcu(sk, node, &head->twchain) { 106 sk_nulls_for_each_rcu(sk, node, &head->twchain) {
109 if (sk->sk_hash != hash) 107 if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
110 continue;
111 if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
112 ports, dif))) {
113 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { 108 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
114 sk = NULL; 109 sk = NULL;
115 goto out; 110 goto out;
116 } 111 }
117 if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, 112 if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) {
118 ports, dif))) {
119 sock_put(sk); 113 sock_put(sk);
120 goto begintw; 114 goto begintw;
121 } 115 }
@@ -242,12 +236,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
242 236
243 /* Check TIME-WAIT sockets first. */ 237 /* Check TIME-WAIT sockets first. */
244 sk_nulls_for_each(sk2, node, &head->twchain) { 238 sk_nulls_for_each(sk2, node, &head->twchain) {
245 if (sk2->sk_hash != hash) 239 tw = inet_twsk(sk2);
246 continue;
247 240
248 if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, 241 if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
249 ports, dif))) {
250 tw = inet_twsk(sk2);
251 if (twsk_unique(sk, sk2, twp)) 242 if (twsk_unique(sk, sk2, twp))
252 goto unique; 243 goto unique;
253 else 244 else
@@ -258,9 +249,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
258 249
259 /* And established part... */ 250 /* And established part... */
260 sk_nulls_for_each(sk2, node, &head->chain) { 251 sk_nulls_for_each(sk2, node, &head->chain) {
261 if (sk2->sk_hash != hash) 252 if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
262 continue;
263 if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
264 goto not_unique; 253 goto not_unique;
265 } 254 }
266 255
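
Both lookup paths above follow the same lockless discipline: match the keys cheaply, take a reference only if the socket is still live (atomic_inc_not_zero), then re-check the keys because the slot may have been reused while no lock was held. A single-object user-space sketch of that check/ref/re-check pattern, using C11 atomics rather than the kernel primitives:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
        atomic_int refcnt;      /* 0 means the entry is being freed */
        int key;
};

static bool get_ref_not_zero(struct entry *e)
{
        int old = atomic_load(&e->refcnt);

        while (old != 0) {
                if (atomic_compare_exchange_weak(&e->refcnt, &old, old + 1))
                        return true;    /* reference taken */
        }
        return false;                   /* already dying, do not touch */
}

static struct entry *lookup(struct entry *e, int key)
{
        if (e->key != key)
                return NULL;            /* cheap check first */
        if (!get_ref_not_zero(e))
                return NULL;            /* entry is going away */
        if (e->key != key) {            /* re-check under the reference */
                atomic_fetch_sub(&e->refcnt, 1);
                return NULL;
        }
        return e;
}

int main(void)
{
        struct entry e;
        struct entry *hit;

        atomic_init(&e.refcnt, 1);
        e.key = 42;

        hit = lookup(&e, 42);
        printf("hit=%p refcnt=%d\n", (void *)hit, atomic_load(&e.refcnt));
        return 0;
}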
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 710cafd2e1a..320d91d20ad 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -18,9 +18,6 @@
18 * routing table. 18 * routing table.
19 * Ville Nuorvala: Fixed routing subtrees. 19 * Ville Nuorvala: Fixed routing subtrees.
20 */ 20 */
21
22#define pr_fmt(fmt) "IPv6: " fmt
23
24#include <linux/errno.h> 21#include <linux/errno.h>
25#include <linux/types.h> 22#include <linux/types.h>
26#include <linux/net.h> 23#include <linux/net.h>
@@ -31,6 +28,10 @@
31#include <linux/list.h> 28#include <linux/list.h>
32#include <linux/slab.h> 29#include <linux/slab.h>
33 30
31#ifdef CONFIG_PROC_FS
32#include <linux/proc_fs.h>
33#endif
34
34#include <net/ipv6.h> 35#include <net/ipv6.h>
35#include <net/ndisc.h> 36#include <net/ndisc.h>
36#include <net/addrconf.h> 37#include <net/addrconf.h>
@@ -41,7 +42,7 @@
41#define RT6_DEBUG 2 42#define RT6_DEBUG 2
42 43
43#if RT6_DEBUG >= 3 44#if RT6_DEBUG >= 3
44#define RT6_TRACE(x...) pr_debug(x) 45#define RT6_TRACE(x...) printk(KERN_DEBUG x)
45#else 46#else
46#define RT6_TRACE(x...) do { ; } while (0) 47#define RT6_TRACE(x...) do { ; } while (0)
47#endif 48#endif
@@ -193,11 +194,10 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
193 struct fib6_table *table; 194 struct fib6_table *table;
194 195
195 table = kzalloc(sizeof(*table), GFP_ATOMIC); 196 table = kzalloc(sizeof(*table), GFP_ATOMIC);
196 if (table) { 197 if (table != NULL) {
197 table->tb6_id = id; 198 table->tb6_id = id;
198 table->tb6_root.leaf = net->ipv6.ip6_null_entry; 199 table->tb6_root.leaf = net->ipv6.ip6_null_entry;
199 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 200 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
200 inet_peer_base_init(&table->tb6_peers);
201 } 201 }
202 202
203 return table; 203 return table;
@@ -214,7 +214,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id)
214 return tb; 214 return tb;
215 215
216 tb = fib6_alloc_table(net, id); 216 tb = fib6_alloc_table(net, id);
217 if (tb) 217 if (tb != NULL)
218 fib6_link_table(net, tb); 218 fib6_link_table(net, tb);
219 219
220 return tb; 220 return tb;
@@ -371,7 +371,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
371 s_e = cb->args[1]; 371 s_e = cb->args[1];
372 372
373 w = (void *)cb->args[2]; 373 w = (void *)cb->args[2];
374 if (!w) { 374 if (w == NULL) {
375 /* New dump: 375 /* New dump:
376 * 376 *
377 * 1. hook callback destructor. 377 * 1. hook callback destructor.
@@ -383,7 +383,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
383 * 2. allocate and initialize walker. 383 * 2. allocate and initialize walker.
384 */ 384 */
385 w = kzalloc(sizeof(*w), GFP_ATOMIC); 385 w = kzalloc(sizeof(*w), GFP_ATOMIC);
386 if (!w) 386 if (w == NULL)
387 return -ENOMEM; 387 return -ENOMEM;
388 w->func = fib6_dump_node; 388 w->func = fib6_dump_node;
389 cb->args[2] = (long)w; 389 cb->args[2] = (long)w;
@@ -429,8 +429,7 @@ out:
429 429
430static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, 430static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
431 int addrlen, int plen, 431 int addrlen, int plen,
432 int offset, int allow_create, 432 int offset)
433 int replace_required)
434{ 433{
435 struct fib6_node *fn, *in, *ln; 434 struct fib6_node *fn, *in, *ln;
436 struct fib6_node *pn = NULL; 435 struct fib6_node *pn = NULL;
@@ -452,16 +451,8 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
452 * Prefix match 451 * Prefix match
453 */ 452 */
454 if (plen < fn->fn_bit || 453 if (plen < fn->fn_bit ||
455 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { 454 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
456 if (!allow_create) {
457 if (replace_required) {
458 pr_warn("Can't replace route, no match found\n");
459 return ERR_PTR(-ENOENT);
460 }
461 pr_warn("NLM_F_CREATE should be set when creating new route\n");
462 }
463 goto insert_above; 455 goto insert_above;
464 }
465 456
466 /* 457 /*
467 * Exact match ? 458 * Exact match ?
@@ -469,7 +460,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
469 460
470 if (plen == fn->fn_bit) { 461 if (plen == fn->fn_bit) {
471 /* clean up an intermediate node */ 462 /* clean up an intermediate node */
472 if (!(fn->fn_flags & RTN_RTINFO)) { 463 if ((fn->fn_flags & RTN_RTINFO) == 0) {
473 rt6_release(fn->leaf); 464 rt6_release(fn->leaf);
474 fn->leaf = NULL; 465 fn->leaf = NULL;
475 } 466 }
@@ -490,22 +481,6 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
490 fn = dir ? fn->right: fn->left; 481 fn = dir ? fn->right: fn->left;
491 } while (fn); 482 } while (fn);
492 483
493 if (!allow_create) {
494 /* We should not create new node because
495 * NLM_F_REPLACE was specified without NLM_F_CREATE
496 * I assume it is safe to require NLM_F_CREATE when
497 * REPLACE flag is used! Later we may want to remove the
498 * check for replace_required, because according
499 * to netlink specification, NLM_F_CREATE
500 * MUST be specified if new route is created.
501 * That would keep IPv6 consistent with IPv4
502 */
503 if (replace_required) {
504 pr_warn("Can't replace route, no match found\n");
505 return ERR_PTR(-ENOENT);
506 }
507 pr_warn("NLM_F_CREATE should be set when creating new route\n");
508 }
509 /* 484 /*
510 * We walked to the bottom of tree. 485 * We walked to the bottom of tree.
511 * Create new leaf node without children. 486 * Create new leaf node without children.
@@ -513,8 +488,8 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
513 488
514 ln = node_alloc(); 489 ln = node_alloc();
515 490
516 if (!ln) 491 if (ln == NULL)
517 return ERR_PTR(-ENOMEM); 492 return NULL;
518 ln->fn_bit = plen; 493 ln->fn_bit = plen;
519 494
520 ln->parent = pn; 495 ln->parent = pn;
@@ -556,12 +531,12 @@ insert_above:
556 in = node_alloc(); 531 in = node_alloc();
557 ln = node_alloc(); 532 ln = node_alloc();
558 533
559 if (!in || !ln) { 534 if (in == NULL || ln == NULL) {
560 if (in) 535 if (in)
561 node_free(in); 536 node_free(in);
562 if (ln) 537 if (ln)
563 node_free(ln); 538 node_free(ln);
564 return ERR_PTR(-ENOMEM); 539 return NULL;
565 } 540 }
566 541
567 /* 542 /*
@@ -610,8 +585,8 @@ insert_above:
610 585
611 ln = node_alloc(); 586 ln = node_alloc();
612 587
613 if (!ln) 588 if (ln == NULL)
614 return ERR_PTR(-ENOMEM); 589 return NULL;
615 590
616 ln->fn_bit = plen; 591 ln->fn_bit = plen;
617 592
@@ -643,15 +618,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
643{ 618{
644 struct rt6_info *iter = NULL; 619 struct rt6_info *iter = NULL;
645 struct rt6_info **ins; 620 struct rt6_info **ins;
646 int replace = (info->nlh &&
647 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
648 int add = (!info->nlh ||
649 (info->nlh->nlmsg_flags & NLM_F_CREATE));
650 int found = 0;
651 621
652 ins = &fn->leaf; 622 ins = &fn->leaf;
653 623
654 for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { 624 for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
655 /* 625 /*
656 * Search for duplicates 626 * Search for duplicates
657 */ 627 */
@@ -660,43 +630,20 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
660 /* 630 /*
661 * Same priority level 631 * Same priority level
662 */ 632 */
663 if (info->nlh &&
664 (info->nlh->nlmsg_flags & NLM_F_EXCL))
665 return -EEXIST;
666 if (replace) {
667 found++;
668 break;
669 }
670 633
671 if (iter->dst.dev == rt->dst.dev && 634 if (iter->rt6i_dev == rt->rt6i_dev &&
672 iter->rt6i_idev == rt->rt6i_idev && 635 iter->rt6i_idev == rt->rt6i_idev &&
673 ipv6_addr_equal(&iter->rt6i_gateway, 636 ipv6_addr_equal(&iter->rt6i_gateway,
674 &rt->rt6i_gateway)) { 637 &rt->rt6i_gateway)) {
675 if (rt->rt6i_nsiblings) 638 if (!(iter->rt6i_flags&RTF_EXPIRES))
676 rt->rt6i_nsiblings = 0;
677 if (!(iter->rt6i_flags & RTF_EXPIRES))
678 return -EEXIST; 639 return -EEXIST;
679 if (!(rt->rt6i_flags & RTF_EXPIRES)) 640 iter->rt6i_expires = rt->rt6i_expires;
680 rt6_clean_expires(iter); 641 if (!(rt->rt6i_flags&RTF_EXPIRES)) {
681 else 642 iter->rt6i_flags &= ~RTF_EXPIRES;
682 rt6_set_expires(iter, rt->dst.expires); 643 iter->rt6i_expires = 0;
644 }
683 return -EEXIST; 645 return -EEXIST;
684 } 646 }
685 /* If we have the same destination and the same metric,
686 * but not the same gateway, then the route we try to
687 * add is sibling to this route, increment our counter
688 * of siblings, and later we will add our route to the
689 * list.
690 * Only static routes (which don't have flag
691 * RTF_EXPIRES) are used for ECMPv6.
692 *
693 * To avoid long list, we only had siblings if the
694 * route have a gateway.
695 */
696 if (rt->rt6i_flags & RTF_GATEWAY &&
697 !(rt->rt6i_flags & RTF_EXPIRES) &&
698 !(iter->rt6i_flags & RTF_EXPIRES))
699 rt->rt6i_nsiblings++;
700 } 647 }
701 648
702 if (iter->rt6i_metric > rt->rt6i_metric) 649 if (iter->rt6i_metric > rt->rt6i_metric)
@@ -709,72 +656,20 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
709 if (ins == &fn->leaf) 656 if (ins == &fn->leaf)
710 fn->rr_ptr = NULL; 657 fn->rr_ptr = NULL;
711 658
712 /* Link this route to others same route. */
713 if (rt->rt6i_nsiblings) {
714 unsigned int rt6i_nsiblings;
715 struct rt6_info *sibling, *temp_sibling;
716
717 /* Find the first route that have the same metric */
718 sibling = fn->leaf;
719 while (sibling) {
720 if (sibling->rt6i_metric == rt->rt6i_metric) {
721 list_add_tail(&rt->rt6i_siblings,
722 &sibling->rt6i_siblings);
723 break;
724 }
725 sibling = sibling->dst.rt6_next;
726 }
727 /* For each sibling in the list, increment the counter of
728 * siblings. BUG() if counters does not match, list of siblings
729 * is broken!
730 */
731 rt6i_nsiblings = 0;
732 list_for_each_entry_safe(sibling, temp_sibling,
733 &rt->rt6i_siblings, rt6i_siblings) {
734 sibling->rt6i_nsiblings++;
735 BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
736 rt6i_nsiblings++;
737 }
738 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
739 }
740
741 /* 659 /*
742 * insert node 660 * insert node
743 */ 661 */
744 if (!replace) {
745 if (!add)
746 pr_warn("NLM_F_CREATE should be set when creating new route\n");
747
748add:
749 rt->dst.rt6_next = iter;
750 *ins = rt;
751 rt->rt6i_node = fn;
752 atomic_inc(&rt->rt6i_ref);
753 inet6_rt_notify(RTM_NEWROUTE, rt, info);
754 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
755
756 if (!(fn->fn_flags & RTN_RTINFO)) {
757 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
758 fn->fn_flags |= RTN_RTINFO;
759 }
760 662
761 } else { 663 rt->dst.rt6_next = iter;
762 if (!found) { 664 *ins = rt;
763 if (add) 665 rt->rt6i_node = fn;
764 goto add; 666 atomic_inc(&rt->rt6i_ref);
765 pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); 667 inet6_rt_notify(RTM_NEWROUTE, rt, info);
766 return -ENOENT; 668 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
767 } 669
768 *ins = rt; 670 if ((fn->fn_flags & RTN_RTINFO) == 0) {
769 rt->rt6i_node = fn; 671 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
770 rt->dst.rt6_next = iter->dst.rt6_next; 672 fn->fn_flags |= RTN_RTINFO;
771 atomic_inc(&rt->rt6i_ref);
772 inet6_rt_notify(RTM_NEWROUTE, rt, info);
773 rt6_release(iter);
774 if (!(fn->fn_flags & RTN_RTINFO)) {
775 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
776 fn->fn_flags |= RTN_RTINFO;
777 }
778 } 673 }
779 674
780 return 0; 675 return 0;
@@ -783,7 +678,7 @@ add:
783static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) 678static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt)
784{ 679{
785 if (!timer_pending(&net->ipv6.ip6_fib_timer) && 680 if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
786 (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) 681 (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
787 mod_timer(&net->ipv6.ip6_fib_timer, 682 mod_timer(&net->ipv6.ip6_fib_timer,
788 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 683 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
789} 684}
@@ -805,26 +700,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
805{ 700{
806 struct fib6_node *fn, *pn = NULL; 701 struct fib6_node *fn, *pn = NULL;
807 int err = -ENOMEM; 702 int err = -ENOMEM;
808 int allow_create = 1;
809 int replace_required = 0;
810
811 if (info->nlh) {
812 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
813 allow_create = 0;
814 if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
815 replace_required = 1;
816 }
817 if (!allow_create && !replace_required)
818 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
819 703
820 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), 704 fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
821 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), 705 rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
822 allow_create, replace_required);
823 706
824 if (IS_ERR(fn)) { 707 if (fn == NULL)
825 err = PTR_ERR(fn);
826 goto out; 708 goto out;
827 }
828 709
829 pn = fn; 710 pn = fn;
830 711
@@ -832,7 +713,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
832 if (rt->rt6i_src.plen) { 713 if (rt->rt6i_src.plen) {
833 struct fib6_node *sn; 714 struct fib6_node *sn;
834 715
835 if (!fn->subtree) { 716 if (fn->subtree == NULL) {
836 struct fib6_node *sfn; 717 struct fib6_node *sfn;
837 718
838 /* 719 /*
@@ -847,7 +728,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
847 728
848 /* Create subtree root node */ 729 /* Create subtree root node */
849 sfn = node_alloc(); 730 sfn = node_alloc();
850 if (!sfn) 731 if (sfn == NULL)
851 goto st_failure; 732 goto st_failure;
852 733
853 sfn->leaf = info->nl_net->ipv6.ip6_null_entry; 734 sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
@@ -859,16 +740,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
859 740
860 sn = fib6_add_1(sfn, &rt->rt6i_src.addr, 741 sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
861 sizeof(struct in6_addr), rt->rt6i_src.plen, 742 sizeof(struct in6_addr), rt->rt6i_src.plen,
862 offsetof(struct rt6_info, rt6i_src), 743 offsetof(struct rt6_info, rt6i_src));
863 allow_create, replace_required);
864 744
865 if (IS_ERR(sn)) { 745 if (sn == NULL) {
866 /* If it is failed, discard just allocated 746 /* If it is failed, discard just allocated
867 root, and then (in st_failure) stale node 747 root, and then (in st_failure) stale node
868 in main tree. 748 in main tree.
869 */ 749 */
870 node_free(sfn); 750 node_free(sfn);
871 err = PTR_ERR(sn);
872 goto st_failure; 751 goto st_failure;
873 } 752 }
874 753
@@ -878,16 +757,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
878 } else { 757 } else {
879 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, 758 sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
880 sizeof(struct in6_addr), rt->rt6i_src.plen, 759 sizeof(struct in6_addr), rt->rt6i_src.plen,
881 offsetof(struct rt6_info, rt6i_src), 760 offsetof(struct rt6_info, rt6i_src));
882 allow_create, replace_required);
883 761
884 if (IS_ERR(sn)) { 762 if (sn == NULL)
885 err = PTR_ERR(sn);
886 goto st_failure; 763 goto st_failure;
887 }
888 } 764 }
889 765
890 if (!fn->leaf) { 766 if (fn->leaf == NULL) {
891 fn->leaf = rt; 767 fn->leaf = rt;
892 atomic_inc(&rt->rt6i_ref); 768 atomic_inc(&rt->rt6i_ref);
893 } 769 }
@@ -896,9 +772,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
896#endif 772#endif
897 773
898 err = fib6_add_rt2node(fn, rt, info); 774 err = fib6_add_rt2node(fn, rt, info);
899 if (!err) { 775
776 if (err == 0) {
900 fib6_start_gc(info->nl_net, rt); 777 fib6_start_gc(info->nl_net, rt);
901 if (!(rt->rt6i_flags & RTF_CACHE)) 778 if (!(rt->rt6i_flags&RTF_CACHE))
902 fib6_prune_clones(info->nl_net, pn, rt); 779 fib6_prune_clones(info->nl_net, pn, rt);
903 } 780 }
904 781
@@ -946,7 +823,7 @@ st_failure:
946 */ 823 */
947 824
948struct lookup_args { 825struct lookup_args {
949 int offset; /* key offset on rt6_info */ 826 int offset; /* key offset on rt6_info */
950 const struct in6_addr *addr; /* search key */ 827 const struct in6_addr *addr; /* search key */
951}; 828};
952 829
@@ -976,10 +853,11 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
976 fn = next; 853 fn = next;
977 continue; 854 continue;
978 } 855 }
856
979 break; 857 break;
980 } 858 }
981 859
982 while (fn) { 860 while(fn) {
983 if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { 861 if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
984 struct rt6key *key; 862 struct rt6key *key;
985 863
@@ -1026,7 +904,8 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da
1026 }; 904 };
1027 905
1028 fn = fib6_lookup_1(root, daddr ? args : args + 1); 906 fn = fib6_lookup_1(root, daddr ? args : args + 1);
1029 if (!fn || fn->fn_flags & RTN_TL_ROOT) 907
908 if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
1030 fn = root; 909 fn = root;
1031 910
1032 return fn; 911 return fn;
@@ -1086,7 +965,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
1086 } 965 }
1087#endif 966#endif
1088 967
1089 if (fn && fn->fn_flags & RTN_RTINFO) 968 if (fn && fn->fn_flags&RTN_RTINFO)
1090 return fn; 969 return fn;
1091 970
1092 return NULL; 971 return NULL;
@@ -1100,13 +979,14 @@ struct fib6_node * fib6_locate(struct fib6_node *root,
1100 979
1101static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) 980static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
1102{ 981{
1103 if (fn->fn_flags & RTN_ROOT) 982 if (fn->fn_flags&RTN_ROOT)
1104 return net->ipv6.ip6_null_entry; 983 return net->ipv6.ip6_null_entry;
1105 984
1106 while (fn) { 985 while(fn) {
1107 if (fn->left) 986 if(fn->left)
1108 return fn->left->leaf; 987 return fn->left->leaf;
1109 if (fn->right) 988
989 if(fn->right)
1110 return fn->right->leaf; 990 return fn->right->leaf;
1111 991
1112 fn = FIB6_SUBTREE(fn); 992 fn = FIB6_SUBTREE(fn);
@@ -1144,12 +1024,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1144 if (children == 3 || FIB6_SUBTREE(fn) 1024 if (children == 3 || FIB6_SUBTREE(fn)
1145#ifdef CONFIG_IPV6_SUBTREES 1025#ifdef CONFIG_IPV6_SUBTREES
1146 /* Subtree root (i.e. fn) may have one child */ 1026 /* Subtree root (i.e. fn) may have one child */
1147 || (children && fn->fn_flags & RTN_ROOT) 1027 || (children && fn->fn_flags&RTN_ROOT)
1148#endif 1028#endif
1149 ) { 1029 ) {
1150 fn->leaf = fib6_find_prefix(net, fn); 1030 fn->leaf = fib6_find_prefix(net, fn);
1151#if RT6_DEBUG >= 2 1031#if RT6_DEBUG >= 2
1152 if (!fn->leaf) { 1032 if (fn->leaf==NULL) {
1153 WARN_ON(!fn->leaf); 1033 WARN_ON(!fn->leaf);
1154 fn->leaf = net->ipv6.ip6_null_entry; 1034 fn->leaf = net->ipv6.ip6_null_entry;
1155 } 1035 }
@@ -1182,7 +1062,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1182 1062
1183 read_lock(&fib6_walker_lock); 1063 read_lock(&fib6_walker_lock);
1184 FOR_WALKERS(w) { 1064 FOR_WALKERS(w) {
1185 if (!child) { 1065 if (child == NULL) {
1186 if (w->root == fn) { 1066 if (w->root == fn) {
1187 w->root = w->node = NULL; 1067 w->root = w->node = NULL;
1188 RT6_TRACE("W %p adjusted by delroot 1\n", w); 1068 RT6_TRACE("W %p adjusted by delroot 1\n", w);
@@ -1211,7 +1091,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1211 read_unlock(&fib6_walker_lock); 1091 read_unlock(&fib6_walker_lock);
1212 1092
1213 node_free(fn); 1093 node_free(fn);
1214 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) 1094 if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
1215 return pn; 1095 return pn;
1216 1096
1217 rt6_release(pn->leaf); 1097 rt6_release(pn->leaf);
@@ -1239,24 +1119,13 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1239 if (fn->rr_ptr == rt) 1119 if (fn->rr_ptr == rt)
1240 fn->rr_ptr = NULL; 1120 fn->rr_ptr = NULL;
1241 1121
1242 /* Remove this entry from other siblings */
1243 if (rt->rt6i_nsiblings) {
1244 struct rt6_info *sibling, *next_sibling;
1245
1246 list_for_each_entry_safe(sibling, next_sibling,
1247 &rt->rt6i_siblings, rt6i_siblings)
1248 sibling->rt6i_nsiblings--;
1249 rt->rt6i_nsiblings = 0;
1250 list_del_init(&rt->rt6i_siblings);
1251 }
1252
1253 /* Adjust walkers */ 1122 /* Adjust walkers */
1254 read_lock(&fib6_walker_lock); 1123 read_lock(&fib6_walker_lock);
1255 FOR_WALKERS(w) { 1124 FOR_WALKERS(w) {
1256 if (w->state == FWS_C && w->leaf == rt) { 1125 if (w->state == FWS_C && w->leaf == rt) {
1257 RT6_TRACE("walker %p adjusted by delroute\n", w); 1126 RT6_TRACE("walker %p adjusted by delroute\n", w);
1258 w->leaf = rt->dst.rt6_next; 1127 w->leaf = rt->dst.rt6_next;
1259 if (!w->leaf) 1128 if (w->leaf == NULL)
1260 w->state = FWS_U; 1129 w->state = FWS_U;
1261 } 1130 }
1262 } 1131 }
@@ -1265,7 +1134,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1265 rt->dst.rt6_next = NULL; 1134 rt->dst.rt6_next = NULL;
1266 1135
1267 /* If it was last route, expunge its radix tree node */ 1136 /* If it was last route, expunge its radix tree node */
1268 if (!fn->leaf) { 1137 if (fn->leaf == NULL) {
1269 fn->fn_flags &= ~RTN_RTINFO; 1138 fn->fn_flags &= ~RTN_RTINFO;
1270 net->ipv6.rt6_stats->fib_route_nodes--; 1139 net->ipv6.rt6_stats->fib_route_nodes--;
1271 fn = fib6_repair_tree(net, fn); 1140 fn = fib6_repair_tree(net, fn);
@@ -1279,7 +1148,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1279 * to still alive ones. 1148 * to still alive ones.
1280 */ 1149 */
1281 while (fn) { 1150 while (fn) {
1282 if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { 1151 if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
1283 fn->leaf = fib6_find_prefix(net, fn); 1152 fn->leaf = fib6_find_prefix(net, fn);
1284 atomic_inc(&fn->leaf->rt6i_ref); 1153 atomic_inc(&fn->leaf->rt6i_ref);
1285 rt6_release(rt); 1154 rt6_release(rt);
@@ -1306,17 +1175,17 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1306 return -ENOENT; 1175 return -ENOENT;
1307 } 1176 }
1308#endif 1177#endif
1309 if (!fn || rt == net->ipv6.ip6_null_entry) 1178 if (fn == NULL || rt == net->ipv6.ip6_null_entry)
1310 return -ENOENT; 1179 return -ENOENT;
1311 1180
1312 WARN_ON(!(fn->fn_flags & RTN_RTINFO)); 1181 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
1313 1182
1314 if (!(rt->rt6i_flags & RTF_CACHE)) { 1183 if (!(rt->rt6i_flags&RTF_CACHE)) {
1315 struct fib6_node *pn = fn; 1184 struct fib6_node *pn = fn;
1316#ifdef CONFIG_IPV6_SUBTREES 1185#ifdef CONFIG_IPV6_SUBTREES
1317 /* clones of this route might be in another subtree */ 1186 /* clones of this route might be in another subtree */
1318 if (rt->rt6i_src.plen) { 1187 if (rt->rt6i_src.plen) {
1319 while (!(pn->fn_flags & RTN_ROOT)) 1188 while (!(pn->fn_flags&RTN_ROOT))
1320 pn = pn->parent; 1189 pn = pn->parent;
1321 pn = pn->parent; 1190 pn = pn->parent;
1322 } 1191 }
@@ -1367,11 +1236,11 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
1367 1236
1368 for (;;) { 1237 for (;;) {
1369 fn = w->node; 1238 fn = w->node;
1370 if (!fn) 1239 if (fn == NULL)
1371 return 0; 1240 return 0;
1372 1241
1373 if (w->prune && fn != w->root && 1242 if (w->prune && fn != w->root &&
1374 fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { 1243 fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
1375 w->state = FWS_C; 1244 w->state = FWS_C;
1376 w->leaf = fn->leaf; 1245 w->leaf = fn->leaf;
1377 } 1246 }
@@ -1400,11 +1269,11 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
1400 w->state = FWS_C; 1269 w->state = FWS_C;
1401 w->leaf = fn->leaf; 1270 w->leaf = fn->leaf;
1402 case FWS_C: 1271 case FWS_C:
1403 if (w->leaf && fn->fn_flags & RTN_RTINFO) { 1272 if (w->leaf && fn->fn_flags&RTN_RTINFO) {
1404 int err; 1273 int err;
1405 1274
1406 if (w->skip) { 1275 if (w->count < w->skip) {
1407 w->skip--; 1276 w->count++;
1408 continue; 1277 continue;
1409 } 1278 }
1410 1279
@@ -1474,8 +1343,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
1474 res = fib6_del(rt, &info); 1343 res = fib6_del(rt, &info);
1475 if (res) { 1344 if (res) {
1476#if RT6_DEBUG >= 2 1345#if RT6_DEBUG >= 2
1477 pr_debug("%s: del failed: rt=%p@%p err=%d\n", 1346 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
1478 __func__, rt, rt->rt6i_node, res);
1479#endif 1347#endif
1480 continue; 1348 continue;
1481 } 1349 }
@@ -1516,26 +1384,6 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
1516 fib6_walk(&c.w); 1384 fib6_walk(&c.w);
1517} 1385}
1518 1386
1519void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg),
1520 int prune, void *arg)
1521{
1522 struct fib6_table *table;
1523 struct hlist_node *node;
1524 struct hlist_head *head;
1525 unsigned int h;
1526
1527 rcu_read_lock();
1528 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
1529 head = &net->ipv6.fib_table_hash[h];
1530 hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
1531 read_lock_bh(&table->tb6_lock);
1532 fib6_clean_tree(net, &table->tb6_root,
1533 func, prune, arg);
1534 read_unlock_bh(&table->tb6_lock);
1535 }
1536 }
1537 rcu_read_unlock();
1538}
1539void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), 1387void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
1540 int prune, void *arg) 1388 int prune, void *arg)
1541{ 1389{
@@ -1595,8 +1443,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1595 * only if they are not in use now. 1443 * only if they are not in use now.
1596 */ 1444 */
1597 1445
1598 if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { 1446 if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
1599 if (time_after(now, rt->dst.expires)) { 1447 if (time_after(now, rt->rt6i_expires)) {
1600 RT6_TRACE("expiring %p\n", rt); 1448 RT6_TRACE("expiring %p\n", rt);
1601 return -1; 1449 return -1;
1602 } 1450 }
@@ -1606,20 +1454,11 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1606 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { 1454 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
1607 RT6_TRACE("aging clone %p\n", rt); 1455 RT6_TRACE("aging clone %p\n", rt);
1608 return -1; 1456 return -1;
1609 } else if (rt->rt6i_flags & RTF_GATEWAY) { 1457 } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
1610 struct neighbour *neigh; 1458 (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) {
1611 __u8 neigh_flags = 0; 1459 RT6_TRACE("purging route %p via non-router but gateway\n",
1612 1460 rt);
1613 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); 1461 return -1;
1614 if (neigh) {
1615 neigh_flags = neigh->flags;
1616 neigh_release(neigh);
1617 }
1618 if (!(neigh_flags & NTF_ROUTER)) {
1619 RT6_TRACE("purging route %p via non-router but gateway\n",
1620 rt);
1621 return -1;
1622 }
1623 } 1462 }
1624 gc_args.more++; 1463 gc_args.more++;
1625 } 1464 }
@@ -1687,7 +1526,6 @@ static int __net_init fib6_net_init(struct net *net)
1687 net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 1526 net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
1688 net->ipv6.fib6_main_tbl->tb6_root.fn_flags = 1527 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
1689 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 1528 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
1690 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
1691 1529
1692#ifdef CONFIG_IPV6_MULTIPLE_TABLES 1530#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1693 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), 1531 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -1698,7 +1536,6 @@ static int __net_init fib6_net_init(struct net *net)
1698 net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; 1536 net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
1699 net->ipv6.fib6_local_tbl->tb6_root.fn_flags = 1537 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
1700 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; 1538 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
1701 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
1702#endif 1539#endif
1703 fib6_tables_init(net); 1540 fib6_tables_init(net);
1704 1541
@@ -1722,10 +1559,8 @@ static void fib6_net_exit(struct net *net)
1722 del_timer_sync(&net->ipv6.ip6_fib_timer); 1559 del_timer_sync(&net->ipv6.ip6_fib_timer);
1723 1560
1724#ifdef CONFIG_IPV6_MULTIPLE_TABLES 1561#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1725 inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
1726 kfree(net->ipv6.fib6_local_tbl); 1562 kfree(net->ipv6.fib6_local_tbl);
1727#endif 1563#endif
1728 inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
1729 kfree(net->ipv6.fib6_main_tbl); 1564 kfree(net->ipv6.fib6_main_tbl);
1730 kfree(net->ipv6.fib_table_hash); 1565 kfree(net->ipv6.fib_table_hash);
1731 kfree(net->ipv6.rt6_stats); 1566 kfree(net->ipv6.rt6_stats);
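
Several of the ip6_fib.c hunks above (fib6_age() in particular) turn on a wrap-safe jiffies comparison: a route carrying RTF_EXPIRES is dropped once time_after(now, expires) is true, with the expiry field read as rt->dst.expires on one side of the diff and rt->rt6i_expires on the other. A minimal standalone sketch of that comparison pattern follows; the names are illustrative only, not the kernel macro itself.

#include <stdio.h>

typedef unsigned long jiffies_t;

/* Wrap-safe "has a passed b?" test, same idea as the kernel's time_after():
 * the signed view of the unsigned difference survives counter wraparound. */
static int time_after_sketch(jiffies_t a, jiffies_t b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	jiffies_t expires = 1000, now = 1500;

	if (time_after_sketch(now, expires))
		printf("RTF_EXPIRES route has expired; the gc callback would return -1\n");
	else
		printf("route still within its lifetime; kept for another pass\n");
	return 0;
}
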
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 29124b7a04c..54303945019 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -21,8 +21,6 @@
21#include <linux/proc_fs.h> 21#include <linux/proc_fs.h>
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/export.h>
25#include <linux/pid_namespace.h>
26 24
27#include <net/net_namespace.h> 25#include <net/net_namespace.h>
28#include <net/sock.h> 26#include <net/sock.h>
@@ -92,8 +90,6 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
92static void fl_free(struct ip6_flowlabel *fl) 90static void fl_free(struct ip6_flowlabel *fl)
93{ 91{
94 if (fl) { 92 if (fl) {
95 if (fl->share == IPV6_FL_S_PROCESS)
96 put_pid(fl->owner.pid);
97 release_net(fl->fl_net); 93 release_net(fl->fl_net);
98 kfree(fl->opt); 94 kfree(fl->opt);
99 } 95 }
@@ -297,7 +293,6 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
297 opt_space->opt_flen = fopt->opt_flen; 293 opt_space->opt_flen = fopt->opt_flen;
298 return opt_space; 294 return opt_space;
299} 295}
300EXPORT_SYMBOL_GPL(fl6_merge_options);
301 296
302static unsigned long check_linger(unsigned long ttl) 297static unsigned long check_linger(unsigned long ttl)
303{ 298{
@@ -390,17 +385,17 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
390 err = -EINVAL; 385 err = -EINVAL;
391 goto done; 386 goto done;
392 } 387 }
393 fl->dst = freq->flr_dst; 388 ipv6_addr_copy(&fl->dst, &freq->flr_dst);
394 atomic_set(&fl->users, 1); 389 atomic_set(&fl->users, 1);
395 switch (fl->share) { 390 switch (fl->share) {
396 case IPV6_FL_S_EXCL: 391 case IPV6_FL_S_EXCL:
397 case IPV6_FL_S_ANY: 392 case IPV6_FL_S_ANY:
398 break; 393 break;
399 case IPV6_FL_S_PROCESS: 394 case IPV6_FL_S_PROCESS:
400 fl->owner.pid = get_task_pid(current, PIDTYPE_PID); 395 fl->owner = current->pid;
401 break; 396 break;
402 case IPV6_FL_S_USER: 397 case IPV6_FL_S_USER:
403 fl->owner.uid = current_euid(); 398 fl->owner = current_euid();
404 break; 399 break;
405 default: 400 default:
406 err = -EINVAL; 401 err = -EINVAL;
@@ -436,32 +431,32 @@ static int mem_check(struct sock *sk)
436 return 0; 431 return 0;
437} 432}
438 433
439static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2) 434static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
440{ 435{
441 if (h1 == h2) 436 if (h1 == h2)
442 return false; 437 return 0;
443 if (h1 == NULL || h2 == NULL) 438 if (h1 == NULL || h2 == NULL)
444 return true; 439 return 1;
445 if (h1->hdrlen != h2->hdrlen) 440 if (h1->hdrlen != h2->hdrlen)
446 return true; 441 return 1;
447 return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1)); 442 return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
448} 443}
449 444
450static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) 445static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
451{ 446{
452 if (o1 == o2) 447 if (o1 == o2)
453 return false; 448 return 0;
454 if (o1 == NULL || o2 == NULL) 449 if (o1 == NULL || o2 == NULL)
455 return true; 450 return 1;
456 if (o1->opt_nflen != o2->opt_nflen) 451 if (o1->opt_nflen != o2->opt_nflen)
457 return true; 452 return 1;
458 if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt)) 453 if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
459 return true; 454 return 1;
460 if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt)) 455 if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
461 return true; 456 return 1;
462 if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt)) 457 if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
463 return true; 458 return 1;
464 return false; 459 return 0;
465} 460}
466 461
467static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, 462static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
@@ -519,8 +514,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
519 } 514 }
520 read_unlock_bh(&ip6_sk_fl_lock); 515 read_unlock_bh(&ip6_sk_fl_lock);
521 516
522 if (freq.flr_share == IPV6_FL_S_NONE && 517 if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
523 ns_capable(net->user_ns, CAP_NET_ADMIN)) {
524 fl = fl_lookup(net, freq.flr_label); 518 fl = fl_lookup(net, freq.flr_label);
525 if (fl) { 519 if (fl) {
526 err = fl6_renew(fl, freq.flr_linger, freq.flr_expires); 520 err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
@@ -565,10 +559,7 @@ recheck:
565 err = -EPERM; 559 err = -EPERM;
566 if (fl1->share == IPV6_FL_S_EXCL || 560 if (fl1->share == IPV6_FL_S_EXCL ||
567 fl1->share != fl->share || 561 fl1->share != fl->share ||
568 ((fl1->share == IPV6_FL_S_PROCESS) && 562 fl1->owner != fl->owner)
569 (fl1->owner.pid == fl->owner.pid)) ||
570 ((fl1->share == IPV6_FL_S_USER) &&
571 uid_eq(fl1->owner.uid, fl->owner.uid)))
572 goto release; 563 goto release;
573 564
574 err = -EINVAL; 565 err = -EINVAL;
@@ -628,7 +619,6 @@ done:
628 619
629struct ip6fl_iter_state { 620struct ip6fl_iter_state {
630 struct seq_net_private p; 621 struct seq_net_private p;
631 struct pid_namespace *pid_ns;
632 int bucket; 622 int bucket;
633}; 623};
634 624
@@ -707,7 +697,6 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v)
707 697
708static int ip6fl_seq_show(struct seq_file *seq, void *v) 698static int ip6fl_seq_show(struct seq_file *seq, void *v)
709{ 699{
710 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
711 if (v == SEQ_START_TOKEN) 700 if (v == SEQ_START_TOKEN)
712 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", 701 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
713 "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); 702 "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
@@ -715,13 +704,9 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v)
715 struct ip6_flowlabel *fl = v; 704 struct ip6_flowlabel *fl = v;
716 seq_printf(seq, 705 seq_printf(seq,
717 "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", 706 "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
718 (unsigned int)ntohl(fl->label), 707 (unsigned)ntohl(fl->label),
719 fl->share, 708 fl->share,
720 ((fl->share == IPV6_FL_S_PROCESS) ? 709 (unsigned)fl->owner,
721 pid_nr_ns(fl->owner.pid, state->pid_ns) :
722 ((fl->share == IPV6_FL_S_USER) ?
723 from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
724 0)),
725 atomic_read(&fl->users), 710 atomic_read(&fl->users),
726 fl->linger/HZ, 711 fl->linger/HZ,
727 (long)(fl->expires - jiffies)/HZ, 712 (long)(fl->expires - jiffies)/HZ,
@@ -740,29 +725,8 @@ static const struct seq_operations ip6fl_seq_ops = {
740 725
741static int ip6fl_seq_open(struct inode *inode, struct file *file) 726static int ip6fl_seq_open(struct inode *inode, struct file *file)
742{ 727{
743 struct seq_file *seq; 728 return seq_open_net(inode, file, &ip6fl_seq_ops,
744 struct ip6fl_iter_state *state; 729 sizeof(struct ip6fl_iter_state));
745 int err;
746
747 err = seq_open_net(inode, file, &ip6fl_seq_ops,
748 sizeof(struct ip6fl_iter_state));
749
750 if (!err) {
751 seq = file->private_data;
752 state = ip6fl_seq_private(seq);
753 rcu_read_lock();
754 state->pid_ns = get_pid_ns(task_active_pid_ns(current));
755 rcu_read_unlock();
756 }
757 return err;
758}
759
760static int ip6fl_seq_release(struct inode *inode, struct file *file)
761{
762 struct seq_file *seq = file->private_data;
763 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
764 put_pid_ns(state->pid_ns);
765 return seq_release_net(inode, file);
766} 730}
767 731
768static const struct file_operations ip6fl_seq_fops = { 732static const struct file_operations ip6fl_seq_fops = {
@@ -770,7 +734,7 @@ static const struct file_operations ip6fl_seq_fops = {
770 .open = ip6fl_seq_open, 734 .open = ip6fl_seq_open,
771 .read = seq_read, 735 .read = seq_read,
772 .llseek = seq_lseek, 736 .llseek = seq_lseek,
773 .release = ip6fl_seq_release, 737 .release = seq_release_net,
774}; 738};
775 739
776static int __net_init ip6_flowlabel_proc_init(struct net *net) 740static int __net_init ip6_flowlabel_proc_init(struct net *net)
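
The ipv6_hdr_cmp()/ipv6_opt_cmp() hunk above changes only the return type (bool on one side of the diff, int on the other); the comparison rule itself is identical on both sides: two option headers differ if exactly one is missing, their hdrlen fields differ, or their bodies differ byte-for-byte. A self-contained sketch of that rule, using a simplified stand-in for struct ipv6_opt_hdr:

#include <stdio.h>
#include <string.h>

struct opt_hdr {
	unsigned char nexthdr;
	unsigned char hdrlen;	/* length in 8-octet units, minus 1 */
	unsigned char data[14];	/* body bytes; enough for hdrlen <= 1 in this toy */
};

static int hdr_cmp(const struct opt_hdr *h1, const struct opt_hdr *h2)
{
	if (h1 == h2)
		return 0;			/* same object (or both absent) */
	if (!h1 || !h2)
		return 1;			/* only one header present */
	if (h1->hdrlen != h2->hdrlen)
		return 1;
	/* ((hdrlen + 1) << 3) is the full header size; skip the 2 fixed bytes */
	return memcmp(h1->data, h2->data, ((h1->hdrlen + 1) << 3) - 2) != 0;
}

int main(void)
{
	struct opt_hdr a = { 0, 0, { 1, 2, 3, 4, 5, 6 } };
	struct opt_hdr b = a;

	printf("identical headers differ? %d\n", hdr_cmp(&a, &b));	/* 0 */
	b.data[0] = 9;
	printf("modified headers differ?  %d\n", hdr_cmp(&a, &b));	/* 1 */
	return 0;
}
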
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
deleted file mode 100644
index c727e471275..00000000000
--- a/net/ipv6/ip6_gre.c
+++ /dev/null
@@ -1,1754 +0,0 @@
1/*
2 * GRE over IPv6 protocol decoder.
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include <linux/capability.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/slab.h>
20#include <linux/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
33#include <linux/etherdevice.h>
34#include <linux/if_ether.h>
35#include <linux/hash.h>
36#include <linux/if_tunnel.h>
37#include <linux/ip6_tunnel.h>
38
39#include <net/sock.h>
40#include <net/ip.h>
41#include <net/icmp.h>
42#include <net/protocol.h>
43#include <net/addrconf.h>
44#include <net/arp.h>
45#include <net/checksum.h>
46#include <net/dsfield.h>
47#include <net/inet_ecn.h>
48#include <net/xfrm.h>
49#include <net/net_namespace.h>
50#include <net/netns/generic.h>
51#include <net/rtnetlink.h>
52
53#include <net/ipv6.h>
54#include <net/ip6_fib.h>
55#include <net/ip6_route.h>
56#include <net/ip6_tunnel.h>
57
58
59static bool log_ecn_error = true;
60module_param(log_ecn_error, bool, 0644);
61MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
62
63#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
64#define IPV6_TCLASS_SHIFT 20
65
66#define HASH_SIZE_SHIFT 5
67#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
68
69static int ip6gre_net_id __read_mostly;
70struct ip6gre_net {
71 struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
72
73 struct net_device *fb_tunnel_dev;
74};
75
76static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
77static int ip6gre_tunnel_init(struct net_device *dev);
78static void ip6gre_tunnel_setup(struct net_device *dev);
79static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
80static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
81
82/* Tunnel hash table */
83
84/*
85 4 hash tables:
86
87 3: (remote,local)
88 2: (remote,*)
89 1: (*,local)
90 0: (*,*)
91
92 We require exact key match i.e. if a key is present in packet
93 it will match only tunnel with the same key; if it is not present,
94 it will match only keyless tunnel.
95
96 All keysless packets, if not matched configured keyless tunnels
97 will match fallback tunnel.
98 */
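/*
 * Aside -- a standalone sketch (not part of ip6_gre.c) of the scheme the
 * comment above describes: tunnels with both addresses set live in table 3,
 * remote-only in 2, local-only in 1, wildcard in 0, and keys must match
 * exactly. Types and names below are simplified stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int bucket_prio(bool have_local, bool have_remote)
{
	int prio = 0;

	if (have_local)
		prio |= 1;	/* (*,local) and (remote,local) tables */
	if (have_remote)
		prio |= 2;	/* (remote,*) and (remote,local) tables */
	return prio;		/* index into tunnels[prio][hash] */
}

/* Exact key rule: a keyed packet only matches a tunnel with the same key,
 * and a keyless packet only matches a keyless tunnel. */
static bool key_matches(bool pkt_keyed, uint32_t pkt_key,
			bool tnl_keyed, uint32_t tnl_key)
{
	if (pkt_keyed != tnl_keyed)
		return false;
	return !pkt_keyed || pkt_key == tnl_key;
}

int main(void)
{
	printf("prio(local+remote) = %d\n", bucket_prio(true, true));	/* 3 */
	printf("keyed pkt vs keyless tunnel matches? %d\n",
	       key_matches(true, 7, false, 0));				/* 0 */
	return 0;
}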
99
100#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
101static u32 HASH_ADDR(const struct in6_addr *addr)
102{
103 u32 hash = ipv6_addr_hash(addr);
104
105 return hash_32(hash, HASH_SIZE_SHIFT);
106}
107
108#define tunnels_r_l tunnels[3]
109#define tunnels_r tunnels[2]
110#define tunnels_l tunnels[1]
111#define tunnels_wc tunnels[0]
112
113static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
114 struct rtnl_link_stats64 *tot)
115{
116 int i;
117
118 for_each_possible_cpu(i) {
119 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
120 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
121 unsigned int start;
122
123 do {
124 start = u64_stats_fetch_begin_bh(&tstats->syncp);
125 rx_packets = tstats->rx_packets;
126 tx_packets = tstats->tx_packets;
127 rx_bytes = tstats->rx_bytes;
128 tx_bytes = tstats->tx_bytes;
129 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
130
131 tot->rx_packets += rx_packets;
132 tot->tx_packets += tx_packets;
133 tot->rx_bytes += rx_bytes;
134 tot->tx_bytes += tx_bytes;
135 }
136
137 tot->multicast = dev->stats.multicast;
138 tot->rx_crc_errors = dev->stats.rx_crc_errors;
139 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
140 tot->rx_length_errors = dev->stats.rx_length_errors;
141 tot->rx_frame_errors = dev->stats.rx_frame_errors;
142 tot->rx_errors = dev->stats.rx_errors;
143
144 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
145 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
146 tot->tx_dropped = dev->stats.tx_dropped;
147 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
148 tot->tx_errors = dev->stats.tx_errors;
149
150 return tot;
151}
152
153/* Given src, dst and key, find appropriate for input tunnel. */
154
155static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
156 const struct in6_addr *remote, const struct in6_addr *local,
157 __be32 key, __be16 gre_proto)
158{
159 struct net *net = dev_net(dev);
160 int link = dev->ifindex;
161 unsigned int h0 = HASH_ADDR(remote);
162 unsigned int h1 = HASH_KEY(key);
163 struct ip6_tnl *t, *cand = NULL;
164 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
165 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
166 ARPHRD_ETHER : ARPHRD_IP6GRE;
167 int score, cand_score = 4;
168
169 for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
170 if (!ipv6_addr_equal(local, &t->parms.laddr) ||
171 !ipv6_addr_equal(remote, &t->parms.raddr) ||
172 key != t->parms.i_key ||
173 !(t->dev->flags & IFF_UP))
174 continue;
175
176 if (t->dev->type != ARPHRD_IP6GRE &&
177 t->dev->type != dev_type)
178 continue;
179
180 score = 0;
181 if (t->parms.link != link)
182 score |= 1;
183 if (t->dev->type != dev_type)
184 score |= 2;
185 if (score == 0)
186 return t;
187
188 if (score < cand_score) {
189 cand = t;
190 cand_score = score;
191 }
192 }
193
194 for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
195 if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
196 key != t->parms.i_key ||
197 !(t->dev->flags & IFF_UP))
198 continue;
199
200 if (t->dev->type != ARPHRD_IP6GRE &&
201 t->dev->type != dev_type)
202 continue;
203
204 score = 0;
205 if (t->parms.link != link)
206 score |= 1;
207 if (t->dev->type != dev_type)
208 score |= 2;
209 if (score == 0)
210 return t;
211
212 if (score < cand_score) {
213 cand = t;
214 cand_score = score;
215 }
216 }
217
218 for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
219 if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
220 (!ipv6_addr_equal(local, &t->parms.raddr) ||
221 !ipv6_addr_is_multicast(local))) ||
222 key != t->parms.i_key ||
223 !(t->dev->flags & IFF_UP))
224 continue;
225
226 if (t->dev->type != ARPHRD_IP6GRE &&
227 t->dev->type != dev_type)
228 continue;
229
230 score = 0;
231 if (t->parms.link != link)
232 score |= 1;
233 if (t->dev->type != dev_type)
234 score |= 2;
235 if (score == 0)
236 return t;
237
238 if (score < cand_score) {
239 cand = t;
240 cand_score = score;
241 }
242 }
243
244 for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
245 if (t->parms.i_key != key ||
246 !(t->dev->flags & IFF_UP))
247 continue;
248
249 if (t->dev->type != ARPHRD_IP6GRE &&
250 t->dev->type != dev_type)
251 continue;
252
253 score = 0;
254 if (t->parms.link != link)
255 score |= 1;
256 if (t->dev->type != dev_type)
257 score |= 2;
258 if (score == 0)
259 return t;
260
261 if (score < cand_score) {
262 cand = t;
263 cand_score = score;
264 }
265 }
266
267 if (cand != NULL)
268 return cand;
269
270 dev = ign->fb_tunnel_dev;
271 if (dev->flags & IFF_UP)
272 return netdev_priv(dev);
273
274 return NULL;
275}
276
277static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
278 const struct __ip6_tnl_parm *p)
279{
280 const struct in6_addr *remote = &p->raddr;
281 const struct in6_addr *local = &p->laddr;
282 unsigned int h = HASH_KEY(p->i_key);
283 int prio = 0;
284
285 if (!ipv6_addr_any(local))
286 prio |= 1;
287 if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
288 prio |= 2;
289 h ^= HASH_ADDR(remote);
290 }
291
292 return &ign->tunnels[prio][h];
293}
294
295static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
296 const struct ip6_tnl *t)
297{
298 return __ip6gre_bucket(ign, &t->parms);
299}
300
301static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
302{
303 struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
304
305 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
306 rcu_assign_pointer(*tp, t);
307}
308
309static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
310{
311 struct ip6_tnl __rcu **tp;
312 struct ip6_tnl *iter;
313
314 for (tp = ip6gre_bucket(ign, t);
315 (iter = rtnl_dereference(*tp)) != NULL;
316 tp = &iter->next) {
317 if (t == iter) {
318 rcu_assign_pointer(*tp, t->next);
319 break;
320 }
321 }
322}
323
324static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
325 const struct __ip6_tnl_parm *parms,
326 int type)
327{
328 const struct in6_addr *remote = &parms->raddr;
329 const struct in6_addr *local = &parms->laddr;
330 __be32 key = parms->i_key;
331 int link = parms->link;
332 struct ip6_tnl *t;
333 struct ip6_tnl __rcu **tp;
334 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
335
336 for (tp = __ip6gre_bucket(ign, parms);
337 (t = rtnl_dereference(*tp)) != NULL;
338 tp = &t->next)
339 if (ipv6_addr_equal(local, &t->parms.laddr) &&
340 ipv6_addr_equal(remote, &t->parms.raddr) &&
341 key == t->parms.i_key &&
342 link == t->parms.link &&
343 type == t->dev->type)
344 break;
345
346 return t;
347}
348
349static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
350 const struct __ip6_tnl_parm *parms, int create)
351{
352 struct ip6_tnl *t, *nt;
353 struct net_device *dev;
354 char name[IFNAMSIZ];
355 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
356
357 t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
358 if (t || !create)
359 return t;
360
361 if (parms->name[0])
362 strlcpy(name, parms->name, IFNAMSIZ);
363 else
364 strcpy(name, "ip6gre%d");
365
366 dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
367 if (!dev)
368 return NULL;
369
370 dev_net_set(dev, net);
371
372 nt = netdev_priv(dev);
373 nt->parms = *parms;
374 dev->rtnl_link_ops = &ip6gre_link_ops;
375
376 nt->dev = dev;
377 ip6gre_tnl_link_config(nt, 1);
378
379 if (register_netdevice(dev) < 0)
380 goto failed_free;
381
382 /* Can use a lockless transmit, unless we generate output sequences */
383 if (!(nt->parms.o_flags & GRE_SEQ))
384 dev->features |= NETIF_F_LLTX;
385
386 dev_hold(dev);
387 ip6gre_tunnel_link(ign, nt);
388 return nt;
389
390failed_free:
391 free_netdev(dev);
392 return NULL;
393}
394
395static void ip6gre_tunnel_uninit(struct net_device *dev)
396{
397 struct net *net = dev_net(dev);
398 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
399
400 ip6gre_tunnel_unlink(ign, netdev_priv(dev));
401 dev_put(dev);
402}
403
404
405static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
406 u8 type, u8 code, int offset, __be32 info)
407{
408 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
409 __be16 *p = (__be16 *)(skb->data + offset);
410 int grehlen = offset + 4;
411 struct ip6_tnl *t;
412 __be16 flags;
413
414 flags = p[0];
415 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
416 if (flags&(GRE_VERSION|GRE_ROUTING))
417 return;
418 if (flags&GRE_KEY) {
419 grehlen += 4;
420 if (flags&GRE_CSUM)
421 grehlen += 4;
422 }
423 }
424
425 /* If only 8 bytes returned, keyed message will be dropped here */
426 if (!pskb_may_pull(skb, grehlen))
427 return;
428 ipv6h = (const struct ipv6hdr *)skb->data;
429 p = (__be16 *)(skb->data + offset);
430
431 t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
432 flags & GRE_KEY ?
433 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
434 p[1]);
435 if (t == NULL)
436 return;
437
438 switch (type) {
439 __u32 teli;
440 struct ipv6_tlv_tnl_enc_lim *tel;
441 __u32 mtu;
442 case ICMPV6_DEST_UNREACH:
443 net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
444 t->parms.name);
445 break;
446 case ICMPV6_TIME_EXCEED:
447 if (code == ICMPV6_EXC_HOPLIMIT) {
448 net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
449 t->parms.name);
450 }
451 break;
452 case ICMPV6_PARAMPROB:
453 teli = 0;
454 if (code == ICMPV6_HDR_FIELD)
455 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
456
457 if (teli && teli == info - 2) {
458 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
459 if (tel->encap_limit == 0) {
460 net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
461 t->parms.name);
462 }
463 } else {
464 net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
465 t->parms.name);
466 }
467 break;
468 case ICMPV6_PKT_TOOBIG:
469 mtu = info - offset;
470 if (mtu < IPV6_MIN_MTU)
471 mtu = IPV6_MIN_MTU;
472 t->dev->mtu = mtu;
473 break;
474 }
475
476 if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
477 t->err_count++;
478 else
479 t->err_count = 1;
480 t->err_time = jiffies;
481}
482
483static int ip6gre_rcv(struct sk_buff *skb)
484{
485 const struct ipv6hdr *ipv6h;
486 u8 *h;
487 __be16 flags;
488 __sum16 csum = 0;
489 __be32 key = 0;
490 u32 seqno = 0;
491 struct ip6_tnl *tunnel;
492 int offset = 4;
493 __be16 gre_proto;
494 int err;
495
496 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
497 goto drop;
498
499 ipv6h = ipv6_hdr(skb);
500 h = skb->data;
501 flags = *(__be16 *)h;
502
503 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
504 /* - Version must be 0.
505 - We do not support routing headers.
506 */
507 if (flags&(GRE_VERSION|GRE_ROUTING))
508 goto drop;
509
510 if (flags&GRE_CSUM) {
511 switch (skb->ip_summed) {
512 case CHECKSUM_COMPLETE:
513 csum = csum_fold(skb->csum);
514 if (!csum)
515 break;
516 /* fall through */
517 case CHECKSUM_NONE:
518 skb->csum = 0;
519 csum = __skb_checksum_complete(skb);
520 skb->ip_summed = CHECKSUM_COMPLETE;
521 }
522 offset += 4;
523 }
524 if (flags&GRE_KEY) {
525 key = *(__be32 *)(h + offset);
526 offset += 4;
527 }
528 if (flags&GRE_SEQ) {
529 seqno = ntohl(*(__be32 *)(h + offset));
530 offset += 4;
531 }
532 }
533
534 gre_proto = *(__be16 *)(h + 2);
535
536 tunnel = ip6gre_tunnel_lookup(skb->dev,
537 &ipv6h->saddr, &ipv6h->daddr, key,
538 gre_proto);
539 if (tunnel) {
540 struct pcpu_tstats *tstats;
541
542 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
543 goto drop;
544
545 if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
546 tunnel->dev->stats.rx_dropped++;
547 goto drop;
548 }
549
550 secpath_reset(skb);
551
552 skb->protocol = gre_proto;
553 /* WCCP version 1 and 2 protocol decoding.
554 * - Change protocol to IP
555 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
556 */
557 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
558 skb->protocol = htons(ETH_P_IP);
559 if ((*(h + offset) & 0xF0) != 0x40)
560 offset += 4;
561 }
562
563 skb->mac_header = skb->network_header;
564 __pskb_pull(skb, offset);
565 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
566 skb->pkt_type = PACKET_HOST;
567
568 if (((flags&GRE_CSUM) && csum) ||
569 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
570 tunnel->dev->stats.rx_crc_errors++;
571 tunnel->dev->stats.rx_errors++;
572 goto drop;
573 }
574 if (tunnel->parms.i_flags&GRE_SEQ) {
575 if (!(flags&GRE_SEQ) ||
576 (tunnel->i_seqno &&
577 (s32)(seqno - tunnel->i_seqno) < 0)) {
578 tunnel->dev->stats.rx_fifo_errors++;
579 tunnel->dev->stats.rx_errors++;
580 goto drop;
581 }
582 tunnel->i_seqno = seqno + 1;
583 }
584
585 /* Warning: All skb pointers will be invalidated! */
586 if (tunnel->dev->type == ARPHRD_ETHER) {
587 if (!pskb_may_pull(skb, ETH_HLEN)) {
588 tunnel->dev->stats.rx_length_errors++;
589 tunnel->dev->stats.rx_errors++;
590 goto drop;
591 }
592
593 ipv6h = ipv6_hdr(skb);
594 skb->protocol = eth_type_trans(skb, tunnel->dev);
595 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
596 }
597
598 __skb_tunnel_rx(skb, tunnel->dev);
599
600 skb_reset_network_header(skb);
601
602 err = IP6_ECN_decapsulate(ipv6h, skb);
603 if (unlikely(err)) {
604 if (log_ecn_error)
605 net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
606 &ipv6h->saddr,
607 ipv6_get_dsfield(ipv6h));
608 if (err > 1) {
609 ++tunnel->dev->stats.rx_frame_errors;
610 ++tunnel->dev->stats.rx_errors;
611 goto drop;
612 }
613 }
614
615 tstats = this_cpu_ptr(tunnel->dev->tstats);
616 u64_stats_update_begin(&tstats->syncp);
617 tstats->rx_packets++;
618 tstats->rx_bytes += skb->len;
619 u64_stats_update_end(&tstats->syncp);
620
621 netif_rx(skb);
622
623 return 0;
624 }
625 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
626
627drop:
628 kfree_skb(skb);
629 return 0;
630}
631
632struct ipv6_tel_txoption {
633 struct ipv6_txoptions ops;
634 __u8 dst_opt[8];
635};
636
637static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
638{
639 memset(opt, 0, sizeof(struct ipv6_tel_txoption));
640
641 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
642 opt->dst_opt[3] = 1;
643 opt->dst_opt[4] = encap_limit;
644 opt->dst_opt[5] = IPV6_TLV_PADN;
645 opt->dst_opt[6] = 1;
646
647 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
648 opt->ops.opt_nflen = 8;
649}
650
651static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
652 struct net_device *dev,
653 __u8 dsfield,
654 struct flowi6 *fl6,
655 int encap_limit,
656 __u32 *pmtu)
657{
658 struct net *net = dev_net(dev);
659 struct ip6_tnl *tunnel = netdev_priv(dev);
660 struct net_device *tdev; /* Device to other host */
661 struct ipv6hdr *ipv6h; /* Our new IP header */
662 unsigned int max_headroom; /* The extra header space needed */
663 int gre_hlen;
664 struct ipv6_tel_txoption opt;
665 int mtu;
666 struct dst_entry *dst = NULL, *ndst = NULL;
667 struct net_device_stats *stats = &tunnel->dev->stats;
668 int err = -1;
669 u8 proto;
670 int pkt_len;
671 struct sk_buff *new_skb;
672
673 if (dev->type == ARPHRD_ETHER)
674 IPCB(skb)->flags = 0;
675
676 if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
677 gre_hlen = 0;
678 ipv6h = (struct ipv6hdr *)skb->data;
679 fl6->daddr = ipv6h->daddr;
680 } else {
681 gre_hlen = tunnel->hlen;
682 fl6->daddr = tunnel->parms.raddr;
683 }
684
685 if (!fl6->flowi6_mark)
686 dst = ip6_tnl_dst_check(tunnel);
687
688 if (!dst) {
689 ndst = ip6_route_output(net, NULL, fl6);
690
691 if (ndst->error)
692 goto tx_err_link_failure;
693 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
694 if (IS_ERR(ndst)) {
695 err = PTR_ERR(ndst);
696 ndst = NULL;
697 goto tx_err_link_failure;
698 }
699 dst = ndst;
700 }
701
702 tdev = dst->dev;
703
704 if (tdev == dev) {
705 stats->collisions++;
706 net_warn_ratelimited("%s: Local routing loop detected!\n",
707 tunnel->parms.name);
708 goto tx_err_dst_release;
709 }
710
711 mtu = dst_mtu(dst) - sizeof(*ipv6h);
712 if (encap_limit >= 0) {
713 max_headroom += 8;
714 mtu -= 8;
715 }
716 if (mtu < IPV6_MIN_MTU)
717 mtu = IPV6_MIN_MTU;
718 if (skb_dst(skb))
719 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
720 if (skb->len > mtu) {
721 *pmtu = mtu;
722 err = -EMSGSIZE;
723 goto tx_err_dst_release;
724 }
725
726 if (tunnel->err_count > 0) {
727 if (time_before(jiffies,
728 tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
729 tunnel->err_count--;
730
731 dst_link_failure(skb);
732 } else
733 tunnel->err_count = 0;
734 }
735
736 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
737
738 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
739 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
740 new_skb = skb_realloc_headroom(skb, max_headroom);
741 if (max_headroom > dev->needed_headroom)
742 dev->needed_headroom = max_headroom;
743 if (!new_skb)
744 goto tx_err_dst_release;
745
746 if (skb->sk)
747 skb_set_owner_w(new_skb, skb->sk);
748 consume_skb(skb);
749 skb = new_skb;
750 }
751
752 skb_dst_drop(skb);
753
754 if (fl6->flowi6_mark) {
755 skb_dst_set(skb, dst);
756 ndst = NULL;
757 } else {
758 skb_dst_set_noref(skb, dst);
759 }
760
761 proto = NEXTHDR_GRE;
762 if (encap_limit >= 0) {
763 init_tel_txopt(&opt, encap_limit);
764 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
765 }
766
767 skb_push(skb, gre_hlen);
768 skb_reset_network_header(skb);
769 skb_set_transport_header(skb, sizeof(*ipv6h));
770
771 /*
772 * Push down and install the IP header.
773 */
774 ipv6h = ipv6_hdr(skb);
775 *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
776 dsfield = INET_ECN_encapsulate(0, dsfield);
777 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
778 ipv6h->hop_limit = tunnel->parms.hop_limit;
779 ipv6h->nexthdr = proto;
780 ipv6h->saddr = fl6->saddr;
781 ipv6h->daddr = fl6->daddr;
782
783 ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
784 ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
785 htons(ETH_P_TEB) : skb->protocol;
786
787 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
788 __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
789
790 if (tunnel->parms.o_flags&GRE_SEQ) {
791 ++tunnel->o_seqno;
792 *ptr = htonl(tunnel->o_seqno);
793 ptr--;
794 }
795 if (tunnel->parms.o_flags&GRE_KEY) {
796 *ptr = tunnel->parms.o_key;
797 ptr--;
798 }
799 if (tunnel->parms.o_flags&GRE_CSUM) {
800 *ptr = 0;
801 *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
802 skb->len - sizeof(struct ipv6hdr));
803 }
804 }
805
806 nf_reset(skb);
807 pkt_len = skb->len;
808 err = ip6_local_out(skb);
809
810 if (net_xmit_eval(err) == 0) {
811 struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
812
813 tstats->tx_bytes += pkt_len;
814 tstats->tx_packets++;
815 } else {
816 stats->tx_errors++;
817 stats->tx_aborted_errors++;
818 }
819
820 if (ndst)
821 ip6_tnl_dst_store(tunnel, ndst);
822
823 return 0;
824tx_err_link_failure:
825 stats->tx_carrier_errors++;
826 dst_link_failure(skb);
827tx_err_dst_release:
828 dst_release(ndst);
829 return err;
830}
831
832static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
833{
834 struct ip6_tnl *t = netdev_priv(dev);
835 const struct iphdr *iph = ip_hdr(skb);
836 int encap_limit = -1;
837 struct flowi6 fl6;
838 __u8 dsfield;
839 __u32 mtu;
840 int err;
841
842 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
843 encap_limit = t->parms.encap_limit;
844
845 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
846 fl6.flowi6_proto = IPPROTO_IPIP;
847
848 dsfield = ipv4_get_dsfield(iph);
849
850 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
851 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
852 & IPV6_TCLASS_MASK;
853 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
854 fl6.flowi6_mark = skb->mark;
855
856 err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
857 if (err != 0) {
858 /* XXX: send ICMP error even if DF is not set. */
859 if (err == -EMSGSIZE)
860 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
861 htonl(mtu));
862 return -1;
863 }
864
865 return 0;
866}
867
868static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
869{
870 struct ip6_tnl *t = netdev_priv(dev);
871 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
872 int encap_limit = -1;
873 __u16 offset;
874 struct flowi6 fl6;
875 __u8 dsfield;
876 __u32 mtu;
877 int err;
878
879 if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
880 return -1;
881
882 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
883 if (offset > 0) {
884 struct ipv6_tlv_tnl_enc_lim *tel;
885 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
886 if (tel->encap_limit == 0) {
887 icmpv6_send(skb, ICMPV6_PARAMPROB,
888 ICMPV6_HDR_FIELD, offset + 2);
889 return -1;
890 }
891 encap_limit = tel->encap_limit - 1;
892 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
893 encap_limit = t->parms.encap_limit;
894
895 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
896 fl6.flowi6_proto = IPPROTO_IPV6;
897
898 dsfield = ipv6_get_dsfield(ipv6h);
899 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
900 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
901 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
902 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
903 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
904 fl6.flowi6_mark = skb->mark;
905
906 err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
907 if (err != 0) {
908 if (err == -EMSGSIZE)
909 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
910 return -1;
911 }
912
913 return 0;
914}
915
916/**
917 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
918 * @t: the outgoing tunnel device
919 * @hdr: IPv6 header from the incoming packet
920 *
921 * Description:
922 * Avoid trivial tunneling loop by checking that tunnel exit-point
923 * doesn't match source of incoming packet.
924 *
925 * Return:
926 * 1 if conflict,
927 * 0 else
928 **/
929
930static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
931 const struct ipv6hdr *hdr)
932{
933 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
934}
935
936static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
937{
938 struct ip6_tnl *t = netdev_priv(dev);
939 int encap_limit = -1;
940 struct flowi6 fl6;
941 __u32 mtu;
942 int err;
943
944 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
945 encap_limit = t->parms.encap_limit;
946
947 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
948 fl6.flowi6_proto = skb->protocol;
949
950 err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
951
952 return err;
953}
954
955static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
956 struct net_device *dev)
957{
958 struct ip6_tnl *t = netdev_priv(dev);
959 struct net_device_stats *stats = &t->dev->stats;
960 int ret;
961
962 if (!ip6_tnl_xmit_ctl(t))
963 return -1;
964
965 switch (skb->protocol) {
966 case htons(ETH_P_IP):
967 ret = ip6gre_xmit_ipv4(skb, dev);
968 break;
969 case htons(ETH_P_IPV6):
970 ret = ip6gre_xmit_ipv6(skb, dev);
971 break;
972 default:
973 ret = ip6gre_xmit_other(skb, dev);
974 break;
975 }
976
977 if (ret < 0)
978 goto tx_err;
979
980 return NETDEV_TX_OK;
981
982tx_err:
983 stats->tx_errors++;
984 stats->tx_dropped++;
985 kfree_skb(skb);
986 return NETDEV_TX_OK;
987}
988
989static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
990{
991 struct net_device *dev = t->dev;
992 struct __ip6_tnl_parm *p = &t->parms;
993 struct flowi6 *fl6 = &t->fl.u.ip6;
994 int addend = sizeof(struct ipv6hdr) + 4;
995
996 if (dev->type != ARPHRD_ETHER) {
997 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
998 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
999 }
1000
1001 /* Set up flowi template */
1002 fl6->saddr = p->laddr;
1003 fl6->daddr = p->raddr;
1004 fl6->flowi6_oif = p->link;
1005 fl6->flowlabel = 0;
1006
1007 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1008 fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1009 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1010 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1011
1012 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
1013 p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1014
1015 if (p->flags&IP6_TNL_F_CAP_XMIT &&
1016 p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
1017 dev->flags |= IFF_POINTOPOINT;
1018 else
1019 dev->flags &= ~IFF_POINTOPOINT;
1020
1021 dev->iflink = p->link;
1022
1023 /* Precalculate GRE options length */
1024 if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1025 if (t->parms.o_flags&GRE_CSUM)
1026 addend += 4;
1027 if (t->parms.o_flags&GRE_KEY)
1028 addend += 4;
1029 if (t->parms.o_flags&GRE_SEQ)
1030 addend += 4;
1031 }
1032
1033 if (p->flags & IP6_TNL_F_CAP_XMIT) {
1034 int strict = (ipv6_addr_type(&p->raddr) &
1035 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1036
1037 struct rt6_info *rt = rt6_lookup(dev_net(dev),
1038 &p->raddr, &p->laddr,
1039 p->link, strict);
1040
1041 if (rt == NULL)
1042 return;
1043
1044 if (rt->dst.dev) {
1045 dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
1046
1047 if (set_mtu) {
1048 dev->mtu = rt->dst.dev->mtu - addend;
1049 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1050 dev->mtu -= 8;
1051
1052 if (dev->mtu < IPV6_MIN_MTU)
1053 dev->mtu = IPV6_MIN_MTU;
1054 }
1055 }
1056 ip6_rt_put(rt);
1057 }
1058
1059 t->hlen = addend;
1060}
1061
1062static int ip6gre_tnl_change(struct ip6_tnl *t,
1063 const struct __ip6_tnl_parm *p, int set_mtu)
1064{
1065 t->parms.laddr = p->laddr;
1066 t->parms.raddr = p->raddr;
1067 t->parms.flags = p->flags;
1068 t->parms.hop_limit = p->hop_limit;
1069 t->parms.encap_limit = p->encap_limit;
1070 t->parms.flowinfo = p->flowinfo;
1071 t->parms.link = p->link;
1072 t->parms.proto = p->proto;
1073 t->parms.i_key = p->i_key;
1074 t->parms.o_key = p->o_key;
1075 t->parms.i_flags = p->i_flags;
1076 t->parms.o_flags = p->o_flags;
1077 ip6_tnl_dst_reset(t);
1078 ip6gre_tnl_link_config(t, set_mtu);
1079 return 0;
1080}
1081
1082static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
1083 const struct ip6_tnl_parm2 *u)
1084{
1085 p->laddr = u->laddr;
1086 p->raddr = u->raddr;
1087 p->flags = u->flags;
1088 p->hop_limit = u->hop_limit;
1089 p->encap_limit = u->encap_limit;
1090 p->flowinfo = u->flowinfo;
1091 p->link = u->link;
1092 p->i_key = u->i_key;
1093 p->o_key = u->o_key;
1094 p->i_flags = u->i_flags;
1095 p->o_flags = u->o_flags;
1096 memcpy(p->name, u->name, sizeof(u->name));
1097}
1098
1099static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
1100 const struct __ip6_tnl_parm *p)
1101{
1102 u->proto = IPPROTO_GRE;
1103 u->laddr = p->laddr;
1104 u->raddr = p->raddr;
1105 u->flags = p->flags;
1106 u->hop_limit = p->hop_limit;
1107 u->encap_limit = p->encap_limit;
1108 u->flowinfo = p->flowinfo;
1109 u->link = p->link;
1110 u->i_key = p->i_key;
1111 u->o_key = p->o_key;
1112 u->i_flags = p->i_flags;
1113 u->o_flags = p->o_flags;
1114 memcpy(u->name, p->name, sizeof(u->name));
1115}
1116
1117static int ip6gre_tunnel_ioctl(struct net_device *dev,
1118 struct ifreq *ifr, int cmd)
1119{
1120 int err = 0;
1121 struct ip6_tnl_parm2 p;
1122 struct __ip6_tnl_parm p1;
1123 struct ip6_tnl *t;
1124 struct net *net = dev_net(dev);
1125 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1126
1127 switch (cmd) {
1128 case SIOCGETTUNNEL:
1129 t = NULL;
1130 if (dev == ign->fb_tunnel_dev) {
1131 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1132 err = -EFAULT;
1133 break;
1134 }
1135 ip6gre_tnl_parm_from_user(&p1, &p);
1136 t = ip6gre_tunnel_locate(net, &p1, 0);
1137 }
1138 if (t == NULL)
1139 t = netdev_priv(dev);
1140 ip6gre_tnl_parm_to_user(&p, &t->parms);
1141 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1142 err = -EFAULT;
1143 break;
1144
1145 case SIOCADDTUNNEL:
1146 case SIOCCHGTUNNEL:
1147 err = -EPERM;
1148 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1149 goto done;
1150
1151 err = -EFAULT;
1152 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1153 goto done;
1154
1155 err = -EINVAL;
1156 if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
1157 goto done;
1158
1159 if (!(p.i_flags&GRE_KEY))
1160 p.i_key = 0;
1161 if (!(p.o_flags&GRE_KEY))
1162 p.o_key = 0;
1163
1164 ip6gre_tnl_parm_from_user(&p1, &p);
1165 t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
1166
1167 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1168 if (t != NULL) {
1169 if (t->dev != dev) {
1170 err = -EEXIST;
1171 break;
1172 }
1173 } else {
1174 t = netdev_priv(dev);
1175
1176 ip6gre_tunnel_unlink(ign, t);
1177 synchronize_net();
1178 ip6gre_tnl_change(t, &p1, 1);
1179 ip6gre_tunnel_link(ign, t);
1180 netdev_state_change(dev);
1181 }
1182 }
1183
1184 if (t) {
1185 err = 0;
1186
1187 ip6gre_tnl_parm_to_user(&p, &t->parms);
1188 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1189 err = -EFAULT;
1190 } else
1191 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1192 break;
1193
1194 case SIOCDELTUNNEL:
1195 err = -EPERM;
1196 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1197 goto done;
1198
1199 if (dev == ign->fb_tunnel_dev) {
1200 err = -EFAULT;
1201 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1202 goto done;
1203 err = -ENOENT;
1204 ip6gre_tnl_parm_from_user(&p1, &p);
1205 t = ip6gre_tunnel_locate(net, &p1, 0);
1206 if (t == NULL)
1207 goto done;
1208 err = -EPERM;
1209 if (t == netdev_priv(ign->fb_tunnel_dev))
1210 goto done;
1211 dev = t->dev;
1212 }
1213 unregister_netdevice(dev);
1214 err = 0;
1215 break;
1216
1217 default:
1218 err = -EINVAL;
1219 }
1220
1221done:
1222 return err;
1223}
1224
1225static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1226{
1227 struct ip6_tnl *tunnel = netdev_priv(dev);
1228 if (new_mtu < 68 ||
1229 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1230 return -EINVAL;
1231 dev->mtu = new_mtu;
1232 return 0;
1233}
1234
1235static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
1236 unsigned short type,
1237 const void *daddr, const void *saddr, unsigned int len)
1238{
1239 struct ip6_tnl *t = netdev_priv(dev);
1240 struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
1241 __be16 *p = (__be16 *)(ipv6h+1);
1242
1243 *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
1244 ipv6h->hop_limit = t->parms.hop_limit;
1245 ipv6h->nexthdr = NEXTHDR_GRE;
1246 ipv6h->saddr = t->parms.laddr;
1247 ipv6h->daddr = t->parms.raddr;
1248
1249 p[0] = t->parms.o_flags;
1250 p[1] = htons(type);
1251
1252 /*
1253 * Set the source hardware address.
1254 */
1255
1256 if (saddr)
1257 memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
1258 if (daddr)
1259 memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
1260 if (!ipv6_addr_any(&ipv6h->daddr))
1261 return t->hlen;
1262
1263 return -t->hlen;
1264}
1265
1266static const struct header_ops ip6gre_header_ops = {
1267 .create = ip6gre_header,
1268};
1269
1270static const struct net_device_ops ip6gre_netdev_ops = {
1271 .ndo_init = ip6gre_tunnel_init,
1272 .ndo_uninit = ip6gre_tunnel_uninit,
1273 .ndo_start_xmit = ip6gre_tunnel_xmit,
1274 .ndo_do_ioctl = ip6gre_tunnel_ioctl,
1275 .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1276 .ndo_get_stats64 = ip6gre_get_stats64,
1277};
1278
1279static void ip6gre_dev_free(struct net_device *dev)
1280{
1281 free_percpu(dev->tstats);
1282 free_netdev(dev);
1283}
1284
1285static void ip6gre_tunnel_setup(struct net_device *dev)
1286{
1287 struct ip6_tnl *t;
1288
1289 dev->netdev_ops = &ip6gre_netdev_ops;
1290 dev->destructor = ip6gre_dev_free;
1291
1292 dev->type = ARPHRD_IP6GRE;
1293 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
1294 dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
1295 t = netdev_priv(dev);
1296 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1297 dev->mtu -= 8;
1298 dev->flags |= IFF_NOARP;
1299 dev->iflink = 0;
1300 dev->addr_len = sizeof(struct in6_addr);
1301 dev->features |= NETIF_F_NETNS_LOCAL;
1302 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1303}
1304
1305static int ip6gre_tunnel_init(struct net_device *dev)
1306{
1307 struct ip6_tnl *tunnel;
1308
1309 tunnel = netdev_priv(dev);
1310
1311 tunnel->dev = dev;
1312 strcpy(tunnel->parms.name, dev->name);
1313
1314 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
1315 memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
1316
1317 if (ipv6_addr_any(&tunnel->parms.raddr))
1318 dev->header_ops = &ip6gre_header_ops;
1319
1320 dev->tstats = alloc_percpu(struct pcpu_tstats);
1321 if (!dev->tstats)
1322 return -ENOMEM;
1323
1324 return 0;
1325}
1326
1327static void ip6gre_fb_tunnel_init(struct net_device *dev)
1328{
1329 struct ip6_tnl *tunnel = netdev_priv(dev);
1330
1331 tunnel->dev = dev;
1332 strcpy(tunnel->parms.name, dev->name);
1333
1334 tunnel->hlen = sizeof(struct ipv6hdr) + 4;
1335
1336 dev_hold(dev);
1337}
1338
1339
1340static struct inet6_protocol ip6gre_protocol __read_mostly = {
1341 .handler = ip6gre_rcv,
1342 .err_handler = ip6gre_err,
1343 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1344};
1345
1346static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
1347 struct list_head *head)
1348{
1349 int prio;
1350
1351 for (prio = 0; prio < 4; prio++) {
1352 int h;
1353 for (h = 0; h < HASH_SIZE; h++) {
1354 struct ip6_tnl *t;
1355
1356 t = rtnl_dereference(ign->tunnels[prio][h]);
1357
1358 while (t != NULL) {
1359 unregister_netdevice_queue(t->dev, head);
1360 t = rtnl_dereference(t->next);
1361 }
1362 }
1363 }
1364}
1365
1366static int __net_init ip6gre_init_net(struct net *net)
1367{
1368 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1369 int err;
1370
1371 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
1372 ip6gre_tunnel_setup);
1373 if (!ign->fb_tunnel_dev) {
1374 err = -ENOMEM;
1375 goto err_alloc_dev;
1376 }
1377 dev_net_set(ign->fb_tunnel_dev, net);
1378
1379 ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
1380 ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
1381
1382 err = register_netdev(ign->fb_tunnel_dev);
1383 if (err)
1384 goto err_reg_dev;
1385
1386 rcu_assign_pointer(ign->tunnels_wc[0],
1387 netdev_priv(ign->fb_tunnel_dev));
1388 return 0;
1389
1390err_reg_dev:
1391 ip6gre_dev_free(ign->fb_tunnel_dev);
1392err_alloc_dev:
1393 return err;
1394}
1395
1396static void __net_exit ip6gre_exit_net(struct net *net)
1397{
1398 struct ip6gre_net *ign;
1399 LIST_HEAD(list);
1400
1401 ign = net_generic(net, ip6gre_net_id);
1402 rtnl_lock();
1403 ip6gre_destroy_tunnels(ign, &list);
1404 unregister_netdevice_many(&list);
1405 rtnl_unlock();
1406}
1407
1408static struct pernet_operations ip6gre_net_ops = {
1409 .init = ip6gre_init_net,
1410 .exit = ip6gre_exit_net,
1411 .id = &ip6gre_net_id,
1412 .size = sizeof(struct ip6gre_net),
1413};
1414
1415static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1416{
1417 __be16 flags;
1418
1419 if (!data)
1420 return 0;
1421
1422 flags = 0;
1423 if (data[IFLA_GRE_IFLAGS])
1424 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1425 if (data[IFLA_GRE_OFLAGS])
1426 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1427 if (flags & (GRE_VERSION|GRE_ROUTING))
1428 return -EINVAL;
1429
1430 return 0;
1431}
1432
1433static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1434{
1435 struct in6_addr daddr;
1436
1437 if (tb[IFLA_ADDRESS]) {
1438 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1439 return -EINVAL;
1440 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1441 return -EADDRNOTAVAIL;
1442 }
1443
1444 if (!data)
1445 goto out;
1446
1447 if (data[IFLA_GRE_REMOTE]) {
1448 nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
1449 if (ipv6_addr_any(&daddr))
1450 return -EINVAL;
1451 }
1452
1453out:
1454 return ip6gre_tunnel_validate(tb, data);
1455}
1456
1457
1458static void ip6gre_netlink_parms(struct nlattr *data[],
1459 struct __ip6_tnl_parm *parms)
1460{
1461 memset(parms, 0, sizeof(*parms));
1462
1463 if (!data)
1464 return;
1465
1466 if (data[IFLA_GRE_LINK])
1467 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1468
1469 if (data[IFLA_GRE_IFLAGS])
1470 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1471
1472 if (data[IFLA_GRE_OFLAGS])
1473 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1474
1475 if (data[IFLA_GRE_IKEY])
1476 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1477
1478 if (data[IFLA_GRE_OKEY])
1479 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1480
1481 if (data[IFLA_GRE_LOCAL])
1482 nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
1483
1484 if (data[IFLA_GRE_REMOTE])
1485 nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
1486
1487 if (data[IFLA_GRE_TTL])
1488 parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
1489
1490 if (data[IFLA_GRE_ENCAP_LIMIT])
1491 parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
1492
1493 if (data[IFLA_GRE_FLOWINFO])
1494 parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
1495
1496 if (data[IFLA_GRE_FLAGS])
1497 parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
1498}
1499
1500static int ip6gre_tap_init(struct net_device *dev)
1501{
1502 struct ip6_tnl *tunnel;
1503
1504 tunnel = netdev_priv(dev);
1505
1506 tunnel->dev = dev;
1507 strcpy(tunnel->parms.name, dev->name);
1508
1509 ip6gre_tnl_link_config(tunnel, 1);
1510
1511 dev->tstats = alloc_percpu(struct pcpu_tstats);
1512 if (!dev->tstats)
1513 return -ENOMEM;
1514
1515 return 0;
1516}
1517
1518static const struct net_device_ops ip6gre_tap_netdev_ops = {
1519 .ndo_init = ip6gre_tap_init,
1520 .ndo_uninit = ip6gre_tunnel_uninit,
1521 .ndo_start_xmit = ip6gre_tunnel_xmit,
1522 .ndo_set_mac_address = eth_mac_addr,
1523 .ndo_validate_addr = eth_validate_addr,
1524 .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1525 .ndo_get_stats64 = ip6gre_get_stats64,
1526};
1527
1528static void ip6gre_tap_setup(struct net_device *dev)
1529{
1530
1531 ether_setup(dev);
1532
1533 dev->netdev_ops = &ip6gre_tap_netdev_ops;
1534 dev->destructor = ip6gre_dev_free;
1535
1536 dev->iflink = 0;
1537 dev->features |= NETIF_F_NETNS_LOCAL;
1538}
1539
1540static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1541 struct nlattr *tb[], struct nlattr *data[])
1542{
1543 struct ip6_tnl *nt;
1544 struct net *net = dev_net(dev);
1545 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1546 int err;
1547
1548 nt = netdev_priv(dev);
1549 ip6gre_netlink_parms(data, &nt->parms);
1550
1551 if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
1552 return -EEXIST;
1553
1554 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1555 eth_hw_addr_random(dev);
1556
1557 nt->dev = dev;
1558 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1559
1560 /* Can use a lockless transmit, unless we generate output sequences */
1561 if (!(nt->parms.o_flags & GRE_SEQ))
1562 dev->features |= NETIF_F_LLTX;
1563
1564 err = register_netdevice(dev);
1565 if (err)
1566 goto out;
1567
1568 dev_hold(dev);
1569 ip6gre_tunnel_link(ign, nt);
1570
1571out:
1572 return err;
1573}
1574
1575static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
1576 struct nlattr *data[])
1577{
1578 struct ip6_tnl *t, *nt;
1579 struct net *net = dev_net(dev);
1580 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1581 struct __ip6_tnl_parm p;
1582
1583 if (dev == ign->fb_tunnel_dev)
1584 return -EINVAL;
1585
1586 nt = netdev_priv(dev);
1587 ip6gre_netlink_parms(data, &p);
1588
1589 t = ip6gre_tunnel_locate(net, &p, 0);
1590
1591 if (t) {
1592 if (t->dev != dev)
1593 return -EEXIST;
1594 } else {
1595 t = nt;
1596
1597 ip6gre_tunnel_unlink(ign, t);
1598 ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
1599 ip6gre_tunnel_link(ign, t);
1600 netdev_state_change(dev);
1601 }
1602
1603 return 0;
1604}
1605
1606static size_t ip6gre_get_size(const struct net_device *dev)
1607{
1608 return
1609 /* IFLA_GRE_LINK */
1610 nla_total_size(4) +
1611 /* IFLA_GRE_IFLAGS */
1612 nla_total_size(2) +
1613 /* IFLA_GRE_OFLAGS */
1614 nla_total_size(2) +
1615 /* IFLA_GRE_IKEY */
1616 nla_total_size(4) +
1617 /* IFLA_GRE_OKEY */
1618 nla_total_size(4) +
1619 /* IFLA_GRE_LOCAL */
1620 nla_total_size(sizeof(struct in6_addr)) +
1621 /* IFLA_GRE_REMOTE */
1622 nla_total_size(sizeof(struct in6_addr)) +
1623 /* IFLA_GRE_TTL */
1624 nla_total_size(1) +
1625 /* IFLA_GRE_TOS */
1626 nla_total_size(1) +
1627 /* IFLA_GRE_ENCAP_LIMIT */
1628 nla_total_size(1) +
1629 /* IFLA_GRE_FLOWINFO */
1630 nla_total_size(4) +
1631 /* IFLA_GRE_FLAGS */
1632 nla_total_size(4) +
1633 0;
1634}
1635
1636static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1637{
1638 struct ip6_tnl *t = netdev_priv(dev);
1639 struct __ip6_tnl_parm *p = &t->parms;
1640
1641 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1642 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1643 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1644 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1645 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1646 nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
1647 nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
1648 nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
1649 /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
1650 nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
1651 nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
1652 nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
1653 goto nla_put_failure;
1654 return 0;
1655
1656nla_put_failure:
1657 return -EMSGSIZE;
1658}
1659
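The ip6gre_get_size()/ip6gre_fill_info() pair above follows the usual rtnl contract: get_size() budgets worst-case attribute space, and fill_info() has to stay inside that budget or bail out with -EMSGSIZE. A minimal userspace sketch of that accounting follows; struct nla_hdr, NLA_ALIGN, NLA_HDRLEN and nla_total_size_model() are local stand-ins for the kernel's netlink helpers, not the real definitions.

#include <stdio.h>
#include <netinet/in.h>		/* struct in6_addr */

/* Local stand-ins for the kernel's netlink attribute accounting. */
struct nla_hdr { unsigned short nla_len, nla_type; };	/* 4 bytes */

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	((int)NLA_ALIGN(sizeof(struct nla_hdr)))

static int nla_total_size_model(int payload)
{
	/* attribute header plus payload, rounded up to a 4-byte boundary */
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	int size = 0;

	size += nla_total_size_model(4);			/* IFLA_GRE_LINK */
	size += nla_total_size_model(2);			/* IFLA_GRE_IFLAGS */
	size += nla_total_size_model(2);			/* IFLA_GRE_OFLAGS */
	size += nla_total_size_model(4);			/* IFLA_GRE_IKEY */
	size += nla_total_size_model(4);			/* IFLA_GRE_OKEY */
	size += nla_total_size_model(sizeof(struct in6_addr));	/* IFLA_GRE_LOCAL */
	size += nla_total_size_model(sizeof(struct in6_addr));	/* IFLA_GRE_REMOTE */
	size += nla_total_size_model(1);			/* IFLA_GRE_TTL */
	size += nla_total_size_model(1);			/* IFLA_GRE_TOS */
	size += nla_total_size_model(1);			/* IFLA_GRE_ENCAP_LIMIT */
	size += nla_total_size_model(4);			/* IFLA_GRE_FLOWINFO */
	size += nla_total_size_model(4);			/* IFLA_GRE_FLAGS */

	printf("worst-case attribute space: %d bytes\n", size);	/* 120 on common ABIs */
	return 0;
}

Note that ip6gre_get_size() above also budgets a byte for IFLA_GRE_TOS even though the corresponding nla_put_u8() in ip6gre_fill_info() is commented out; over-reserving is harmless, while under-reserving would surface as -EMSGSIZE at dump time.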
1660static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
1661 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1662 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1663 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1664 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1665 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1666 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
1667 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
1668 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1669 [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
1670 [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
1671 [IFLA_GRE_FLAGS] = { .type = NLA_U32 },
1672};
1673
1674static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
1675 .kind = "ip6gre",
1676 .maxtype = IFLA_GRE_MAX,
1677 .policy = ip6gre_policy,
1678 .priv_size = sizeof(struct ip6_tnl),
1679 .setup = ip6gre_tunnel_setup,
1680 .validate = ip6gre_tunnel_validate,
1681 .newlink = ip6gre_newlink,
1682 .changelink = ip6gre_changelink,
1683 .get_size = ip6gre_get_size,
1684 .fill_info = ip6gre_fill_info,
1685};
1686
1687static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
1688 .kind = "ip6gretap",
1689 .maxtype = IFLA_GRE_MAX,
1690 .policy = ip6gre_policy,
1691 .priv_size = sizeof(struct ip6_tnl),
1692 .setup = ip6gre_tap_setup,
1693 .validate = ip6gre_tap_validate,
1694 .newlink = ip6gre_newlink,
1695 .changelink = ip6gre_changelink,
1696 .get_size = ip6gre_get_size,
1697 .fill_info = ip6gre_fill_info,
1698};
1699
1700/*
1701 * And now the modules code and kernel interface.
1702 */
1703
1704static int __init ip6gre_init(void)
1705{
1706 int err;
1707
1708 pr_info("GRE over IPv6 tunneling driver\n");
1709
1710 err = register_pernet_device(&ip6gre_net_ops);
1711 if (err < 0)
1712 return err;
1713
1714 err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
1715 if (err < 0) {
1716 pr_info("%s: can't add protocol\n", __func__);
1717 goto add_proto_failed;
1718 }
1719
1720 err = rtnl_link_register(&ip6gre_link_ops);
1721 if (err < 0)
1722 goto rtnl_link_failed;
1723
1724 err = rtnl_link_register(&ip6gre_tap_ops);
1725 if (err < 0)
1726 goto tap_ops_failed;
1727
1728out:
1729 return err;
1730
1731tap_ops_failed:
1732 rtnl_link_unregister(&ip6gre_link_ops);
1733rtnl_link_failed:
1734 inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1735add_proto_failed:
1736 unregister_pernet_device(&ip6gre_net_ops);
1737 goto out;
1738}
1739
1740static void __exit ip6gre_fini(void)
1741{
1742 rtnl_link_unregister(&ip6gre_tap_ops);
1743 rtnl_link_unregister(&ip6gre_link_ops);
1744 inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1745 unregister_pernet_device(&ip6gre_net_ops);
1746}
1747
1748module_init(ip6gre_init);
1749module_exit(ip6gre_fini);
1750MODULE_LICENSE("GPL");
1751MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
1752MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
1753MODULE_ALIAS_RTNL_LINK("ip6gre");
1754MODULE_ALIAS_NETDEV("ip6gre0");
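ip6gre_init() above registers the pernet device, the inet6 GRE protocol handler and two rtnl_link_ops, and unwinds them in reverse order through the goto ladder when any step fails. Below is a minimal sketch of that init/unwind shape using hypothetical register_*/unregister_* stubs rather than kernel APIs.

#include <stdio.h>

static int  register_a(void)   { puts("A registered");   return 0;  }
static int  register_b(void)   { puts("B registered");   return 0;  }
static int  register_c(void)   { puts("C failed");       return -1; }
static void unregister_a(void) { puts("A unregistered"); }
static void unregister_b(void) { puts("B unregistered"); }

static int demo_init(void)
{
	int err;

	err = register_a();
	if (err < 0)
		return err;

	err = register_b();
	if (err < 0)
		goto b_failed;

	err = register_c();
	if (err < 0)
		goto c_failed;

out:
	return err;

c_failed:
	unregister_b();		/* undo only what succeeded, newest first */
b_failed:
	unregister_a();
	goto out;
}

int main(void)
{
	return demo_init() ? 1 : 0;
}

The same reverse order is what ip6gre_fini() runs unconditionally at module unload.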
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a52d864d562..027c7ff6f1e 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,16 +47,9 @@
47 47
48 48
49 49
50int ip6_rcv_finish(struct sk_buff *skb) 50inline int ip6_rcv_finish( struct sk_buff *skb)
51{ 51{
52 if (sysctl_ip_early_demux && !skb_dst(skb)) { 52 if (skb_dst(skb) == NULL)
53 const struct inet6_protocol *ipprot;
54
55 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
56 if (ipprot && ipprot->early_demux)
57 ipprot->early_demux(skb);
58 }
59 if (!skb_dst(skb))
60 ip6_route_input(skb); 53 ip6_route_input(skb);
61 54
62 return dst_input(skb); 55 return dst_input(skb);
@@ -118,14 +111,6 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
118 ipv6_addr_loopback(&hdr->daddr)) 111 ipv6_addr_loopback(&hdr->daddr))
119 goto err; 112 goto err;
120 113
121 /*
122 * RFC4291 2.7
123 * Multicast addresses must not be used as source addresses in IPv6
124 * packets or appear in any Routing header.
125 */
126 if (ipv6_addr_is_multicast(&hdr->saddr))
127 goto err;
128
129 skb->transport_header = skb->network_header + sizeof(*hdr); 114 skb->transport_header = skb->network_header + sizeof(*hdr);
130 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); 115 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
131 116
@@ -175,12 +160,12 @@ drop:
175 160
176static int ip6_input_finish(struct sk_buff *skb) 161static int ip6_input_finish(struct sk_buff *skb)
177{ 162{
178 struct net *net = dev_net(skb_dst(skb)->dev);
179 const struct inet6_protocol *ipprot; 163 const struct inet6_protocol *ipprot;
180 struct inet6_dev *idev;
181 unsigned int nhoff; 164 unsigned int nhoff;
182 int nexthdr; 165 int nexthdr, raw;
183 bool raw; 166 u8 hash;
167 struct inet6_dev *idev;
168 struct net *net = dev_net(skb_dst(skb)->dev);
184 169
185 /* 170 /*
186 * Parse extension headers 171 * Parse extension headers
@@ -195,7 +180,9 @@ resubmit:
195 nexthdr = skb_network_header(skb)[nhoff]; 180 nexthdr = skb_network_header(skb)[nhoff];
196 181
197 raw = raw6_local_deliver(skb, nexthdr); 182 raw = raw6_local_deliver(skb, nexthdr);
198 if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { 183
184 hash = nexthdr & (MAX_INET_PROTOS - 1);
185 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
199 int ret; 186 int ret;
200 187
201 if (ipprot->flags & INET6_PROTO_FINAL) { 188 if (ipprot->flags & INET6_PROTO_FINAL) {
@@ -256,7 +243,7 @@ int ip6_input(struct sk_buff *skb)
256int ip6_mc_input(struct sk_buff *skb) 243int ip6_mc_input(struct sk_buff *skb)
257{ 244{
258 const struct ipv6hdr *hdr; 245 const struct ipv6hdr *hdr;
259 bool deliver; 246 int deliver;
260 247
261 IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev), 248 IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
262 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, 249 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
@@ -285,21 +272,20 @@ int ip6_mc_input(struct sk_buff *skb)
285 u8 *ptr = skb_network_header(skb) + opt->ra; 272 u8 *ptr = skb_network_header(skb) + opt->ra;
286 struct icmp6hdr *icmp6; 273 struct icmp6hdr *icmp6;
287 u8 nexthdr = hdr->nexthdr; 274 u8 nexthdr = hdr->nexthdr;
288 __be16 frag_off;
289 int offset; 275 int offset;
290 276
291 /* Check if the value of Router Alert 277 /* Check if the value of Router Alert
292 * is for MLD (0x0000). 278 * is for MLD (0x0000).
293 */ 279 */
294 if ((ptr[2] | ptr[3]) == 0) { 280 if ((ptr[2] | ptr[3]) == 0) {
295 deliver = false; 281 deliver = 0;
296 282
297 if (!ipv6_ext_hdr(nexthdr)) { 283 if (!ipv6_ext_hdr(nexthdr)) {
298 /* BUG */ 284 /* BUG */
299 goto out; 285 goto out;
300 } 286 }
301 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), 287 offset = ipv6_skip_exthdr(skb, sizeof(*hdr),
302 &nexthdr, &frag_off); 288 &nexthdr);
303 if (offset < 0) 289 if (offset < 0)
304 goto out; 290 goto out;
305 291
@@ -317,7 +303,7 @@ int ip6_mc_input(struct sk_buff *skb)
317 case ICMPV6_MGM_REPORT: 303 case ICMPV6_MGM_REPORT:
318 case ICMPV6_MGM_REDUCTION: 304 case ICMPV6_MGM_REDUCTION:
319 case ICMPV6_MLD2_REPORT: 305 case ICMPV6_MLD2_REPORT:
320 deliver = true; 306 deliver = 1;
321 break; 307 break;
322 } 308 }
323 goto out; 309 goto out;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
deleted file mode 100644
index f26f0da7f09..00000000000
--- a/net/ipv6/ip6_offload.c
+++ /dev/null
@@ -1,282 +0,0 @@
1/*
2 * IPV6 GSO/GRO offload support
3 * Linux INET6 implementation
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#include <linux/kernel.h>
12#include <linux/socket.h>
13#include <linux/netdevice.h>
14#include <linux/skbuff.h>
15#include <linux/printk.h>
16
17#include <net/protocol.h>
18#include <net/ipv6.h>
19
20#include "ip6_offload.h"
21
22static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
23{
24 const struct net_offload *ops = NULL;
25
26 for (;;) {
27 struct ipv6_opt_hdr *opth;
28 int len;
29
30 if (proto != NEXTHDR_HOP) {
31 ops = rcu_dereference(inet6_offloads[proto]);
32
33 if (unlikely(!ops))
34 break;
35
36 if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
37 break;
38 }
39
40 if (unlikely(!pskb_may_pull(skb, 8)))
41 break;
42
43 opth = (void *)skb->data;
44 len = ipv6_optlen(opth);
45
46 if (unlikely(!pskb_may_pull(skb, len)))
47 break;
48
49 proto = opth->nexthdr;
50 __skb_pull(skb, len);
51 }
52
53 return proto;
54}
55
56static int ipv6_gso_send_check(struct sk_buff *skb)
57{
58 const struct ipv6hdr *ipv6h;
59 const struct net_offload *ops;
60 int err = -EINVAL;
61
62 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
63 goto out;
64
65 ipv6h = ipv6_hdr(skb);
66 __skb_pull(skb, sizeof(*ipv6h));
67 err = -EPROTONOSUPPORT;
68
69 rcu_read_lock();
70 ops = rcu_dereference(inet6_offloads[
71 ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]);
72
73 if (likely(ops && ops->callbacks.gso_send_check)) {
74 skb_reset_transport_header(skb);
75 err = ops->callbacks.gso_send_check(skb);
76 }
77 rcu_read_unlock();
78
79out:
80 return err;
81}
82
83static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
84 netdev_features_t features)
85{
86 struct sk_buff *segs = ERR_PTR(-EINVAL);
87 struct ipv6hdr *ipv6h;
88 const struct net_offload *ops;
89 int proto;
90 struct frag_hdr *fptr;
91 unsigned int unfrag_ip6hlen;
92 u8 *prevhdr;
93 int offset = 0;
94
95 if (!(features & NETIF_F_V6_CSUM))
96 features &= ~NETIF_F_SG;
97
98 if (unlikely(skb_shinfo(skb)->gso_type &
99 ~(SKB_GSO_UDP |
100 SKB_GSO_DODGY |
101 SKB_GSO_TCP_ECN |
102 SKB_GSO_TCPV6 |
103 0)))
104 goto out;
105
106 if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
107 goto out;
108
109 ipv6h = ipv6_hdr(skb);
110 __skb_pull(skb, sizeof(*ipv6h));
111 segs = ERR_PTR(-EPROTONOSUPPORT);
112
113 proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
114 rcu_read_lock();
115 ops = rcu_dereference(inet6_offloads[proto]);
116 if (likely(ops && ops->callbacks.gso_segment)) {
117 skb_reset_transport_header(skb);
118 segs = ops->callbacks.gso_segment(skb, features);
119 }
120 rcu_read_unlock();
121
122 if (IS_ERR(segs))
123 goto out;
124
125 for (skb = segs; skb; skb = skb->next) {
126 ipv6h = ipv6_hdr(skb);
127 ipv6h->payload_len = htons(skb->len - skb->mac_len -
128 sizeof(*ipv6h));
129 if (proto == IPPROTO_UDP) {
130 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
131 fptr = (struct frag_hdr *)(skb_network_header(skb) +
132 unfrag_ip6hlen);
133 fptr->frag_off = htons(offset);
134 if (skb->next != NULL)
135 fptr->frag_off |= htons(IP6_MF);
136 offset += (ntohs(ipv6h->payload_len) -
137 sizeof(struct frag_hdr));
138 }
139 }
140
141out:
142 return segs;
143}
144
145static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
146 struct sk_buff *skb)
147{
148 const struct net_offload *ops;
149 struct sk_buff **pp = NULL;
150 struct sk_buff *p;
151 struct ipv6hdr *iph;
152 unsigned int nlen;
153 unsigned int hlen;
154 unsigned int off;
155 int flush = 1;
156 int proto;
157 __wsum csum;
158
159 off = skb_gro_offset(skb);
160 hlen = off + sizeof(*iph);
161 iph = skb_gro_header_fast(skb, off);
162 if (skb_gro_header_hard(skb, hlen)) {
163 iph = skb_gro_header_slow(skb, hlen, off);
164 if (unlikely(!iph))
165 goto out;
166 }
167
168 skb_gro_pull(skb, sizeof(*iph));
169 skb_set_transport_header(skb, skb_gro_offset(skb));
170
171 flush += ntohs(iph->payload_len) != skb_gro_len(skb);
172
173 rcu_read_lock();
174 proto = iph->nexthdr;
175 ops = rcu_dereference(inet6_offloads[proto]);
176 if (!ops || !ops->callbacks.gro_receive) {
177 __pskb_pull(skb, skb_gro_offset(skb));
178 proto = ipv6_gso_pull_exthdrs(skb, proto);
179 skb_gro_pull(skb, -skb_transport_offset(skb));
180 skb_reset_transport_header(skb);
181 __skb_push(skb, skb_gro_offset(skb));
182
183 ops = rcu_dereference(inet6_offloads[proto]);
184 if (!ops || !ops->callbacks.gro_receive)
185 goto out_unlock;
186
187 iph = ipv6_hdr(skb);
188 }
189
190 NAPI_GRO_CB(skb)->proto = proto;
191
192 flush--;
193 nlen = skb_network_header_len(skb);
194
195 for (p = *head; p; p = p->next) {
196 const struct ipv6hdr *iph2;
197 __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
198
199 if (!NAPI_GRO_CB(p)->same_flow)
200 continue;
201
202 iph2 = ipv6_hdr(p);
203 first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
204
205 /* All fields must match except length and Traffic Class. */
206 if (nlen != skb_network_header_len(p) ||
207 (first_word & htonl(0xF00FFFFF)) ||
208 memcmp(&iph->nexthdr, &iph2->nexthdr,
209 nlen - offsetof(struct ipv6hdr, nexthdr))) {
210 NAPI_GRO_CB(p)->same_flow = 0;
211 continue;
212 }
213 /* flush if Traffic Class fields are different */
214 NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
215 NAPI_GRO_CB(p)->flush |= flush;
216 }
217
218 NAPI_GRO_CB(skb)->flush |= flush;
219
220 csum = skb->csum;
221 skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
222
223 pp = ops->callbacks.gro_receive(head, skb);
224
225 skb->csum = csum;
226
227out_unlock:
228 rcu_read_unlock();
229
230out:
231 NAPI_GRO_CB(skb)->flush |= flush;
232
233 return pp;
234}
235
236static int ipv6_gro_complete(struct sk_buff *skb)
237{
238 const struct net_offload *ops;
239 struct ipv6hdr *iph = ipv6_hdr(skb);
240 int err = -ENOSYS;
241
242 iph->payload_len = htons(skb->len - skb_network_offset(skb) -
243 sizeof(*iph));
244
245 rcu_read_lock();
246 ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]);
247 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
248 goto out_unlock;
249
250 err = ops->callbacks.gro_complete(skb);
251
252out_unlock:
253 rcu_read_unlock();
254
255 return err;
256}
257
258static struct packet_offload ipv6_packet_offload __read_mostly = {
259 .type = cpu_to_be16(ETH_P_IPV6),
260 .callbacks = {
261 .gso_send_check = ipv6_gso_send_check,
262 .gso_segment = ipv6_gso_segment,
263 .gro_receive = ipv6_gro_receive,
264 .gro_complete = ipv6_gro_complete,
265 },
266};
267
268static int __init ipv6_offload_init(void)
269{
270
271 if (tcpv6_offload_init() < 0)
272 pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
273 if (udp_offload_init() < 0)
274 pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
275 if (ipv6_exthdrs_offload_init() < 0)
276 pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
277
278 dev_add_offload(&ipv6_packet_offload);
279 return 0;
280}
281
282fs_initcall(ipv6_offload_init);
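In the ipv6_gro_receive() shown above (removed together with ip6_offload.c), two packets belong to the same flow when the XOR of the first 32-bit words of their IPv6 headers is zero under the mask htonl(0xF00FFFFF): version and flow label must match, the 8 traffic-class bits in between may differ, and a difference in those bits only forces a flush via htonl(0x0FF00000). A self-contained check of those masks follows; the helper and the sample field values are made up for illustration.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* first word = <version:4><traffic class:8><flow label:20>, network order */
static uint32_t first_word(uint8_t version, uint8_t tclass, uint32_t flowlabel)
{
	return htonl(((uint32_t)version << 28) |
		     ((uint32_t)tclass  << 20) |
		     (flowlabel & 0xFFFFFu));
}

int main(void)
{
	uint32_t a = first_word(6, 0x00, 0x12345);	/* same flow ...        */
	uint32_t b = first_word(6, 0x2e, 0x12345);	/* ... different TC     */
	uint32_t c = first_word(6, 0x00, 0x54321);	/* different flow label */

	/* same_flow test: version/flow label must match, TC is ignored */
	printf("a vs b same flow? %s\n",
	       ((a ^ b) & htonl(0xF00FFFFF)) ? "no" : "yes");	/* yes */
	printf("a vs c same flow? %s\n",
	       ((a ^ c) & htonl(0xF00FFFFF)) ? "no" : "yes");	/* no  */

	/* flush test: a differing traffic class forces a GRO flush */
	printf("a vs b TC differs? %s\n",
	       ((a ^ b) & htonl(0x0FF00000)) ? "yes" : "no");	/* yes */
	return 0;
}

Payload length is compared separately against skb_gro_len(), and the remaining header bytes from nexthdr onward are compared with memcmp().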
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
deleted file mode 100644
index 2e155c651b3..00000000000
--- a/net/ipv6/ip6_offload.h
+++ /dev/null
@@ -1,18 +0,0 @@
1/*
2 * IPV6 GSO/GRO offload support
3 * Linux INET6 implementation
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#ifndef __ip6_offload_h
12#define __ip6_offload_h
13
14int ipv6_exthdrs_offload_init(void);
15int udp_offload_init(void);
16int tcpv6_offload_init(void);
17
18#endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5552d13ae92..55a35c1dede 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -83,12 +83,24 @@ int ip6_local_out(struct sk_buff *skb)
83} 83}
84EXPORT_SYMBOL_GPL(ip6_local_out); 84EXPORT_SYMBOL_GPL(ip6_local_out);
85 85
86/* dev_loopback_xmit for use with netfilter. */
87static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88{
89 skb_reset_mac_header(newskb);
90 __skb_pull(newskb, skb_network_offset(newskb));
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
93 WARN_ON(!skb_dst(newskb));
94
95 netif_rx_ni(newskb);
96 return 0;
97}
98
86static int ip6_finish_output2(struct sk_buff *skb) 99static int ip6_finish_output2(struct sk_buff *skb)
87{ 100{
88 struct dst_entry *dst = skb_dst(skb); 101 struct dst_entry *dst = skb_dst(skb);
89 struct net_device *dev = dst->dev; 102 struct net_device *dev = dst->dev;
90 struct neighbour *neigh; 103 struct neighbour *neigh;
91 struct rt6_info *rt;
92 104
93 skb->protocol = htons(ETH_P_IPV6); 105 skb->protocol = htons(ETH_P_IPV6);
94 skb->dev = dev; 106 skb->dev = dev;
@@ -109,7 +121,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
109 if (newskb) 121 if (newskb)
110 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, 122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
111 newskb, NULL, newskb->dev, 123 newskb, NULL, newskb->dev,
112 dev_loopback_xmit); 124 ip6_dev_loopback_xmit);
113 125
114 if (ipv6_hdr(skb)->hop_limit == 0) { 126 if (ipv6_hdr(skb)->hop_limit == 0) {
115 IP6_INC_STATS(dev_net(dev), idev, 127 IP6_INC_STATS(dev_net(dev), idev,
@@ -123,11 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb)
123 skb->len); 135 skb->len);
124 } 136 }
125 137
126 rt = (struct rt6_info *) dst; 138 rcu_read_lock();
127 neigh = rt->n; 139 neigh = dst_get_neighbour(dst);
128 if (neigh) 140 if (neigh) {
129 return dst_neigh_output(dst, neigh, skb); 141 int res = neigh_output(neigh, skb);
130 142
143 rcu_read_unlock();
144 return res;
145 }
146 rcu_read_unlock();
131 IP6_INC_STATS_BH(dev_net(dst->dev), 147 IP6_INC_STATS_BH(dev_net(dst->dev),
132 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
133 kfree_skb(skb); 149 kfree_skb(skb);
@@ -164,7 +180,7 @@ int ip6_output(struct sk_buff *skb)
164 */ 180 */
165 181
166int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, 182int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
167 struct ipv6_txoptions *opt, int tclass) 183 struct ipv6_txoptions *opt)
168{ 184{
169 struct net *net = sock_net(sk); 185 struct net *net = sock_net(sk);
170 struct ipv6_pinfo *np = inet6_sk(sk); 186 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -174,6 +190,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
174 u8 proto = fl6->flowi6_proto; 190 u8 proto = fl6->flowi6_proto;
175 int seg_len = skb->len; 191 int seg_len = skb->len;
176 int hlimit = -1; 192 int hlimit = -1;
193 int tclass = 0;
177 u32 mtu; 194 u32 mtu;
178 195
179 if (opt) { 196 if (opt) {
@@ -194,7 +211,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
194 kfree_skb(skb); 211 kfree_skb(skb);
195 return -ENOBUFS; 212 return -ENOBUFS;
196 } 213 }
197 consume_skb(skb); 214 kfree_skb(skb);
198 skb = skb2; 215 skb = skb2;
199 skb_set_owner_w(skb, sk); 216 skb_set_owner_w(skb, sk);
200 } 217 }
@@ -211,8 +228,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
211 /* 228 /*
212 * Fill in the IPv6 header 229 * Fill in the IPv6 header
213 */ 230 */
214 if (np) 231 if (np) {
232 tclass = np->tclass;
215 hlimit = np->hop_limit; 233 hlimit = np->hop_limit;
234 }
216 if (hlimit < 0) 235 if (hlimit < 0)
217 hlimit = ip6_dst_hoplimit(dst); 236 hlimit = ip6_dst_hoplimit(dst);
218 237
@@ -222,8 +241,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
222 hdr->nexthdr = proto; 241 hdr->nexthdr = proto;
223 hdr->hop_limit = hlimit; 242 hdr->hop_limit = hlimit;
224 243
225 hdr->saddr = fl6->saddr; 244 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
226 hdr->daddr = *first_hop; 245 ipv6_addr_copy(&hdr->daddr, first_hop);
227 246
228 skb->priority = sk->sk_priority; 247 skb->priority = sk->sk_priority;
229 skb->mark = sk->sk_mark; 248 skb->mark = sk->sk_mark;
@@ -236,7 +255,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
236 dst->dev, dst_output); 255 dst->dev, dst_output);
237 } 256 }
238 257
239 net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n"); 258 if (net_ratelimit())
259 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
240 skb->dev = dst->dev; 260 skb->dev = dst->dev;
241 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 261 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
242 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); 262 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
@@ -273,8 +293,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
273 hdr->nexthdr = proto; 293 hdr->nexthdr = proto;
274 hdr->hop_limit = np->hop_limit; 294 hdr->hop_limit = np->hop_limit;
275 295
276 hdr->saddr = *saddr; 296 ipv6_addr_copy(&hdr->saddr, saddr);
277 hdr->daddr = *daddr; 297 ipv6_addr_copy(&hdr->daddr, daddr);
278 298
279 return 0; 299 return 0;
280} 300}
@@ -312,11 +332,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
312{ 332{
313 struct ipv6hdr *hdr = ipv6_hdr(skb); 333 struct ipv6hdr *hdr = ipv6_hdr(skb);
314 u8 nexthdr = hdr->nexthdr; 334 u8 nexthdr = hdr->nexthdr;
315 __be16 frag_off;
316 int offset; 335 int offset;
317 336
318 if (ipv6_ext_hdr(nexthdr)) { 337 if (ipv6_ext_hdr(nexthdr)) {
319 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); 338 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
320 if (offset < 0) 339 if (offset < 0)
321 return 0; 340 return 0;
322 } else 341 } else
@@ -371,6 +390,7 @@ int ip6_forward(struct sk_buff *skb)
371 struct ipv6hdr *hdr = ipv6_hdr(skb); 390 struct ipv6hdr *hdr = ipv6_hdr(skb);
372 struct inet6_skb_parm *opt = IP6CB(skb); 391 struct inet6_skb_parm *opt = IP6CB(skb);
373 struct net *net = dev_net(dst->dev); 392 struct net *net = dev_net(dst->dev);
393 struct neighbour *n;
374 u32 mtu; 394 u32 mtu;
375 395
376 if (net->ipv6.devconf_all->forwarding == 0) 396 if (net->ipv6.devconf_all->forwarding == 0)
@@ -445,9 +465,9 @@ int ip6_forward(struct sk_buff *skb)
445 send redirects to source routed frames. 465 send redirects to source routed frames.
446 We don't send redirects to frames decapsulated from IPsec. 466 We don't send redirects to frames decapsulated from IPsec.
447 */ 467 */
448 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { 468 n = dst_get_neighbour(dst);
469 if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
449 struct in6_addr *target = NULL; 470 struct in6_addr *target = NULL;
450 struct inet_peer *peer;
451 struct rt6_info *rt; 471 struct rt6_info *rt;
452 472
453 /* 473 /*
@@ -456,20 +476,19 @@ int ip6_forward(struct sk_buff *skb)
456 */ 476 */
457 477
458 rt = (struct rt6_info *) dst; 478 rt = (struct rt6_info *) dst;
459 if (rt->rt6i_flags & RTF_GATEWAY) 479 if ((rt->rt6i_flags & RTF_GATEWAY))
460 target = &rt->rt6i_gateway; 480 target = (struct in6_addr*)&n->primary_key;
461 else 481 else
462 target = &hdr->daddr; 482 target = &hdr->daddr;
463 483
464 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); 484 if (!rt->rt6i_peer)
485 rt6_bind_peer(rt, 1);
465 486
466 /* Limit redirects both by destination (here) 487 /* Limit redirects both by destination (here)
467 and by source (inside ndisc_send_redirect) 488 and by source (inside ndisc_send_redirect)
468 */ 489 */
469 if (inet_peer_xrlim_allow(peer, 1*HZ)) 490 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
470 ndisc_send_redirect(skb, target); 491 ndisc_send_redirect(skb, n, target);
471 if (peer)
472 inet_putpeer(peer);
473 } else { 492 } else {
474 int addrtype = ipv6_addr_type(&hdr->saddr); 493 int addrtype = ipv6_addr_type(&hdr->saddr);
475 494
@@ -488,8 +507,7 @@ int ip6_forward(struct sk_buff *skb)
488 if (mtu < IPV6_MIN_MTU) 507 if (mtu < IPV6_MIN_MTU)
489 mtu = IPV6_MIN_MTU; 508 mtu = IPV6_MIN_MTU;
490 509
491 if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || 510 if (skb->len > mtu && !skb_is_gso(skb)) {
492 (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
493 /* Again, force OUTPUT device used as source address */ 511 /* Again, force OUTPUT device used as source address */
494 skb->dev = dst->dev; 512 skb->dev = dst->dev;
495 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 513 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -513,7 +531,6 @@ int ip6_forward(struct sk_buff *skb)
513 hdr->hop_limit--; 531 hdr->hop_limit--;
514 532
515 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 533 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
516 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
517 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 534 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
518 ip6_forward_finish); 535 ip6_forward_finish);
519 536
@@ -538,12 +555,77 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
538 to->tc_index = from->tc_index; 555 to->tc_index = from->tc_index;
539#endif 556#endif
540 nf_copy(to, from); 557 nf_copy(to, from);
541#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 558#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
559 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
542 to->nf_trace = from->nf_trace; 560 to->nf_trace = from->nf_trace;
543#endif 561#endif
544 skb_copy_secmark(to, from); 562 skb_copy_secmark(to, from);
545} 563}
546 564
565int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
566{
567 u16 offset = sizeof(struct ipv6hdr);
568 struct ipv6_opt_hdr *exthdr =
569 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
570 unsigned int packet_len = skb->tail - skb->network_header;
571 int found_rhdr = 0;
572 *nexthdr = &ipv6_hdr(skb)->nexthdr;
573
574 while (offset + 1 <= packet_len) {
575
576 switch (**nexthdr) {
577
578 case NEXTHDR_HOP:
579 break;
580 case NEXTHDR_ROUTING:
581 found_rhdr = 1;
582 break;
583 case NEXTHDR_DEST:
584#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
585 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
586 break;
587#endif
588 if (found_rhdr)
589 return offset;
590 break;
591 default :
592 return offset;
593 }
594
595 offset += ipv6_optlen(exthdr);
596 *nexthdr = &exthdr->nexthdr;
597 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
598 offset);
599 }
600
601 return offset;
602}
603
604void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
605{
606 static atomic_t ipv6_fragmentation_id;
607 int old, new;
608
609 if (rt && !(rt->dst.flags & DST_NOPEER)) {
610 struct inet_peer *peer;
611
612 if (!rt->rt6i_peer)
613 rt6_bind_peer(rt, 1);
614 peer = rt->rt6i_peer;
615 if (peer) {
616 fhdr->identification = htonl(inet_getid(peer, 0));
617 return;
618 }
619 }
620 do {
621 old = atomic_read(&ipv6_fragmentation_id);
622 new = old + 1;
623 if (!new)
624 new = 1;
625 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
626 fhdr->identification = htonl(new);
627}
628
547int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 629int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
548{ 630{
549 struct sk_buff *frag; 631 struct sk_buff *frag;
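The ipv6_select_ident() fallback in the hunk above generates fragment IDs from a global counter with an atomic_cmpxchg() retry loop, never handing out 0. A userspace analog of that loop is sketched below with C11 atomics; the kernel uses its own atomic_t API, not <stdatomic.h>.

#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

static _Atomic uint32_t ipv6_fragmentation_id;

static uint32_t next_frag_id(void)
{
	uint32_t old, new;

	do {
		old = atomic_load(&ipv6_fragmentation_id);
		new = old + 1;
		if (new == 0)		/* 0 is reserved, wrap to 1 */
			new = 1;
		/* retry if another thread/CPU won the race */
	} while (!atomic_compare_exchange_weak(&ipv6_fragmentation_id,
					       &old, new));
	return new;
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		printf("fragment id: %u\n", next_frag_id());
	return 0;
}

In the code above this path is only reached for peer-less or DST_NOPEER routes; when the route has a bound inet_peer, the identification comes from inet_getid() on that peer instead.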
@@ -552,7 +634,6 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
552 struct ipv6hdr *tmp_hdr; 634 struct ipv6hdr *tmp_hdr;
553 struct frag_hdr *fh; 635 struct frag_hdr *fh;
554 unsigned int mtu, hlen, left, len; 636 unsigned int mtu, hlen, left, len;
555 int hroom, troom;
556 __be32 frag_id = 0; 637 __be32 frag_id = 0;
557 int ptr, offset = 0, err=0; 638 int ptr, offset = 0, err=0;
558 u8 *prevhdr, nexthdr = 0; 639 u8 *prevhdr, nexthdr = 0;
@@ -566,12 +647,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
566 /* We must not fragment if the socket is set to force MTU discovery 647 /* We must not fragment if the socket is set to force MTU discovery
567 * or if the skb it not generated by a local socket. 648 * or if the skb it not generated by a local socket.
568 */ 649 */
569 if (unlikely(!skb->local_df && skb->len > mtu) || 650 if (!skb->local_df && skb->len > mtu) {
570 (IP6CB(skb)->frag_max_size &&
571 IP6CB(skb)->frag_max_size > mtu)) {
572 if (skb->sk && dst_allfrag(skb_dst(skb)))
573 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
574
575 skb->dev = skb_dst(skb)->dev; 651 skb->dev = skb_dst(skb)->dev;
576 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 652 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
577 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 653 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
@@ -690,7 +766,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
690 if (err == 0) { 766 if (err == 0) {
691 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 767 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
692 IPSTATS_MIB_FRAGOKS); 768 IPSTATS_MIB_FRAGOKS);
693 ip6_rt_put(rt); 769 dst_release(&rt->dst);
694 return 0; 770 return 0;
695 } 771 }
696 772
@@ -702,7 +778,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
702 778
703 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 779 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
704 IPSTATS_MIB_FRAGFAILS); 780 IPSTATS_MIB_FRAGFAILS);
705 ip6_rt_put(rt); 781 dst_release(&rt->dst);
706 return err; 782 return err;
707 783
708slow_path_clean: 784slow_path_clean:
@@ -716,10 +792,6 @@ slow_path_clean:
716 } 792 }
717 793
718slow_path: 794slow_path:
719 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
720 skb_checksum_help(skb))
721 goto fail;
722
723 left = skb->len - hlen; /* Space per frame */ 795 left = skb->len - hlen; /* Space per frame */
724 ptr = hlen; /* Where to start from */ 796 ptr = hlen; /* Where to start from */
725 797
@@ -728,8 +800,6 @@ slow_path:
728 */ 800 */
729 801
730 *prevhdr = NEXTHDR_FRAGMENT; 802 *prevhdr = NEXTHDR_FRAGMENT;
731 hroom = LL_RESERVED_SPACE(rt->dst.dev);
732 troom = rt->dst.dev->needed_tailroom;
733 803
734 /* 804 /*
735 * Keep copying data until we run out. 805 * Keep copying data until we run out.
@@ -748,8 +818,7 @@ slow_path:
748 * Allocate buffer. 818 * Allocate buffer.
749 */ 819 */
750 820
751 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + 821 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
752 hroom + troom, GFP_ATOMIC)) == NULL) {
753 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 822 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
754 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 823 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
755 IPSTATS_MIB_FRAGFAILS); 824 IPSTATS_MIB_FRAGFAILS);
@@ -762,7 +831,7 @@ slow_path:
762 */ 831 */
763 832
764 ip6_copy_metadata(frag, skb); 833 ip6_copy_metadata(frag, skb);
765 skb_reserve(frag, hroom); 834 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
766 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 835 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
767 skb_reset_network_header(frag); 836 skb_reset_network_header(frag);
768 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 837 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -820,7 +889,7 @@ slow_path:
820 } 889 }
821 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 890 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
822 IPSTATS_MIB_FRAGOKS); 891 IPSTATS_MIB_FRAGOKS);
823 consume_skb(skb); 892 kfree_skb(skb);
824 return err; 893 return err;
825 894
826fail: 895fail:
@@ -884,7 +953,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
884 struct net *net = sock_net(sk); 953 struct net *net = sock_net(sk);
885#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 954#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
886 struct neighbour *n; 955 struct neighbour *n;
887 struct rt6_info *rt;
888#endif 956#endif
889 int err; 957 int err;
890 958
@@ -912,13 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk,
912 * dst entry and replace it instead with the 980 * dst entry and replace it instead with the
913 * dst entry of the nexthop router 981 * dst entry of the nexthop router
914 */ 982 */
915 rt = (struct rt6_info *) *dst; 983 rcu_read_lock();
916 n = rt->n; 984 n = dst_get_neighbour(*dst);
917 if (n && !(n->nud_state & NUD_VALID)) { 985 if (n && !(n->nud_state & NUD_VALID)) {
918 struct inet6_ifaddr *ifp; 986 struct inet6_ifaddr *ifp;
919 struct flowi6 fl_gw6; 987 struct flowi6 fl_gw6;
920 int redirect; 988 int redirect;
921 989
990 rcu_read_unlock();
922 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 991 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
923 (*dst)->dev, 1); 992 (*dst)->dev, 1);
924 993
@@ -938,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
938 if ((err = (*dst)->error)) 1007 if ((err = (*dst)->error))
939 goto out_err_release; 1008 goto out_err_release;
940 } 1009 }
1010 } else {
1011 rcu_read_unlock();
941 } 1012 }
942#endif 1013#endif
943 1014
@@ -991,7 +1062,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
991 if (err) 1062 if (err)
992 return ERR_PTR(err); 1063 return ERR_PTR(err);
993 if (final_dst) 1064 if (final_dst)
994 fl6->daddr = *final_dst; 1065 ipv6_addr_copy(&fl6->daddr, final_dst);
995 if (can_sleep) 1066 if (can_sleep)
996 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; 1067 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
997 1068
@@ -1027,7 +1098,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1027 if (err) 1098 if (err)
1028 return ERR_PTR(err); 1099 return ERR_PTR(err);
1029 if (final_dst) 1100 if (final_dst)
1030 fl6->daddr = *final_dst; 1101 ipv6_addr_copy(&fl6->daddr, final_dst);
1031 if (can_sleep) 1102 if (can_sleep)
1032 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; 1103 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1033 1104
@@ -1055,7 +1126,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1055 hh_len + fragheaderlen + transhdrlen + 20, 1126 hh_len + fragheaderlen + transhdrlen + 20,
1056 (flags & MSG_DONTWAIT), &err); 1127 (flags & MSG_DONTWAIT), &err);
1057 if (skb == NULL) 1128 if (skb == NULL)
1058 return err; 1129 return -ENOMEM;
1059 1130
1060 /* reserve space for Hardware header */ 1131 /* reserve space for Hardware header */
1061 skb_reserve(skb, hh_len); 1132 skb_reserve(skb, hh_len);
@@ -1110,29 +1181,6 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1110 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; 1181 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1111} 1182}
1112 1183
1113static void ip6_append_data_mtu(int *mtu,
1114 int *maxfraglen,
1115 unsigned int fragheaderlen,
1116 struct sk_buff *skb,
1117 struct rt6_info *rt)
1118{
1119 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1120 if (skb == NULL) {
1121 /* first fragment, reserve header_len */
1122 *mtu = *mtu - rt->dst.header_len;
1123
1124 } else {
1125 /*
1126 * this fragment is not first, the headers
1127 * space is regarded as data space.
1128 */
1129 *mtu = dst_mtu(rt->dst.path);
1130 }
1131 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1132 + fragheaderlen - sizeof(struct frag_hdr);
1133 }
1134}
1135
1136int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 1184int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1137 int offset, int len, int odd, struct sk_buff *skb), 1185 int offset, int len, int odd, struct sk_buff *skb),
1138 void *from, int length, int transhdrlen, 1186 void *from, int length, int transhdrlen,
@@ -1142,15 +1190,15 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1142 struct inet_sock *inet = inet_sk(sk); 1190 struct inet_sock *inet = inet_sk(sk);
1143 struct ipv6_pinfo *np = inet6_sk(sk); 1191 struct ipv6_pinfo *np = inet6_sk(sk);
1144 struct inet_cork *cork; 1192 struct inet_cork *cork;
1145 struct sk_buff *skb, *skb_prev = NULL; 1193 struct sk_buff *skb;
1146 unsigned int maxfraglen, fragheaderlen; 1194 unsigned int maxfraglen, fragheaderlen;
1147 int exthdrlen; 1195 int exthdrlen;
1148 int dst_exthdrlen;
1149 int hh_len; 1196 int hh_len;
1150 int mtu; 1197 int mtu;
1151 int copy; 1198 int copy;
1152 int err; 1199 int err;
1153 int offset = 0; 1200 int offset = 0;
1201 int csummode = CHECKSUM_NONE;
1154 __u8 tx_flags = 0; 1202 __u8 tx_flags = 0;
1155 1203
1156 if (flags&MSG_PROBE) 1204 if (flags&MSG_PROBE)
@@ -1199,12 +1247,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1199 inet->cork.fl.u.ip6 = *fl6; 1247 inet->cork.fl.u.ip6 = *fl6;
1200 np->cork.hop_limit = hlimit; 1248 np->cork.hop_limit = hlimit;
1201 np->cork.tclass = tclass; 1249 np->cork.tclass = tclass;
1202 if (rt->dst.flags & DST_XFRM_TUNNEL) 1250 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1203 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1251 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1204 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1205 else
1206 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1207 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1208 if (np->frag_size < mtu) { 1252 if (np->frag_size < mtu) {
1209 if (np->frag_size) 1253 if (np->frag_size)
1210 mtu = np->frag_size; 1254 mtu = np->frag_size;
@@ -1213,17 +1257,18 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1213 if (dst_allfrag(rt->dst.path)) 1257 if (dst_allfrag(rt->dst.path))
1214 cork->flags |= IPCORK_ALLFRAG; 1258 cork->flags |= IPCORK_ALLFRAG;
1215 cork->length = 0; 1259 cork->length = 0;
1216 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; 1260 sk->sk_sndmsg_page = NULL;
1261 sk->sk_sndmsg_off = 0;
1262 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
1263 rt->rt6i_nfheader_len;
1217 length += exthdrlen; 1264 length += exthdrlen;
1218 transhdrlen += exthdrlen; 1265 transhdrlen += exthdrlen;
1219 dst_exthdrlen = rt->dst.header_len;
1220 } else { 1266 } else {
1221 rt = (struct rt6_info *)cork->dst; 1267 rt = (struct rt6_info *)cork->dst;
1222 fl6 = &inet->cork.fl.u.ip6; 1268 fl6 = &inet->cork.fl.u.ip6;
1223 opt = np->cork.opt; 1269 opt = np->cork.opt;
1224 transhdrlen = 0; 1270 transhdrlen = 0;
1225 exthdrlen = 0; 1271 exthdrlen = 0;
1226 dst_exthdrlen = 0;
1227 mtu = cork->fragsize; 1272 mtu = cork->fragsize;
1228 } 1273 }
1229 1274
@@ -1298,45 +1343,38 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1298 unsigned int fraglen; 1343 unsigned int fraglen;
1299 unsigned int fraggap; 1344 unsigned int fraggap;
1300 unsigned int alloclen; 1345 unsigned int alloclen;
1346 struct sk_buff *skb_prev;
1301alloc_new_skb: 1347alloc_new_skb:
1348 skb_prev = skb;
1349
1302 /* There's no room in the current skb */ 1350 /* There's no room in the current skb */
1303 if (skb) 1351 if (skb_prev)
1304 fraggap = skb->len - maxfraglen; 1352 fraggap = skb_prev->len - maxfraglen;
1305 else 1353 else
1306 fraggap = 0; 1354 fraggap = 0;
1307 /* update mtu and maxfraglen if necessary */
1308 if (skb == NULL || skb_prev == NULL)
1309 ip6_append_data_mtu(&mtu, &maxfraglen,
1310 fragheaderlen, skb, rt);
1311
1312 skb_prev = skb;
1313 1355
1314 /* 1356 /*
1315 * If remaining data exceeds the mtu, 1357 * If remaining data exceeds the mtu,
1316 * we know we need more fragment(s). 1358 * we know we need more fragment(s).
1317 */ 1359 */
1318 datalen = length + fraggap; 1360 datalen = length + fraggap;
1319
1320 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) 1361 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1321 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; 1362 datalen = maxfraglen - fragheaderlen;
1363
1364 fraglen = datalen + fragheaderlen;
1322 if ((flags & MSG_MORE) && 1365 if ((flags & MSG_MORE) &&
1323 !(rt->dst.dev->features&NETIF_F_SG)) 1366 !(rt->dst.dev->features&NETIF_F_SG))
1324 alloclen = mtu; 1367 alloclen = mtu;
1325 else 1368 else
1326 alloclen = datalen + fragheaderlen; 1369 alloclen = datalen + fragheaderlen;
1327 1370
1328 alloclen += dst_exthdrlen; 1371 /*
1329 1372 * The last fragment gets additional space at tail.
1330 if (datalen != length + fraggap) { 1373 * Note: we overallocate on fragments with MSG_MODE
1331 /* 1374 * because we have no idea if we're the last one.
1332 * this is not the last fragment, the trailer 1375 */
1333 * space is regarded as data space. 1376 if (datalen == length + fraggap)
1334 */ 1377 alloclen += rt->dst.trailer_len;
1335 datalen += rt->dst.trailer_len;
1336 }
1337
1338 alloclen += rt->dst.trailer_len;
1339 fraglen = datalen + fragheaderlen;
1340 1378
1341 /* 1379 /*
1342 * We just reserve space for fragment header. 1380 * We just reserve space for fragment header.
@@ -1370,11 +1408,10 @@ alloc_new_skb:
1370 /* 1408 /*
1371 * Fill in the control structures 1409 * Fill in the control structures
1372 */ 1410 */
1373 skb->ip_summed = CHECKSUM_NONE; 1411 skb->ip_summed = csummode;
1374 skb->csum = 0; 1412 skb->csum = 0;
1375 /* reserve for fragmentation and ipsec header */ 1413 /* reserve for fragmentation */
1376 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + 1414 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1377 dst_exthdrlen);
1378 1415
1379 if (sk->sk_type == SOCK_DGRAM) 1416 if (sk->sk_type == SOCK_DGRAM)
1380 skb_shinfo(skb)->tx_flags = tx_flags; 1417 skb_shinfo(skb)->tx_flags = tx_flags;
@@ -1397,7 +1434,6 @@ alloc_new_skb:
1397 pskb_trim_unique(skb_prev, maxfraglen); 1434 pskb_trim_unique(skb_prev, maxfraglen);
1398 } 1435 }
1399 copy = datalen - transhdrlen - fraggap; 1436 copy = datalen - transhdrlen - fraggap;
1400
1401 if (copy < 0) { 1437 if (copy < 0) {
1402 err = -EINVAL; 1438 err = -EINVAL;
1403 kfree_skb(skb); 1439 kfree_skb(skb);
@@ -1412,7 +1448,7 @@ alloc_new_skb:
1412 length -= datalen - fraggap; 1448 length -= datalen - fraggap;
1413 transhdrlen = 0; 1449 transhdrlen = 0;
1414 exthdrlen = 0; 1450 exthdrlen = 0;
1415 dst_exthdrlen = 0; 1451 csummode = CHECKSUM_NONE;
1416 1452
1417 /* 1453 /*
1418 * Put the packet on the pending queue 1454 * Put the packet on the pending queue
@@ -1436,31 +1472,46 @@ alloc_new_skb:
1436 } 1472 }
1437 } else { 1473 } else {
1438 int i = skb_shinfo(skb)->nr_frags; 1474 int i = skb_shinfo(skb)->nr_frags;
1439 struct page_frag *pfrag = sk_page_frag(sk); 1475 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1440 1476 struct page *page = sk->sk_sndmsg_page;
1441 err = -ENOMEM; 1477 int off = sk->sk_sndmsg_off;
1442 if (!sk_page_frag_refill(sk, pfrag)) 1478 unsigned int left;
1443 goto error; 1479
1444 1480 if (page && (left = PAGE_SIZE - off) > 0) {
1445 if (!skb_can_coalesce(skb, i, pfrag->page, 1481 if (copy >= left)
1446 pfrag->offset)) { 1482 copy = left;
1447 err = -EMSGSIZE; 1483 if (page != frag->page) {
1448 if (i == MAX_SKB_FRAGS) 1484 if (i == MAX_SKB_FRAGS) {
1485 err = -EMSGSIZE;
1486 goto error;
1487 }
1488 get_page(page);
1489 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1490 frag = &skb_shinfo(skb)->frags[i];
1491 }
1492 } else if(i < MAX_SKB_FRAGS) {
1493 if (copy > PAGE_SIZE)
1494 copy = PAGE_SIZE;
1495 page = alloc_pages(sk->sk_allocation, 0);
1496 if (page == NULL) {
1497 err = -ENOMEM;
1449 goto error; 1498 goto error;
1499 }
1500 sk->sk_sndmsg_page = page;
1501 sk->sk_sndmsg_off = 0;
1450 1502
1451 __skb_fill_page_desc(skb, i, pfrag->page, 1503 skb_fill_page_desc(skb, i, page, 0, 0);
1452 pfrag->offset, 0); 1504 frag = &skb_shinfo(skb)->frags[i];
1453 skb_shinfo(skb)->nr_frags = ++i; 1505 } else {
1454 get_page(pfrag->page); 1506 err = -EMSGSIZE;
1507 goto error;
1455 } 1508 }
1456 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1509 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1457 if (getfrag(from, 1510 err = -EFAULT;
1458 page_address(pfrag->page) + pfrag->offset, 1511 goto error;
1459 offset, copy, skb->len, skb) < 0) 1512 }
1460 goto error_efault; 1513 sk->sk_sndmsg_off += copy;
1461 1514 frag->size += copy;
1462 pfrag->offset += copy;
1463 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1464 skb->len += copy; 1515 skb->len += copy;
1465 skb->data_len += copy; 1516 skb->data_len += copy;
1466 skb->truesize += copy; 1517 skb->truesize += copy;
@@ -1469,17 +1520,12 @@ alloc_new_skb:
1469 offset += copy; 1520 offset += copy;
1470 length -= copy; 1521 length -= copy;
1471 } 1522 }
1472
1473 return 0; 1523 return 0;
1474
1475error_efault:
1476 err = -EFAULT;
1477error: 1524error:
1478 cork->length -= length; 1525 cork->length -= length;
1479 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1526 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1480 return err; 1527 return err;
1481} 1528}
1482EXPORT_SYMBOL_GPL(ip6_append_data);
1483 1529
1484static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) 1530static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1485{ 1531{
@@ -1537,7 +1583,7 @@ int ip6_push_pending_frames(struct sock *sk)
1537 if (np->pmtudisc < IPV6_PMTUDISC_DO) 1583 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1538 skb->local_df = 1; 1584 skb->local_df = 1;
1539 1585
1540 *final_dst = fl6->daddr; 1586 ipv6_addr_copy(final_dst, &fl6->daddr);
1541 __skb_pull(skb, skb_network_header_len(skb)); 1587 __skb_pull(skb, skb_network_header_len(skb));
1542 if (opt && opt->opt_flen) 1588 if (opt && opt->opt_flen)
1543 ipv6_push_frag_opts(skb, opt, &proto); 1589 ipv6_push_frag_opts(skb, opt, &proto);
@@ -1553,8 +1599,8 @@ int ip6_push_pending_frames(struct sock *sk)
1553 1599
1554 hdr->hop_limit = np->cork.hop_limit; 1600 hdr->hop_limit = np->cork.hop_limit;
1555 hdr->nexthdr = proto; 1601 hdr->nexthdr = proto;
1556 hdr->saddr = fl6->saddr; 1602 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1557 hdr->daddr = *final_dst; 1603 ipv6_addr_copy(&hdr->daddr, final_dst);
1558 1604
1559 skb->priority = sk->sk_priority; 1605 skb->priority = sk->sk_priority;
1560 skb->mark = sk->sk_mark; 1606 skb->mark = sk->sk_mark;
@@ -1583,7 +1629,6 @@ error:
1583 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1629 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1584 goto out; 1630 goto out;
1585} 1631}
1586EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1587 1632
1588void ip6_flush_pending_frames(struct sock *sk) 1633void ip6_flush_pending_frames(struct sock *sk)
1589{ 1634{
@@ -1598,4 +1643,3 @@ void ip6_flush_pending_frames(struct sock *sk)
1598 1643
1599 ip6_cork_release(inet_sk(sk), inet6_sk(sk)); 1644 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1600} 1645}
1601EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a14f28b280f..4e6922f1c68 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -18,8 +18,6 @@
18 * 18 *
19 */ 19 */
20 20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/capability.h> 22#include <linux/capability.h>
25#include <linux/errno.h> 23#include <linux/errno.h>
@@ -40,7 +38,6 @@
40#include <linux/rtnetlink.h> 38#include <linux/rtnetlink.h>
41#include <linux/netfilter_ipv6.h> 39#include <linux/netfilter_ipv6.h>
42#include <linux/slab.h> 40#include <linux/slab.h>
43#include <linux/hash.h>
44 41
45#include <asm/uaccess.h> 42#include <asm/uaccess.h>
46#include <linux/atomic.h> 43#include <linux/atomic.h>
@@ -63,7 +60,7 @@ MODULE_LICENSE("GPL");
63MODULE_ALIAS_NETDEV("ip6tnl0"); 60MODULE_ALIAS_NETDEV("ip6tnl0");
64 61
65#ifdef IP6_TNL_DEBUG 62#ifdef IP6_TNL_DEBUG
66#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__) 63#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
67#else 64#else
68#define IP6_TNL_TRACE(x...) do {;} while(0) 65#define IP6_TNL_TRACE(x...) do {;} while(0)
69#endif 66#endif
@@ -71,23 +68,14 @@ MODULE_ALIAS_NETDEV("ip6tnl0");
71#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) 68#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
72#define IPV6_TCLASS_SHIFT 20 69#define IPV6_TCLASS_SHIFT 20
73 70
74#define HASH_SIZE_SHIFT 5 71#define HASH_SIZE 32
75#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
76
77static bool log_ecn_error = true;
78module_param(log_ecn_error, bool, 0644);
79MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
80 72
81static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) 73#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
82{ 74 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
83 u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); 75 (HASH_SIZE - 1))
84
85 return hash_32(hash, HASH_SIZE_SHIFT);
86}
87 76
88static int ip6_tnl_dev_init(struct net_device *dev); 77static int ip6_tnl_dev_init(struct net_device *dev);
89static void ip6_tnl_dev_setup(struct net_device *dev); 78static void ip6_tnl_dev_setup(struct net_device *dev);
90static struct rtnl_link_ops ip6_link_ops __read_mostly;
91 79
92static int ip6_tnl_net_id __read_mostly; 80static int ip6_tnl_net_id __read_mostly;
93struct ip6_tnl_net { 81struct ip6_tnl_net {
@@ -99,6 +87,14 @@ struct ip6_tnl_net {
99 struct ip6_tnl __rcu **tnls[2]; 87 struct ip6_tnl __rcu **tnls[2];
100}; 88};
101 89
90/* often modified stats are per cpu, other are shared (netdev->stats) */
91struct pcpu_tstats {
92 unsigned long rx_packets;
93 unsigned long rx_bytes;
94 unsigned long tx_packets;
95 unsigned long tx_bytes;
96};
97
102static struct net_device_stats *ip6_get_stats(struct net_device *dev) 98static struct net_device_stats *ip6_get_stats(struct net_device *dev)
103{ 99{
104 struct pcpu_tstats sum = { 0 }; 100 struct pcpu_tstats sum = { 0 };
@@ -123,7 +119,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
123 * Locking : hash tables are protected by RCU and RTNL 119 * Locking : hash tables are protected by RCU and RTNL
124 */ 120 */
125 121
126struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 122static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
127{ 123{
128 struct dst_entry *dst = t->dst_cache; 124 struct dst_entry *dst = t->dst_cache;
129 125
@@ -136,23 +132,20 @@ struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
136 132
137 return dst; 133 return dst;
138} 134}
139EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
140 135
141void ip6_tnl_dst_reset(struct ip6_tnl *t) 136static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
142{ 137{
143 dst_release(t->dst_cache); 138 dst_release(t->dst_cache);
144 t->dst_cache = NULL; 139 t->dst_cache = NULL;
145} 140}
146EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
147 141
148void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) 142static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
149{ 143{
150 struct rt6_info *rt = (struct rt6_info *) dst; 144 struct rt6_info *rt = (struct rt6_info *) dst;
151 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 145 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
152 dst_release(t->dst_cache); 146 dst_release(t->dst_cache);
153 t->dst_cache = dst; 147 t->dst_cache = dst;
154} 148}
155EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
156 149
157/** 150/**
158 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 151 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -171,11 +164,12 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
171static struct ip6_tnl * 164static struct ip6_tnl *
172ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) 165ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
173{ 166{
174 unsigned int hash = HASH(remote, local); 167 unsigned int h0 = HASH(remote);
168 unsigned int h1 = HASH(local);
175 struct ip6_tnl *t; 169 struct ip6_tnl *t;
176 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 170 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
177 171
178 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { 172 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) {
179 if (ipv6_addr_equal(local, &t->parms.laddr) && 173 if (ipv6_addr_equal(local, &t->parms.laddr) &&
180 ipv6_addr_equal(remote, &t->parms.raddr) && 174 ipv6_addr_equal(remote, &t->parms.raddr) &&
181 (t->dev->flags & IFF_UP)) 175 (t->dev->flags & IFF_UP))
@@ -200,16 +194,16 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
200 **/ 194 **/
201 195
202static struct ip6_tnl __rcu ** 196static struct ip6_tnl __rcu **
203ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) 197ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
204{ 198{
205 const struct in6_addr *remote = &p->raddr; 199 const struct in6_addr *remote = &p->raddr;
206 const struct in6_addr *local = &p->laddr; 200 const struct in6_addr *local = &p->laddr;
207 unsigned int h = 0; 201 unsigned h = 0;
208 int prio = 0; 202 int prio = 0;
209 203
210 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { 204 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
211 prio = 1; 205 prio = 1;
212 h = HASH(remote, local); 206 h = HASH(remote) ^ HASH(local);
213 } 207 }
214 return &ip6n->tnls[prio][h]; 208 return &ip6n->tnls[prio][h];
215} 209}
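The hunks above swap the tunnel bucket hash: the newer code folded both endpoints through ipv6_addr_hash() and spread the combined value with hash_32() over 2^5 buckets, while the restored code XOR-folds each address with the HASH(addr) macro and XORs the two per-address indices. The sketch below compares the two styles in isolation; ipv6_addr_hash() and the multiplicative spread are local approximations of the kernel helpers, not the real definitions.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define HASH_SIZE_SHIFT	5
#define HASH_SIZE	(1 << HASH_SIZE_SHIFT)

/* XOR of the four 32-bit words of an IPv6 address */
static uint32_t ipv6_addr_hash(const struct in6_addr *a)
{
	uint32_t w[4];

	memcpy(w, a, sizeof(w));
	return w[0] ^ w[1] ^ w[2] ^ w[3];
}

/* old style: fold one address straight into the table index */
static uint32_t hash_one(const struct in6_addr *a)
{
	return ipv6_addr_hash(a) & (HASH_SIZE - 1);
}

/* new style: combine both endpoints, then spread with a multiplicative hash */
static uint32_t hash_pair(const struct in6_addr *l, const struct in6_addr *r)
{
	uint32_t h = ipv6_addr_hash(l) ^ ipv6_addr_hash(r);

	return (h * 0x9e370001u) >> (32 - HASH_SIZE_SHIFT);
}

int main(void)
{
	struct in6_addr l, r;

	inet_pton(AF_INET6, "2001:db8::1", &l);
	inet_pton(AF_INET6, "2001:db8::2", &r);

	printf("old bucket: %u\n", hash_one(&r) ^ hash_one(&l));
	printf("new bucket: %u\n", hash_pair(&l, &r));
	return 0;
}

Either way the table keeps 32 buckets; the difference is only in how evenly structured local/remote address pairs spread across them.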
@@ -255,35 +249,8 @@ static void ip6_dev_free(struct net_device *dev)
255 free_netdev(dev); 249 free_netdev(dev);
256} 250}
257 251
258static int ip6_tnl_create2(struct net_device *dev)
259{
260 struct ip6_tnl *t = netdev_priv(dev);
261 struct net *net = dev_net(dev);
262 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
263 int err;
264
265 t = netdev_priv(dev);
266 err = ip6_tnl_dev_init(dev);
267 if (err < 0)
268 goto out;
269
270 err = register_netdevice(dev);
271 if (err < 0)
272 goto out;
273
274 strcpy(t->parms.name, dev->name);
275 dev->rtnl_link_ops = &ip6_link_ops;
276
277 dev_hold(dev);
278 ip6_tnl_link(ip6n, t);
279 return 0;
280
281out:
282 return err;
283}
284
285/** 252/**
286 * ip6_tnl_create - create a new tunnel 253 * ip6_tnl_create() - create a new tunnel
287 * @p: tunnel parameters 254 * @p: tunnel parameters
288 * @pt: pointer to new tunnel 255 * @pt: pointer to new tunnel
289 * 256 *
@@ -294,12 +261,13 @@ out:
294 * created tunnel or NULL 261 * created tunnel or NULL
295 **/ 262 **/
296 263
297static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) 264static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
298{ 265{
299 struct net_device *dev; 266 struct net_device *dev;
300 struct ip6_tnl *t; 267 struct ip6_tnl *t;
301 char name[IFNAMSIZ]; 268 char name[IFNAMSIZ];
302 int err; 269 int err;
270 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
303 271
304 if (p->name[0]) 272 if (p->name[0])
305 strlcpy(name, p->name, IFNAMSIZ); 273 strlcpy(name, p->name, IFNAMSIZ);
@@ -314,10 +282,17 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
314 282
315 t = netdev_priv(dev); 283 t = netdev_priv(dev);
316 t->parms = *p; 284 t->parms = *p;
317 err = ip6_tnl_create2(dev); 285 err = ip6_tnl_dev_init(dev);
318 if (err < 0) 286 if (err < 0)
319 goto failed_free; 287 goto failed_free;
320 288
289 if ((err = register_netdevice(dev)) < 0)
290 goto failed_free;
291
292 strcpy(t->parms.name, dev->name);
293
294 dev_hold(dev);
295 ip6_tnl_link(ip6n, t);
321 return t; 296 return t;
322 297
323failed_free: 298failed_free:
@@ -341,7 +316,7 @@ failed:
341 **/ 316 **/
342 317
343static struct ip6_tnl *ip6_tnl_locate(struct net *net, 318static struct ip6_tnl *ip6_tnl_locate(struct net *net,
344 struct __ip6_tnl_parm *p, int create) 319 struct ip6_tnl_parm *p, int create)
345{ 320{
346 const struct in6_addr *remote = &p->raddr; 321 const struct in6_addr *remote = &p->raddr;
347 const struct in6_addr *local = &p->laddr; 322 const struct in6_addr *local = &p->laddr;
@@ -377,7 +352,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
377 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 352 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
378 353
379 if (dev == ip6n->fb_tnl_dev) 354 if (dev == ip6n->fb_tnl_dev)
380 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); 355 rcu_assign_pointer(ip6n->tnls_wc[0], NULL);
381 else 356 else
382 ip6_tnl_unlink(ip6n, t); 357 ip6_tnl_unlink(ip6n, t);
383 ip6_tnl_dst_reset(t); 358 ip6_tnl_dst_reset(t);
@@ -393,7 +368,8 @@ ip6_tnl_dev_uninit(struct net_device *dev)
393 * else index to encapsulation limit 368 * else index to encapsulation limit
394 **/ 369 **/
395 370
396__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) 371static __u16
372parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
397{ 373{
398 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; 374 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
399 __u8 nexthdr = ipv6h->nexthdr; 375 __u8 nexthdr = ipv6h->nexthdr;
@@ -443,7 +419,6 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
443 } 419 }
444 return 0; 420 return 0;
445} 421}
446EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
447 422
448/** 423/**
449 * ip6_tnl_err - tunnel error handler 424 * ip6_tnl_err - tunnel error handler
@@ -485,32 +460,41 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
485 struct ipv6_tlv_tnl_enc_lim *tel; 460 struct ipv6_tlv_tnl_enc_lim *tel;
486 __u32 mtu; 461 __u32 mtu;
487 case ICMPV6_DEST_UNREACH: 462 case ICMPV6_DEST_UNREACH:
488 net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", 463 if (net_ratelimit())
489 t->parms.name); 464 printk(KERN_WARNING
465 "%s: Path to destination invalid "
466 "or inactive!\n", t->parms.name);
490 rel_msg = 1; 467 rel_msg = 1;
491 break; 468 break;
492 case ICMPV6_TIME_EXCEED: 469 case ICMPV6_TIME_EXCEED:
493 if ((*code) == ICMPV6_EXC_HOPLIMIT) { 470 if ((*code) == ICMPV6_EXC_HOPLIMIT) {
494 net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", 471 if (net_ratelimit())
495 t->parms.name); 472 printk(KERN_WARNING
473 "%s: Too small hop limit or "
474 "routing loop in tunnel!\n",
475 t->parms.name);
496 rel_msg = 1; 476 rel_msg = 1;
497 } 477 }
498 break; 478 break;
499 case ICMPV6_PARAMPROB: 479 case ICMPV6_PARAMPROB:
500 teli = 0; 480 teli = 0;
501 if ((*code) == ICMPV6_HDR_FIELD) 481 if ((*code) == ICMPV6_HDR_FIELD)
502 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); 482 teli = parse_tlv_tnl_enc_lim(skb, skb->data);
503 483
504 if (teli && teli == *info - 2) { 484 if (teli && teli == *info - 2) {
505 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 485 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
506 if (tel->encap_limit == 0) { 486 if (tel->encap_limit == 0) {
507 net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", 487 if (net_ratelimit())
508 t->parms.name); 488 printk(KERN_WARNING
489 "%s: Too small encapsulation "
490 "limit or routing loop in "
491 "tunnel!\n", t->parms.name);
509 rel_msg = 1; 492 rel_msg = 1;
510 } 493 }
511 } else { 494 } else if (net_ratelimit()) {
512 net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", 495 printk(KERN_WARNING
513 t->parms.name); 496 "%s: Recipient unable to parse tunneled "
497 "packet!\n ", t->parms.name);
514 } 498 }
515 break; 499 break;
516 case ICMPV6_PKT_TOOBIG: 500 case ICMPV6_PKT_TOOBIG:
@@ -573,9 +557,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
573 rel_type = ICMP_DEST_UNREACH; 557 rel_type = ICMP_DEST_UNREACH;
574 rel_code = ICMP_FRAG_NEEDED; 558 rel_code = ICMP_FRAG_NEEDED;
575 break; 559 break;
576 case NDISC_REDIRECT:
577 rel_type = ICMP_REDIRECT;
578 rel_code = ICMP_REDIR_HOST;
579 default: 560 default:
580 return 0; 561 return 0;
581 } 562 }
@@ -632,10 +613,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
632 if (rel_info > dst_mtu(skb_dst(skb2))) 613 if (rel_info > dst_mtu(skb_dst(skb2)))
633 goto out; 614 goto out;
634 615
635 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); 616 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
636 } 617 }
637 if (rel_type == ICMP_REDIRECT)
638 skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
639 618
640 icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); 619 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
641 620
@@ -674,12 +653,13 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
674 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, 653 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
675 NULL, 0, 0); 654 NULL, 0, 0);
676 655
677 if (rt && rt->dst.dev) 656 if (rt && rt->rt6i_dev)
678 skb2->dev = rt->dst.dev; 657 skb2->dev = rt->rt6i_dev;
679 658
680 icmpv6_send(skb2, rel_type, rel_code, rel_info); 659 icmpv6_send(skb2, rel_type, rel_code, rel_info);
681 660
682 ip6_rt_put(rt); 661 if (rt)
662 dst_release(&rt->dst);
683 663
684 kfree_skb(skb2); 664 kfree_skb(skb2);
685 } 665 }
@@ -687,77 +667,51 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
687 return 0; 667 return 0;
688} 668}
689 669
690static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, 670static void ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
691 const struct ipv6hdr *ipv6h, 671 const struct ipv6hdr *ipv6h,
692 struct sk_buff *skb) 672 struct sk_buff *skb)
693{ 673{
694 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; 674 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
695 675
696 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 676 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
697 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); 677 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
698 678
699 return IP6_ECN_decapsulate(ipv6h, skb); 679 if (INET_ECN_is_ce(dsfield))
680 IP_ECN_set_ce(ip_hdr(skb));
700} 681}
701 682
702static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, 683static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
703 const struct ipv6hdr *ipv6h, 684 const struct ipv6hdr *ipv6h,
704 struct sk_buff *skb) 685 struct sk_buff *skb)
705{ 686{
706 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 687 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
707 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); 688 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
708 689
709 return IP6_ECN_decapsulate(ipv6h, skb); 690 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
710} 691 IP6_ECN_set_ce(ipv6_hdr(skb));
711
712__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
713 const struct in6_addr *laddr,
714 const struct in6_addr *raddr)
715{
716 struct __ip6_tnl_parm *p = &t->parms;
717 int ltype = ipv6_addr_type(laddr);
718 int rtype = ipv6_addr_type(raddr);
719 __u32 flags = 0;
720
721 if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
722 flags = IP6_TNL_F_CAP_PER_PACKET;
723 } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
724 rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
725 !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
726 (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
727 if (ltype&IPV6_ADDR_UNICAST)
728 flags |= IP6_TNL_F_CAP_XMIT;
729 if (rtype&IPV6_ADDR_UNICAST)
730 flags |= IP6_TNL_F_CAP_RCV;
731 }
732 return flags;
733} 692}
734EXPORT_SYMBOL(ip6_tnl_get_cap);
735 693
736/* called with rcu_read_lock() */ 694/* called with rcu_read_lock() */
737int ip6_tnl_rcv_ctl(struct ip6_tnl *t, 695static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
738 const struct in6_addr *laddr,
739 const struct in6_addr *raddr)
740{ 696{
741 struct __ip6_tnl_parm *p = &t->parms; 697 struct ip6_tnl_parm *p = &t->parms;
742 int ret = 0; 698 int ret = 0;
743 struct net *net = dev_net(t->dev); 699 struct net *net = dev_net(t->dev);
744 700
745 if ((p->flags & IP6_TNL_F_CAP_RCV) || 701 if (p->flags & IP6_TNL_F_CAP_RCV) {
746 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
747 (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
748 struct net_device *ldev = NULL; 702 struct net_device *ldev = NULL;
749 703
750 if (p->link) 704 if (p->link)
751 ldev = dev_get_by_index_rcu(net, p->link); 705 ldev = dev_get_by_index_rcu(net, p->link);
752 706
753 if ((ipv6_addr_is_multicast(laddr) || 707 if ((ipv6_addr_is_multicast(&p->laddr) ||
754 likely(ipv6_chk_addr(net, laddr, ldev, 0))) && 708 likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
755 likely(!ipv6_chk_addr(net, raddr, NULL, 0))) 709 likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
756 ret = 1; 710 ret = 1;
711
757 } 712 }
758 return ret; 713 return ret;
759} 714}
760EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
761 715
762/** 716/**
763 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally 717 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
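
The two *_dscp_ecn_decapsulate() variants in the hunk above differ mainly in error handling (the removed IP6_ECN_decapsulate() form also detects inner packets that are not ECN-capable), but the core rule is the same: a Congestion Experienced mark on the outer header is copied to the inner header when the inner packet advertised ECN capability. The plain C below is a simplified illustration of that rule, not the kernel helpers themselves, and the helper name is made up for the example.

/*
 * Simplified sketch of the CE-propagation rule behind
 * IP_ECN_set_ce()/IP6_ECN_set_ce() in the hunk above; it operates on
 * the 8-bit traffic class / TOS byte only.
 */
#include <stdint.h>
#include <stdio.h>

#define INET_ECN_MASK		0x03
#define INET_ECN_CE		0x03
#define INET_ECN_NOT_ECT	0x00

/* Return the inner TOS/traffic class after decapsulation. */
static uint8_t decap_tclass(uint8_t outer, uint8_t inner)
{
	int outer_ce = (outer & INET_ECN_MASK) == INET_ECN_CE;
	int inner_ect = (inner & INET_ECN_MASK) != INET_ECN_NOT_ECT;

	if (outer_ce && inner_ect)
		inner |= INET_ECN_CE;	/* propagate the congestion mark */
	return inner;
}

int main(void)
{
	/* An ECT(1) inner packet picks up the outer CE mark ... */
	printf("inner 0x01 -> 0x%02x\n", decap_tclass(0x03, 0x01));
	/* ... while a not-ECT inner packet is left untouched. */
	printf("inner 0x00 -> 0x%02x\n", decap_tclass(0x03, 0x00));
	return 0;
}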
@@ -770,13 +724,12 @@ EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
770 724
771static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, 725static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
772 __u8 ipproto, 726 __u8 ipproto,
773 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, 727 void (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
774 const struct ipv6hdr *ipv6h, 728 const struct ipv6hdr *ipv6h,
775 struct sk_buff *skb)) 729 struct sk_buff *skb))
776{ 730{
777 struct ip6_tnl *t; 731 struct ip6_tnl *t;
778 const struct ipv6hdr *ipv6h = ipv6_hdr(skb); 732 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
779 int err;
780 733
781 rcu_read_lock(); 734 rcu_read_lock();
782 735
@@ -794,7 +747,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
794 goto discard; 747 goto discard;
795 } 748 }
796 749
797 if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { 750 if (!ip6_tnl_rcv_ctl(t)) {
798 t->dev->stats.rx_dropped++; 751 t->dev->stats.rx_dropped++;
799 rcu_read_unlock(); 752 rcu_read_unlock();
800 goto discard; 753 goto discard;
@@ -806,26 +759,14 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
806 skb->pkt_type = PACKET_HOST; 759 skb->pkt_type = PACKET_HOST;
807 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 760 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
808 761
809 __skb_tunnel_rx(skb, t->dev);
810
811 err = dscp_ecn_decapsulate(t, ipv6h, skb);
812 if (unlikely(err)) {
813 if (log_ecn_error)
814 net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
815 &ipv6h->saddr,
816 ipv6_get_dsfield(ipv6h));
817 if (err > 1) {
818 ++t->dev->stats.rx_frame_errors;
819 ++t->dev->stats.rx_errors;
820 rcu_read_unlock();
821 goto discard;
822 }
823 }
824
825 tstats = this_cpu_ptr(t->dev->tstats); 762 tstats = this_cpu_ptr(t->dev->tstats);
826 tstats->rx_packets++; 763 tstats->rx_packets++;
827 tstats->rx_bytes += skb->len; 764 tstats->rx_bytes += skb->len;
828 765
766 __skb_tunnel_rx(skb, t->dev);
767
768 dscp_ecn_decapsulate(t, ipv6h, skb);
769
829 netif_rx(skb); 770 netif_rx(skb);
830 771
831 rcu_read_unlock(); 772 rcu_read_unlock();
@@ -884,15 +825,15 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
884 * 0 else 825 * 0 else
885 **/ 826 **/
886 827
887static inline bool 828static inline int
888ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) 829ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
889{ 830{
890 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); 831 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
891} 832}
892 833
893int ip6_tnl_xmit_ctl(struct ip6_tnl *t) 834static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
894{ 835{
895 struct __ip6_tnl_parm *p = &t->parms; 836 struct ip6_tnl_parm *p = &t->parms;
896 int ret = 0; 837 int ret = 0;
897 struct net *net = dev_net(t->dev); 838 struct net *net = dev_net(t->dev);
898 839
@@ -904,20 +845,21 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
904 ldev = dev_get_by_index_rcu(net, p->link); 845 ldev = dev_get_by_index_rcu(net, p->link);
905 846
906 if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) 847 if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
907 pr_warn("%s xmit: Local address not yet configured!\n", 848 printk(KERN_WARNING
908 p->name); 849 "%s xmit: Local address not yet configured!\n",
850 p->name);
909 else if (!ipv6_addr_is_multicast(&p->raddr) && 851 else if (!ipv6_addr_is_multicast(&p->raddr) &&
910 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) 852 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
911 pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", 853 printk(KERN_WARNING
912 p->name); 854 "%s xmit: Routing loop! "
855 "Remote address found on this node!\n",
856 p->name);
913 else 857 else
914 ret = 1; 858 ret = 1;
915 rcu_read_unlock(); 859 rcu_read_unlock();
916 } 860 }
917 return ret; 861 return ret;
918} 862}
919EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
920
921/** 863/**
922 * ip6_tnl_xmit2 - encapsulate packet and send 864 * ip6_tnl_xmit2 - encapsulate packet and send
923 * @skb: the outgoing socket buffer 865 * @skb: the outgoing socket buffer
@@ -949,7 +891,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
949 struct net_device_stats *stats = &t->dev->stats; 891 struct net_device_stats *stats = &t->dev->stats;
950 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 892 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
951 struct ipv6_tel_txoption opt; 893 struct ipv6_tel_txoption opt;
952 struct dst_entry *dst = NULL, *ndst = NULL; 894 struct dst_entry *dst;
953 struct net_device *tdev; 895 struct net_device *tdev;
954 int mtu; 896 int mtu;
955 unsigned int max_headroom = sizeof(struct ipv6hdr); 897 unsigned int max_headroom = sizeof(struct ipv6hdr);
@@ -957,28 +899,29 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
957 int err = -1; 899 int err = -1;
958 int pkt_len; 900 int pkt_len;
959 901
960 if (!fl6->flowi6_mark) 902 if ((dst = ip6_tnl_dst_check(t)) != NULL)
961 dst = ip6_tnl_dst_check(t); 903 dst_hold(dst);
962 if (!dst) { 904 else {
963 ndst = ip6_route_output(net, NULL, fl6); 905 dst = ip6_route_output(net, NULL, fl6);
964 906
965 if (ndst->error) 907 if (dst->error)
966 goto tx_err_link_failure; 908 goto tx_err_link_failure;
967 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); 909 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
968 if (IS_ERR(ndst)) { 910 if (IS_ERR(dst)) {
969 err = PTR_ERR(ndst); 911 err = PTR_ERR(dst);
970 ndst = NULL; 912 dst = NULL;
971 goto tx_err_link_failure; 913 goto tx_err_link_failure;
972 } 914 }
973 dst = ndst;
974 } 915 }
975 916
976 tdev = dst->dev; 917 tdev = dst->dev;
977 918
978 if (tdev == dev) { 919 if (tdev == dev) {
979 stats->collisions++; 920 stats->collisions++;
980 net_warn_ratelimited("%s: Local routing loop detected!\n", 921 if (net_ratelimit())
981 t->parms.name); 922 printk(KERN_WARNING
923 "%s: Local routing loop detected!\n",
924 t->parms.name);
982 goto tx_err_dst_release; 925 goto tx_err_dst_release;
983 } 926 }
984 mtu = dst_mtu(dst) - sizeof (*ipv6h); 927 mtu = dst_mtu(dst) - sizeof (*ipv6h);
@@ -989,7 +932,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
989 if (mtu < IPV6_MIN_MTU) 932 if (mtu < IPV6_MIN_MTU)
990 mtu = IPV6_MIN_MTU; 933 mtu = IPV6_MIN_MTU;
991 if (skb_dst(skb)) 934 if (skb_dst(skb))
992 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 935 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
993 if (skb->len > mtu) { 936 if (skb->len > mtu) {
994 *pmtu = mtu; 937 *pmtu = mtu;
995 err = -EMSGSIZE; 938 err = -EMSGSIZE;
@@ -1010,16 +953,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1010 953
1011 if (skb->sk) 954 if (skb->sk)
1012 skb_set_owner_w(new_skb, skb->sk); 955 skb_set_owner_w(new_skb, skb->sk);
1013 consume_skb(skb); 956 kfree_skb(skb);
1014 skb = new_skb; 957 skb = new_skb;
1015 } 958 }
1016 skb_dst_drop(skb); 959 skb_dst_drop(skb);
1017 if (fl6->flowi6_mark) { 960 skb_dst_set(skb, dst_clone(dst));
1018 skb_dst_set(skb, dst); 961
1019 ndst = NULL;
1020 } else {
1021 skb_dst_set_noref(skb, dst);
1022 }
1023 skb->transport_header = skb->network_header; 962 skb->transport_header = skb->network_header;
1024 963
1025 proto = fl6->flowi6_proto; 964 proto = fl6->flowi6_proto;
@@ -1035,8 +974,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1035 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); 974 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
1036 ipv6h->hop_limit = t->parms.hop_limit; 975 ipv6h->hop_limit = t->parms.hop_limit;
1037 ipv6h->nexthdr = proto; 976 ipv6h->nexthdr = proto;
1038 ipv6h->saddr = fl6->saddr; 977 ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr);
1039 ipv6h->daddr = fl6->daddr; 978 ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr);
1040 nf_reset(skb); 979 nf_reset(skb);
1041 pkt_len = skb->len; 980 pkt_len = skb->len;
1042 err = ip6_local_out(skb); 981 err = ip6_local_out(skb);
@@ -1050,14 +989,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1050 stats->tx_errors++; 989 stats->tx_errors++;
1051 stats->tx_aborted_errors++; 990 stats->tx_aborted_errors++;
1052 } 991 }
1053 if (ndst) 992 ip6_tnl_dst_store(t, dst);
1054 ip6_tnl_dst_store(t, ndst);
1055 return 0; 993 return 0;
1056tx_err_link_failure: 994tx_err_link_failure:
1057 stats->tx_carrier_errors++; 995 stats->tx_carrier_errors++;
1058 dst_link_failure(skb); 996 dst_link_failure(skb);
1059tx_err_dst_release: 997tx_err_dst_release:
1060 dst_release(ndst); 998 dst_release(dst);
1061 return err; 999 return err;
1062} 1000}
1063 1001
@@ -1084,11 +1022,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1084 1022
1085 dsfield = ipv4_get_dsfield(iph); 1023 dsfield = ipv4_get_dsfield(iph);
1086 1024
1087 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) 1025 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1088 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) 1026 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
1089 & IPV6_TCLASS_MASK; 1027 & IPV6_TCLASS_MASK;
1090 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1091 fl6.flowi6_mark = skb->mark;
1092 1028
1093 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1029 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1094 if (err != 0) { 1030 if (err != 0) {
@@ -1118,7 +1054,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1118 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) 1054 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
1119 return -1; 1055 return -1;
1120 1056
1121 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); 1057 offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
1122 if (offset > 0) { 1058 if (offset > 0) {
1123 struct ipv6_tlv_tnl_enc_lim *tel; 1059 struct ipv6_tlv_tnl_enc_lim *tel;
1124 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; 1060 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -1135,12 +1071,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1135 fl6.flowi6_proto = IPPROTO_IPV6; 1071 fl6.flowi6_proto = IPPROTO_IPV6;
1136 1072
1137 dsfield = ipv6_get_dsfield(ipv6h); 1073 dsfield = ipv6_get_dsfield(ipv6h);
1138 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) 1074 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1139 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); 1075 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1140 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) 1076 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
1141 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); 1077 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1142 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1143 fl6.flowi6_mark = skb->mark;
1144 1078
1145 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1079 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1146 if (err != 0) { 1080 if (err != 0) {
@@ -1182,18 +1116,37 @@ tx_err:
1182 return NETDEV_TX_OK; 1116 return NETDEV_TX_OK;
1183} 1117}
1184 1118
1119static void ip6_tnl_set_cap(struct ip6_tnl *t)
1120{
1121 struct ip6_tnl_parm *p = &t->parms;
1122 int ltype = ipv6_addr_type(&p->laddr);
1123 int rtype = ipv6_addr_type(&p->raddr);
1124
1125 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
1126
1127 if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1128 rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1129 !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
1130 (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
1131 if (ltype&IPV6_ADDR_UNICAST)
1132 p->flags |= IP6_TNL_F_CAP_XMIT;
1133 if (rtype&IPV6_ADDR_UNICAST)
1134 p->flags |= IP6_TNL_F_CAP_RCV;
1135 }
1136}
1137
1185static void ip6_tnl_link_config(struct ip6_tnl *t) 1138static void ip6_tnl_link_config(struct ip6_tnl *t)
1186{ 1139{
1187 struct net_device *dev = t->dev; 1140 struct net_device *dev = t->dev;
1188 struct __ip6_tnl_parm *p = &t->parms; 1141 struct ip6_tnl_parm *p = &t->parms;
1189 struct flowi6 *fl6 = &t->fl.u.ip6; 1142 struct flowi6 *fl6 = &t->fl.u.ip6;
1190 1143
1191 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 1144 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1192 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); 1145 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1193 1146
1194 /* Set up flowi template */ 1147 /* Set up flowi template */
1195 fl6->saddr = p->laddr; 1148 ipv6_addr_copy(&fl6->saddr, &p->laddr);
1196 fl6->daddr = p->raddr; 1149 ipv6_addr_copy(&fl6->daddr, &p->raddr);
1197 fl6->flowi6_oif = p->link; 1150 fl6->flowi6_oif = p->link;
1198 fl6->flowlabel = 0; 1151 fl6->flowlabel = 0;
1199 1152
@@ -1202,8 +1155,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1202 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1155 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1203 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; 1156 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1204 1157
1205 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); 1158 ip6_tnl_set_cap(t);
1206 p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1207 1159
1208 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) 1160 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
1209 dev->flags |= IFF_POINTOPOINT; 1161 dev->flags |= IFF_POINTOPOINT;
@@ -1223,18 +1175,18 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1223 if (rt == NULL) 1175 if (rt == NULL)
1224 return; 1176 return;
1225 1177
1226 if (rt->dst.dev) { 1178 if (rt->rt6i_dev) {
1227 dev->hard_header_len = rt->dst.dev->hard_header_len + 1179 dev->hard_header_len = rt->rt6i_dev->hard_header_len +
1228 sizeof (struct ipv6hdr); 1180 sizeof (struct ipv6hdr);
1229 1181
1230 dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr); 1182 dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
1231 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 1183 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1232 dev->mtu-=8; 1184 dev->mtu-=8;
1233 1185
1234 if (dev->mtu < IPV6_MIN_MTU) 1186 if (dev->mtu < IPV6_MIN_MTU)
1235 dev->mtu = IPV6_MIN_MTU; 1187 dev->mtu = IPV6_MIN_MTU;
1236 } 1188 }
1237 ip6_rt_put(rt); 1189 dst_release(&rt->dst);
1238 } 1190 }
1239} 1191}
1240 1192
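
The MTU arithmetic in the ip6_tnl_link_config() hunk above is easy to check by hand; the sketch below reproduces it in plain C. The 8-byte deduction corresponds to the destination-options header that carries the tunnel encapsulation limit when IP6_TNL_F_IGN_ENCAP_LIMIT is not set. The constants come from the code shown; the function name is illustrative.

/*
 * Sketch of the tunnel MTU bookkeeping in ip6_tnl_link_config() above:
 * underlying link MTU, minus the outer IPv6 header, minus 8 bytes for
 * the encapsulation-limit option when it will be inserted, clamped to
 * IPV6_MIN_MTU.
 */
#include <stdbool.h>
#include <stdio.h>

#define IPV6_MIN_MTU	1280
#define IPV6_HDR_LEN	40	/* sizeof(struct ipv6hdr) */
#define TEL_OPT_LEN	8	/* dst-opts header with the encap limit */

static int tnl_dev_mtu(int link_mtu, bool ign_encap_limit)
{
	int mtu = link_mtu - IPV6_HDR_LEN;

	if (!ign_encap_limit)
		mtu -= TEL_OPT_LEN;
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	return mtu;
}

int main(void)
{
	printf("1500-byte link, encap limit sent: %d\n", tnl_dev_mtu(1500, false));
	printf("1500-byte link, limit ignored:    %d\n", tnl_dev_mtu(1500, true));
	printf("1300-byte link, clamped:          %d\n", tnl_dev_mtu(1300, false));
	return 0;
}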
@@ -1248,10 +1200,10 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1248 **/ 1200 **/
1249 1201
1250static int 1202static int
1251ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) 1203ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
1252{ 1204{
1253 t->parms.laddr = p->laddr; 1205 ipv6_addr_copy(&t->parms.laddr, &p->laddr);
1254 t->parms.raddr = p->raddr; 1206 ipv6_addr_copy(&t->parms.raddr, &p->raddr);
1255 t->parms.flags = p->flags; 1207 t->parms.flags = p->flags;
1256 t->parms.hop_limit = p->hop_limit; 1208 t->parms.hop_limit = p->hop_limit;
1257 t->parms.encap_limit = p->encap_limit; 1209 t->parms.encap_limit = p->encap_limit;
@@ -1263,48 +1215,6 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1263 return 0; 1215 return 0;
1264} 1216}
1265 1217
1266static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1267{
1268 struct net *net = dev_net(t->dev);
1269 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1270 int err;
1271
1272 ip6_tnl_unlink(ip6n, t);
1273 synchronize_net();
1274 err = ip6_tnl_change(t, p);
1275 ip6_tnl_link(ip6n, t);
1276 netdev_state_change(t->dev);
1277 return err;
1278}
1279
1280static void
1281ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1282{
1283 p->laddr = u->laddr;
1284 p->raddr = u->raddr;
1285 p->flags = u->flags;
1286 p->hop_limit = u->hop_limit;
1287 p->encap_limit = u->encap_limit;
1288 p->flowinfo = u->flowinfo;
1289 p->link = u->link;
1290 p->proto = u->proto;
1291 memcpy(p->name, u->name, sizeof(u->name));
1292}
1293
1294static void
1295ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1296{
1297 u->laddr = p->laddr;
1298 u->raddr = p->raddr;
1299 u->flags = p->flags;
1300 u->hop_limit = p->hop_limit;
1301 u->encap_limit = p->encap_limit;
1302 u->flowinfo = p->flowinfo;
1303 u->link = p->link;
1304 u->proto = p->proto;
1305 memcpy(u->name, p->name, sizeof(u->name));
1306}
1307
1308/** 1218/**
1309 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace 1219 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1310 * @dev: virtual device associated with tunnel 1220 * @dev: virtual device associated with tunnel
@@ -1338,7 +1248,6 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1338{ 1248{
1339 int err = 0; 1249 int err = 0;
1340 struct ip6_tnl_parm p; 1250 struct ip6_tnl_parm p;
1341 struct __ip6_tnl_parm p1;
1342 struct ip6_tnl *t = NULL; 1251 struct ip6_tnl *t = NULL;
1343 struct net *net = dev_net(dev); 1252 struct net *net = dev_net(dev);
1344 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1253 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -1350,14 +1259,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1350 err = -EFAULT; 1259 err = -EFAULT;
1351 break; 1260 break;
1352 } 1261 }
1353 ip6_tnl_parm_from_user(&p1, &p); 1262 t = ip6_tnl_locate(net, &p, 0);
1354 t = ip6_tnl_locate(net, &p1, 0);
1355 } else {
1356 memset(&p, 0, sizeof(p));
1357 } 1263 }
1358 if (t == NULL) 1264 if (t == NULL)
1359 t = netdev_priv(dev); 1265 t = netdev_priv(dev);
1360 ip6_tnl_parm_to_user(&p, &t->parms); 1266 memcpy(&p, &t->parms, sizeof (p));
1361 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { 1267 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
1362 err = -EFAULT; 1268 err = -EFAULT;
1363 } 1269 }
@@ -1365,7 +1271,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1365 case SIOCADDTUNNEL: 1271 case SIOCADDTUNNEL:
1366 case SIOCCHGTUNNEL: 1272 case SIOCCHGTUNNEL:
1367 err = -EPERM; 1273 err = -EPERM;
1368 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1274 if (!capable(CAP_NET_ADMIN))
1369 break; 1275 break;
1370 err = -EFAULT; 1276 err = -EFAULT;
1371 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1277 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
@@ -1374,8 +1280,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1374 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && 1280 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1375 p.proto != 0) 1281 p.proto != 0)
1376 break; 1282 break;
1377 ip6_tnl_parm_from_user(&p1, &p); 1283 t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
1378 t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1379 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { 1284 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
1380 if (t != NULL) { 1285 if (t != NULL) {
1381 if (t->dev != dev) { 1286 if (t->dev != dev) {
@@ -1385,12 +1290,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1385 } else 1290 } else
1386 t = netdev_priv(dev); 1291 t = netdev_priv(dev);
1387 1292
1388 err = ip6_tnl_update(t, &p1); 1293 ip6_tnl_unlink(ip6n, t);
1294 synchronize_net();
1295 err = ip6_tnl_change(t, &p);
1296 ip6_tnl_link(ip6n, t);
1297 netdev_state_change(dev);
1389 } 1298 }
1390 if (t) { 1299 if (t) {
1391 err = 0; 1300 err = 0;
1392 ip6_tnl_parm_to_user(&p, &t->parms); 1301 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
1393 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1394 err = -EFAULT; 1302 err = -EFAULT;
1395 1303
1396 } else 1304 } else
@@ -1398,7 +1306,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1398 break; 1306 break;
1399 case SIOCDELTUNNEL: 1307 case SIOCDELTUNNEL:
1400 err = -EPERM; 1308 err = -EPERM;
1401 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1309 if (!capable(CAP_NET_ADMIN))
1402 break; 1310 break;
1403 1311
1404 if (dev == ip6n->fb_tnl_dev) { 1312 if (dev == ip6n->fb_tnl_dev) {
@@ -1406,9 +1314,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1406 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1314 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1407 break; 1315 break;
1408 err = -ENOENT; 1316 err = -ENOENT;
1409 ip6_tnl_parm_from_user(&p1, &p); 1317 if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
1410 t = ip6_tnl_locate(net, &p1, 0);
1411 if (t == NULL)
1412 break; 1318 break;
1413 err = -EPERM; 1319 err = -EPERM;
1414 if (t->dev == ip6n->fb_tnl_dev) 1320 if (t->dev == ip6n->fb_tnl_dev)
@@ -1534,171 +1440,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1534 1440
1535 t->parms.proto = IPPROTO_IPV6; 1441 t->parms.proto = IPPROTO_IPV6;
1536 dev_hold(dev); 1442 dev_hold(dev);
1537
1538 ip6_tnl_link_config(t);
1539
1540 rcu_assign_pointer(ip6n->tnls_wc[0], t); 1443 rcu_assign_pointer(ip6n->tnls_wc[0], t);
1541 return 0; 1444 return 0;
1542} 1445}
1543 1446
1544static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
1545{
1546 u8 proto;
1547
1548 if (!data)
1549 return 0;
1550
1551 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1552 if (proto != IPPROTO_IPV6 &&
1553 proto != IPPROTO_IPIP &&
1554 proto != 0)
1555 return -EINVAL;
1556
1557 return 0;
1558}
1559
1560static void ip6_tnl_netlink_parms(struct nlattr *data[],
1561 struct __ip6_tnl_parm *parms)
1562{
1563 memset(parms, 0, sizeof(*parms));
1564
1565 if (!data)
1566 return;
1567
1568 if (data[IFLA_IPTUN_LINK])
1569 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1570
1571 if (data[IFLA_IPTUN_LOCAL])
1572 nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
1573 sizeof(struct in6_addr));
1574
1575 if (data[IFLA_IPTUN_REMOTE])
1576 nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
1577 sizeof(struct in6_addr));
1578
1579 if (data[IFLA_IPTUN_TTL])
1580 parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
1581
1582 if (data[IFLA_IPTUN_ENCAP_LIMIT])
1583 parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
1584
1585 if (data[IFLA_IPTUN_FLOWINFO])
1586 parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
1587
1588 if (data[IFLA_IPTUN_FLAGS])
1589 parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
1590
1591 if (data[IFLA_IPTUN_PROTO])
1592 parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1593}
1594
1595static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
1596 struct nlattr *tb[], struct nlattr *data[])
1597{
1598 struct net *net = dev_net(dev);
1599 struct ip6_tnl *nt;
1600
1601 nt = netdev_priv(dev);
1602 ip6_tnl_netlink_parms(data, &nt->parms);
1603
1604 if (ip6_tnl_locate(net, &nt->parms, 0))
1605 return -EEXIST;
1606
1607 return ip6_tnl_create2(dev);
1608}
1609
1610static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
1611 struct nlattr *data[])
1612{
1613 struct ip6_tnl *t;
1614 struct __ip6_tnl_parm p;
1615 struct net *net = dev_net(dev);
1616 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1617
1618 if (dev == ip6n->fb_tnl_dev)
1619 return -EINVAL;
1620
1621 ip6_tnl_netlink_parms(data, &p);
1622
1623 t = ip6_tnl_locate(net, &p, 0);
1624
1625 if (t) {
1626 if (t->dev != dev)
1627 return -EEXIST;
1628 } else
1629 t = netdev_priv(dev);
1630
1631 return ip6_tnl_update(t, &p);
1632}
1633
1634static size_t ip6_tnl_get_size(const struct net_device *dev)
1635{
1636 return
1637 /* IFLA_IPTUN_LINK */
1638 nla_total_size(4) +
1639 /* IFLA_IPTUN_LOCAL */
1640 nla_total_size(sizeof(struct in6_addr)) +
1641 /* IFLA_IPTUN_REMOTE */
1642 nla_total_size(sizeof(struct in6_addr)) +
1643 /* IFLA_IPTUN_TTL */
1644 nla_total_size(1) +
1645 /* IFLA_IPTUN_ENCAP_LIMIT */
1646 nla_total_size(1) +
1647 /* IFLA_IPTUN_FLOWINFO */
1648 nla_total_size(4) +
1649 /* IFLA_IPTUN_FLAGS */
1650 nla_total_size(4) +
1651 /* IFLA_IPTUN_PROTO */
1652 nla_total_size(1) +
1653 0;
1654}
1655
1656static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
1657{
1658 struct ip6_tnl *tunnel = netdev_priv(dev);
1659 struct __ip6_tnl_parm *parm = &tunnel->parms;
1660
1661 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1662 nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
1663 &parm->raddr) ||
1664 nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
1665 &parm->laddr) ||
1666 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
1667 nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
1668 nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
1669 nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
1670 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
1671 goto nla_put_failure;
1672 return 0;
1673
1674nla_put_failure:
1675 return -EMSGSIZE;
1676}
1677
1678static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
1679 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
1680 [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) },
1681 [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) },
1682 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
1683 [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 },
1684 [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
1685 [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
1686 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
1687};
1688
1689static struct rtnl_link_ops ip6_link_ops __read_mostly = {
1690 .kind = "ip6tnl",
1691 .maxtype = IFLA_IPTUN_MAX,
1692 .policy = ip6_tnl_policy,
1693 .priv_size = sizeof(struct ip6_tnl),
1694 .setup = ip6_tnl_dev_setup,
1695 .validate = ip6_tnl_validate,
1696 .newlink = ip6_tnl_newlink,
1697 .changelink = ip6_tnl_changelink,
1698 .get_size = ip6_tnl_get_size,
1699 .fill_info = ip6_tnl_fill_info,
1700};
1701
1702static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { 1447static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
1703 .handler = ip4ip6_rcv, 1448 .handler = ip4ip6_rcv,
1704 .err_handler = ip4ip6_err, 1449 .err_handler = ip4ip6_err,
@@ -1798,23 +1543,18 @@ static int __init ip6_tunnel_init(void)
1798 1543
1799 err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET); 1544 err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
1800 if (err < 0) { 1545 if (err < 0) {
1801 pr_err("%s: can't register ip4ip6\n", __func__); 1546 printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
1802 goto out_ip4ip6; 1547 goto out_ip4ip6;
1803 } 1548 }
1804 1549
1805 err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6); 1550 err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
1806 if (err < 0) { 1551 if (err < 0) {
1807 pr_err("%s: can't register ip6ip6\n", __func__); 1552 printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
1808 goto out_ip6ip6; 1553 goto out_ip6ip6;
1809 } 1554 }
1810 err = rtnl_link_register(&ip6_link_ops);
1811 if (err < 0)
1812 goto rtnl_link_failed;
1813 1555
1814 return 0; 1556 return 0;
1815 1557
1816rtnl_link_failed:
1817 xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
1818out_ip6ip6: 1558out_ip6ip6:
1819 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); 1559 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
1820out_ip4ip6: 1560out_ip4ip6:
@@ -1829,12 +1569,11 @@ out_pernet:
1829 1569
1830static void __exit ip6_tunnel_cleanup(void) 1570static void __exit ip6_tunnel_cleanup(void)
1831{ 1571{
1832 rtnl_link_unregister(&ip6_link_ops);
1833 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) 1572 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
1834 pr_info("%s: can't deregister ip4ip6\n", __func__); 1573 printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
1835 1574
1836 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) 1575 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
1837 pr_info("%s: can't deregister ip6ip6\n", __func__); 1576 printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
1838 1577
1839 unregister_pernet_device(&ip6_tnl_net_ops); 1578 unregister_pernet_device(&ip6_tnl_net_ops);
1840} 1579}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 26dcdec9e3a..def0538e241 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -16,6 +16,7 @@
16 * 16 *
17 */ 17 */
18 18
19#include <asm/system.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include <linux/types.h> 21#include <linux/types.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
@@ -50,9 +51,7 @@
50#include <linux/pim.h> 51#include <linux/pim.h>
51#include <net/addrconf.h> 52#include <net/addrconf.h>
52#include <linux/netfilter_ipv6.h> 53#include <linux/netfilter_ipv6.h>
53#include <linux/export.h>
54#include <net/ip6_checksum.h> 54#include <net/ip6_checksum.h>
55#include <linux/netconf.h>
56 55
57struct mr6_table { 56struct mr6_table {
58 struct list_head list; 57 struct list_head list;
@@ -67,8 +66,8 @@ struct mr6_table {
67 struct mif_device vif6_table[MAXMIFS]; 66 struct mif_device vif6_table[MAXMIFS];
68 int maxvif; 67 int maxvif;
69 atomic_t cache_resolve_queue_len; 68 atomic_t cache_resolve_queue_len;
70 bool mroute_do_assert; 69 int mroute_do_assert;
71 bool mroute_do_pim; 70 int mroute_do_pim;
72#ifdef CONFIG_IPV6_PIMSM_V2 71#ifdef CONFIG_IPV6_PIMSM_V2
73 int mroute_reg_vif_num; 72 int mroute_reg_vif_num;
74#endif 73#endif
@@ -116,8 +115,6 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 mifi_t mifi, int assert); 115 mifi_t mifi, int assert);
117static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 116static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118 struct mfc6_cache *c, struct rtmsg *rtm); 117 struct mfc6_cache *c, struct rtmsg *rtm);
119static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
120 int cmd);
121static int ip6mr_rtm_dumproute(struct sk_buff *skb, 118static int ip6mr_rtm_dumproute(struct sk_buff *skb,
122 struct netlink_callback *cb); 119 struct netlink_callback *cb);
123static void mroute_clean_tables(struct mr6_table *mrt); 120static void mroute_clean_tables(struct mr6_table *mrt);
@@ -208,7 +205,7 @@ static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
208 return 0; 205 return 0;
209} 206}
210 207
211static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { 208static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
212 .family = RTNL_FAMILY_IP6MR, 209 .family = RTNL_FAMILY_IP6MR,
213 .rule_size = sizeof(struct ip6mr_rule), 210 .rule_size = sizeof(struct ip6mr_rule),
214 .addr_size = sizeof(struct in6_addr), 211 .addr_size = sizeof(struct in6_addr),
@@ -808,12 +805,8 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
808 dev_set_allmulti(dev, -1); 805 dev_set_allmulti(dev, -1);
809 806
810 in6_dev = __in6_dev_get(dev); 807 in6_dev = __in6_dev_get(dev);
811 if (in6_dev) { 808 if (in6_dev)
812 in6_dev->cnf.mc_forwarding--; 809 in6_dev->cnf.mc_forwarding--;
813 inet6_netconf_notify_devconf(dev_net(dev),
814 NETCONFA_MC_FORWARDING,
815 dev->ifindex, &in6_dev->cnf);
816 }
817 810
818 if (v->flags & MIFF_REGISTER) 811 if (v->flags & MIFF_REGISTER)
819 unregister_netdevice_queue(dev, head); 812 unregister_netdevice_queue(dev, head);
@@ -845,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
845 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 838 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
846 skb_trim(skb, nlh->nlmsg_len); 839 skb_trim(skb, nlh->nlmsg_len);
847 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; 840 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
848 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 841 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
849 } else 842 } else
850 kfree_skb(skb); 843 kfree_skb(skb);
851 } 844 }
@@ -872,7 +865,6 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
872 } 865 }
873 866
874 list_del(&c->list); 867 list_del(&c->list);
875 mr6_netlink_event(mrt, c, RTM_DELROUTE);
876 ip6mr_destroy_unres(mrt, c); 868 ip6mr_destroy_unres(mrt, c);
877 } 869 }
878 870
@@ -966,12 +958,8 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
966 } 958 }
967 959
968 in6_dev = __in6_dev_get(dev); 960 in6_dev = __in6_dev_get(dev);
969 if (in6_dev) { 961 if (in6_dev)
970 in6_dev->cnf.mc_forwarding++; 962 in6_dev->cnf.mc_forwarding++;
971 inet6_netconf_notify_devconf(dev_net(dev),
972 NETCONFA_MC_FORWARDING,
973 dev->ifindex, &in6_dev->cnf);
974 }
975 963
976 /* 964 /*
977 * Fill in the VIF structures 965 * Fill in the VIF structures
@@ -1064,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1064 skb_trim(skb, nlh->nlmsg_len); 1052 skb_trim(skb, nlh->nlmsg_len);
1065 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; 1053 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1066 } 1054 }
1067 rtnl_unicast(skb, net, NETLINK_CB(skb).portid); 1055 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1068 } else 1056 } else
1069 ip6_mr_forward(net, mrt, skb, c); 1057 ip6_mr_forward(net, mrt, skb, c);
1070 } 1058 }
@@ -1116,8 +1104,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1116 msg->im6_msgtype = MRT6MSG_WHOLEPKT; 1104 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1117 msg->im6_mif = mrt->mroute_reg_vif_num; 1105 msg->im6_mif = mrt->mroute_reg_vif_num;
1118 msg->im6_pad = 0; 1106 msg->im6_pad = 0;
1119 msg->im6_src = ipv6_hdr(pkt)->saddr; 1107 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1120 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1108 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1121 1109
1122 skb->ip_summed = CHECKSUM_UNNECESSARY; 1110 skb->ip_summed = CHECKSUM_UNNECESSARY;
1123 } else 1111 } else
@@ -1142,8 +1130,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1142 msg->im6_msgtype = assert; 1130 msg->im6_msgtype = assert;
1143 msg->im6_mif = mifi; 1131 msg->im6_mif = mifi;
1144 msg->im6_pad = 0; 1132 msg->im6_pad = 0;
1145 msg->im6_src = ipv6_hdr(pkt)->saddr; 1133 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1146 msg->im6_dst = ipv6_hdr(pkt)->daddr; 1134 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1147 1135
1148 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 1136 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1149 skb->ip_summed = CHECKSUM_UNNECESSARY; 1137 skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1159,7 +1147,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1159 */ 1147 */
1160 ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); 1148 ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1161 if (ret < 0) { 1149 if (ret < 0) {
1162 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); 1150 if (net_ratelimit())
1151 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1163 kfree_skb(skb); 1152 kfree_skb(skb);
1164 } 1153 }
1165 1154
@@ -1223,7 +1212,6 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1223 1212
1224 atomic_inc(&mrt->cache_resolve_queue_len); 1213 atomic_inc(&mrt->cache_resolve_queue_len);
1225 list_add(&c->list, &mrt->mfc6_unres_queue); 1214 list_add(&c->list, &mrt->mfc6_unres_queue);
1226 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1227 1215
1228 ipmr_do_expire_process(mrt); 1216 ipmr_do_expire_process(mrt);
1229 } 1217 }
@@ -1261,7 +1249,6 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1261 list_del(&c->list); 1249 list_del(&c->list);
1262 write_unlock_bh(&mrt_lock); 1250 write_unlock_bh(&mrt_lock);
1263 1251
1264 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1265 ip6mr_cache_free(c); 1252 ip6mr_cache_free(c);
1266 return 0; 1253 return 0;
1267 } 1254 }
@@ -1364,7 +1351,7 @@ int __init ip6_mr_init(void)
1364 goto reg_notif_fail; 1351 goto reg_notif_fail;
1365#ifdef CONFIG_IPV6_PIMSM_V2 1352#ifdef CONFIG_IPV6_PIMSM_V2
1366 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { 1353 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1367 pr_err("%s: can't add PIM protocol\n", __func__); 1354 printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1368 err = -EAGAIN; 1355 err = -EAGAIN;
1369 goto add_proto_fail; 1356 goto add_proto_fail;
1370 } 1357 }
@@ -1426,7 +1413,6 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1426 if (!mrtsock) 1413 if (!mrtsock)
1427 c->mfc_flags |= MFC_STATIC; 1414 c->mfc_flags |= MFC_STATIC;
1428 write_unlock_bh(&mrt_lock); 1415 write_unlock_bh(&mrt_lock);
1429 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1430 return 0; 1416 return 0;
1431 } 1417 }
1432 1418
@@ -1471,7 +1457,6 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1471 ip6mr_cache_resolve(net, mrt, uc, c); 1457 ip6mr_cache_resolve(net, mrt, uc, c);
1472 ip6mr_cache_free(uc); 1458 ip6mr_cache_free(uc);
1473 } 1459 }
1474 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1475 return 0; 1460 return 0;
1476} 1461}
1477 1462
@@ -1505,7 +1490,6 @@ static void mroute_clean_tables(struct mr6_table *mrt)
1505 list_del(&c->list); 1490 list_del(&c->list);
1506 write_unlock_bh(&mrt_lock); 1491 write_unlock_bh(&mrt_lock);
1507 1492
1508 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1509 ip6mr_cache_free(c); 1493 ip6mr_cache_free(c);
1510 } 1494 }
1511 } 1495 }
@@ -1514,7 +1498,6 @@ static void mroute_clean_tables(struct mr6_table *mrt)
1514 spin_lock_bh(&mfc_unres_lock); 1498 spin_lock_bh(&mfc_unres_lock);
1515 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { 1499 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1516 list_del(&c->list); 1500 list_del(&c->list);
1517 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1518 ip6mr_destroy_unres(mrt, c); 1501 ip6mr_destroy_unres(mrt, c);
1519 } 1502 }
1520 spin_unlock_bh(&mfc_unres_lock); 1503 spin_unlock_bh(&mfc_unres_lock);
@@ -1531,9 +1514,6 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1531 if (likely(mrt->mroute6_sk == NULL)) { 1514 if (likely(mrt->mroute6_sk == NULL)) {
1532 mrt->mroute6_sk = sk; 1515 mrt->mroute6_sk = sk;
1533 net->ipv6.devconf_all->mc_forwarding++; 1516 net->ipv6.devconf_all->mc_forwarding++;
1534 inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1535 NETCONFA_IFINDEX_ALL,
1536 net->ipv6.devconf_all);
1537 } 1517 }
1538 else 1518 else
1539 err = -EADDRINUSE; 1519 err = -EADDRINUSE;
@@ -1556,10 +1536,6 @@ int ip6mr_sk_done(struct sock *sk)
1556 write_lock_bh(&mrt_lock); 1536 write_lock_bh(&mrt_lock);
1557 mrt->mroute6_sk = NULL; 1537 mrt->mroute6_sk = NULL;
1558 net->ipv6.devconf_all->mc_forwarding--; 1538 net->ipv6.devconf_all->mc_forwarding--;
1559 inet6_netconf_notify_devconf(net,
1560 NETCONFA_MC_FORWARDING,
1561 NETCONFA_IFINDEX_ALL,
1562 net->ipv6.devconf_all);
1563 write_unlock_bh(&mrt_lock); 1539 write_unlock_bh(&mrt_lock);
1564 1540
1565 mroute_clean_tables(mrt); 1541 mroute_clean_tables(mrt);
@@ -1608,7 +1584,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1608 return -ENOENT; 1584 return -ENOENT;
1609 1585
1610 if (optname != MRT6_INIT) { 1586 if (optname != MRT6_INIT) {
1611 if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1587 if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1612 return -EACCES; 1588 return -EACCES;
1613 } 1589 }
1614 1590
@@ -1671,12 +1647,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1671 case MRT6_ASSERT: 1647 case MRT6_ASSERT:
1672 { 1648 {
1673 int v; 1649 int v;
1674
1675 if (optlen != sizeof(v))
1676 return -EINVAL;
1677 if (get_user(v, (int __user *)optval)) 1650 if (get_user(v, (int __user *)optval))
1678 return -EFAULT; 1651 return -EFAULT;
1679 mrt->mroute_do_assert = v; 1652 mrt->mroute_do_assert = !!v;
1680 return 0; 1653 return 0;
1681 } 1654 }
1682 1655
@@ -1684,9 +1657,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1684 case MRT6_PIM: 1657 case MRT6_PIM:
1685 { 1658 {
1686 int v; 1659 int v;
1687
1688 if (optlen != sizeof(v))
1689 return -EINVAL;
1690 if (get_user(v, (int __user *)optval)) 1660 if (get_user(v, (int __user *)optval))
1691 return -EFAULT; 1661 return -EFAULT;
1692 v = !!v; 1662 v = !!v;
@@ -1917,8 +1887,6 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1917{ 1887{
1918 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), 1888 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1919 IPSTATS_MIB_OUTFORWDATAGRAMS); 1889 IPSTATS_MIB_OUTFORWDATAGRAMS);
1920 IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1921 IPSTATS_MIB_OUTOCTETS, skb->len);
1922 return dst_output(skb); 1890 return dst_output(skb);
1923} 1891}
1924 1892
@@ -1957,10 +1925,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1957 }; 1925 };
1958 1926
1959 dst = ip6_route_output(net, NULL, &fl6); 1927 dst = ip6_route_output(net, NULL, &fl6);
1960 if (dst->error) { 1928 if (!dst)
1961 dst_release(dst);
1962 goto out_free; 1929 goto out_free;
1963 }
1964 1930
1965 skb_dst_drop(skb); 1931 skb_dst_drop(skb);
1966 skb_dst_set(skb, dst); 1932 skb_dst_set(skb, dst);
@@ -2128,45 +2094,37 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2128{ 2094{
2129 int ct; 2095 int ct;
2130 struct rtnexthop *nhp; 2096 struct rtnexthop *nhp;
2131 struct nlattr *mp_attr; 2097 u8 *b = skb_tail_pointer(skb);
2132 struct rta_mfc_stats mfcs; 2098 struct rtattr *mp_head;
2133 2099
2134 /* If cache is unresolved, don't try to parse IIF and OIF */ 2100 /* If cache is unresolved, don't try to parse IIF and OIF */
2135 if (c->mf6c_parent >= MAXMIFS) 2101 if (c->mf6c_parent >= MAXMIFS)
2136 return -ENOENT; 2102 return -ENOENT;
2137 2103
2138 if (MIF_EXISTS(mrt, c->mf6c_parent) && 2104 if (MIF_EXISTS(mrt, c->mf6c_parent))
2139 nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) 2105 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2140 return -EMSGSIZE; 2106
2141 mp_attr = nla_nest_start(skb, RTA_MULTIPATH); 2107 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2142 if (mp_attr == NULL)
2143 return -EMSGSIZE;
2144 2108
2145 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2109 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2146 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { 2110 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2147 nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); 2111 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2148 if (nhp == NULL) { 2112 goto rtattr_failure;
2149 nla_nest_cancel(skb, mp_attr); 2113 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2150 return -EMSGSIZE;
2151 }
2152
2153 nhp->rtnh_flags = 0; 2114 nhp->rtnh_flags = 0;
2154 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2115 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2155 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; 2116 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2156 nhp->rtnh_len = sizeof(*nhp); 2117 nhp->rtnh_len = sizeof(*nhp);
2157 } 2118 }
2158 } 2119 }
2159 2120 mp_head->rta_type = RTA_MULTIPATH;
2160 nla_nest_end(skb, mp_attr); 2121 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2161
2162 mfcs.mfcs_packets = c->mfc_un.res.pkt;
2163 mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2164 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2165 if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2166 return -EMSGSIZE;
2167
2168 rtm->rtm_type = RTN_MULTICAST; 2122 rtm->rtm_type = RTN_MULTICAST;
2169 return 1; 2123 return 1;
2124
2125rtattr_failure:
2126 nlmsg_trim(skb, b);
2127 return -EMSGSIZE;
2170} 2128}
2171 2129
2172int ip6mr_get_route(struct net *net, 2130int ip6mr_get_route(struct net *net,
@@ -2222,8 +2180,8 @@ int ip6mr_get_route(struct net *net,
2222 iph->payload_len = 0; 2180 iph->payload_len = 0;
2223 iph->nexthdr = IPPROTO_NONE; 2181 iph->nexthdr = IPPROTO_NONE;
2224 iph->hop_limit = 0; 2182 iph->hop_limit = 0;
2225 iph->saddr = rt->rt6i_src.addr; 2183 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2226 iph->daddr = rt->rt6i_dst.addr; 2184 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2227 2185
2228 err = ip6mr_cache_unresolved(mrt, vif, skb2); 2186 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2229 read_unlock(&mrt_lock); 2187 read_unlock(&mrt_lock);
@@ -2240,38 +2198,30 @@ int ip6mr_get_route(struct net *net,
2240} 2198}
2241 2199
2242static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 2200static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2243 u32 portid, u32 seq, struct mfc6_cache *c, int cmd) 2201 u32 pid, u32 seq, struct mfc6_cache *c)
2244{ 2202{
2245 struct nlmsghdr *nlh; 2203 struct nlmsghdr *nlh;
2246 struct rtmsg *rtm; 2204 struct rtmsg *rtm;
2247 int err;
2248 2205
2249 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); 2206 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2250 if (nlh == NULL) 2207 if (nlh == NULL)
2251 return -EMSGSIZE; 2208 return -EMSGSIZE;
2252 2209
2253 rtm = nlmsg_data(nlh); 2210 rtm = nlmsg_data(nlh);
2254 rtm->rtm_family = RTNL_FAMILY_IP6MR; 2211 rtm->rtm_family = RTNL_FAMILY_IPMR;
2255 rtm->rtm_dst_len = 128; 2212 rtm->rtm_dst_len = 128;
2256 rtm->rtm_src_len = 128; 2213 rtm->rtm_src_len = 128;
2257 rtm->rtm_tos = 0; 2214 rtm->rtm_tos = 0;
2258 rtm->rtm_table = mrt->id; 2215 rtm->rtm_table = mrt->id;
2259 if (nla_put_u32(skb, RTA_TABLE, mrt->id)) 2216 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2260 goto nla_put_failure;
2261 rtm->rtm_type = RTN_MULTICAST;
2262 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2217 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2263 if (c->mfc_flags & MFC_STATIC) 2218 rtm->rtm_protocol = RTPROT_UNSPEC;
2264 rtm->rtm_protocol = RTPROT_STATIC;
2265 else
2266 rtm->rtm_protocol = RTPROT_MROUTED;
2267 rtm->rtm_flags = 0; 2219 rtm->rtm_flags = 0;
2268 2220
2269 if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || 2221 NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2270 nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp)) 2222 NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2271 goto nla_put_failure; 2223
2272 err = __ip6mr_fill_mroute(mrt, skb, c, rtm); 2224 if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2273 /* do not break the dump if cache is unresolved */
2274 if (err < 0 && err != -ENOENT)
2275 goto nla_put_failure; 2225 goto nla_put_failure;
2276 2226
2277 return nlmsg_end(skb, nlh); 2227 return nlmsg_end(skb, nlh);
@@ -2281,52 +2231,6 @@ nla_put_failure:
2281 return -EMSGSIZE; 2231 return -EMSGSIZE;
2282} 2232}
2283 2233
2284static int mr6_msgsize(bool unresolved, int maxvif)
2285{
2286 size_t len =
2287 NLMSG_ALIGN(sizeof(struct rtmsg))
2288 + nla_total_size(4) /* RTA_TABLE */
2289 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2290 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2291 ;
2292
2293 if (!unresolved)
2294 len = len
2295 + nla_total_size(4) /* RTA_IIF */
2296 + nla_total_size(0) /* RTA_MULTIPATH */
2297 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2298 /* RTA_MFC_STATS */
2299 + nla_total_size(sizeof(struct rta_mfc_stats))
2300 ;
2301
2302 return len;
2303}
2304
2305static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2306 int cmd)
2307{
2308 struct net *net = read_pnet(&mrt->net);
2309 struct sk_buff *skb;
2310 int err = -ENOBUFS;
2311
2312 skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2313 GFP_ATOMIC);
2314 if (skb == NULL)
2315 goto errout;
2316
2317 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2318 if (err < 0)
2319 goto errout;
2320
2321 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2322 return;
2323
2324errout:
2325 kfree_skb(skb);
2326 if (err < 0)
2327 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2328}
2329
2330static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) 2234static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2331{ 2235{
2332 struct net *net = sock_net(skb->sk); 2236 struct net *net = sock_net(skb->sk);
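
The mr6_msgsize() helper removed earlier in this hunk pre-computes how much skb space a netlink notification needs before nlmsg_new() allocates it. The arithmetic is just attribute header plus payload, rounded up to 4 bytes per attribute; the sketch below reproduces that budgeting for the three attributes every entry carries, leaving out the rtmsg header and the per-VIF nexthop block for brevity. It mirrors the kernel's NLA_ALIGN()/nla_total_size() definitions in plain C as an illustration only.

/*
 * Sketch of the size budgeting done by the removed mr6_msgsize() above.
 * Each netlink attribute costs a 4-byte header plus its payload, with
 * the total rounded up to a 4-byte boundary.
 */
#include <stdio.h>

#define NLA_ALIGNTO	4
#define NLA_ALIGN(len)	(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN	NLA_ALIGN(4)	/* struct nlattr is 4 bytes */

static int nla_total_size(int payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	int in6_len = 16;			/* sizeof(struct in6_addr) */
	int len = nla_total_size(4)		/* RTA_TABLE */
		+ nla_total_size(in6_len)	/* RTA_SRC   */
		+ nla_total_size(in6_len);	/* RTA_DST   */

	printf("attribute budget per unresolved entry: %d bytes\n", len);
	return 0;
}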
@@ -2351,31 +2255,15 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2351 if (e < s_e) 2255 if (e < s_e)
2352 goto next_entry; 2256 goto next_entry;
2353 if (ip6mr_fill_mroute(mrt, skb, 2257 if (ip6mr_fill_mroute(mrt, skb,
2354 NETLINK_CB(cb->skb).portid, 2258 NETLINK_CB(cb->skb).pid,
2355 cb->nlh->nlmsg_seq, 2259 cb->nlh->nlmsg_seq,
2356 mfc, RTM_NEWROUTE) < 0) 2260 mfc) < 0)
2357 goto done; 2261 goto done;
2358next_entry: 2262next_entry:
2359 e++; 2263 e++;
2360 } 2264 }
2361 e = s_e = 0; 2265 e = s_e = 0;
2362 } 2266 }
2363 spin_lock_bh(&mfc_unres_lock);
2364 list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2365 if (e < s_e)
2366 goto next_entry2;
2367 if (ip6mr_fill_mroute(mrt, skb,
2368 NETLINK_CB(cb->skb).portid,
2369 cb->nlh->nlmsg_seq,
2370 mfc, RTM_NEWROUTE) < 0) {
2371 spin_unlock_bh(&mfc_unres_lock);
2372 goto done;
2373 }
2374next_entry2:
2375 e++;
2376 }
2377 spin_unlock_bh(&mfc_unres_lock);
2378 e = s_e = 0;
2379 s_h = 0; 2267 s_h = 0;
2380next_table: 2268next_table:
2381 t++; 2269 t++;
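ip6mr_rtm_dumproute() in the hunk above is a netlink dump callback: it can be invoked repeatedly for one dump, so it skips entries below the saved s_h/s_e counters and stops as soon as the message buffer fills. A rough, self-contained sketch of that resume-from-saved-index pattern (emit_entry()/dump_pass() are toy names, not kernel code):

#include <stdio.h>

#define N_ENTRIES 10

/* pretend the output buffer only has room for 4 entries per pass */
static int emit_entry(int idx, int *room)
{
        if (*room == 0)
                return -1;      /* "skb full": caller must stop and resume later */
        (*room)--;
        printf("dumped entry %d\n", idx);
        return 0;
}

static void dump_pass(int *saved_e)
{
        int room = 4;
        int e;

        for (e = 0; e < N_ENTRIES; e++) {
                if (e < *saved_e)
                        continue;       /* already sent in an earlier pass */
                if (emit_entry(e, &room) < 0)
                        break;          /* remember where to pick up next time */
                *saved_e = e + 1;
        }
}

int main(void)
{
        int saved_e = 0;

        dump_pass(&saved_e);    /* first recv() on the dump socket */
        dump_pass(&saved_e);    /* second recv() continues where we left off */
        dump_pass(&saved_e);
        return 0;
}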
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7af5aee75d9..bba658d9a03 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -30,9 +30,6 @@
30 * The decompression of IP datagram MUST be done after the reassembly, 30 * The decompression of IP datagram MUST be done after the reassembly,
31 * AH/ESP processing. 31 * AH/ESP processing.
32 */ 32 */
33
34#define pr_fmt(fmt) "IPv6: " fmt
35
36#include <linux/module.h> 33#include <linux/module.h>
37#include <net/ip.h> 34#include <net/ip.h>
38#include <net/xfrm.h> 35#include <net/xfrm.h>
@@ -46,7 +43,6 @@
46#include <linux/list.h> 43#include <linux/list.h>
47#include <linux/vmalloc.h> 44#include <linux/vmalloc.h>
48#include <linux/rtnetlink.h> 45#include <linux/rtnetlink.h>
49#include <net/ip6_route.h>
50#include <net/icmp.h> 46#include <net/icmp.h>
51#include <net/ipv6.h> 47#include <net/ipv6.h>
52#include <net/protocol.h> 48#include <net/protocol.h>
@@ -64,9 +60,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
64 (struct ip_comp_hdr *)(skb->data + offset); 60 (struct ip_comp_hdr *)(skb->data + offset);
65 struct xfrm_state *x; 61 struct xfrm_state *x;
66 62
67 if (type != ICMPV6_DEST_UNREACH && 63 if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
68 type != ICMPV6_PKT_TOOBIG &&
69 type != NDISC_REDIRECT)
70 return; 64 return;
71 65
72 spi = htonl(ntohs(ipcomph->cpi)); 66 spi = htonl(ntohs(ipcomph->cpi));
@@ -75,10 +69,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 if (!x) 69 if (!x)
76 return; 70 return;
77 71
78 if (type == NDISC_REDIRECT) 72 printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n",
79 ip6_redirect(skb, net, 0, 0); 73 spi, &iph->daddr);
80 else
81 ip6_update_pmtu(skb, net, info, 0, 0);
82 xfrm_state_put(x); 74 xfrm_state_put(x);
83} 75}
84 76
@@ -198,11 +190,11 @@ static const struct inet6_protocol ipcomp6_protocol =
198static int __init ipcomp6_init(void) 190static int __init ipcomp6_init(void)
199{ 191{
200 if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) { 192 if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
201 pr_info("%s: can't add xfrm type\n", __func__); 193 printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
202 return -EAGAIN; 194 return -EAGAIN;
203 } 195 }
204 if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) { 196 if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
205 pr_info("%s: can't add protocol\n", __func__); 197 printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
206 xfrm_unregister_type(&ipcomp6_type, AF_INET6); 198 xfrm_unregister_type(&ipcomp6_type, AF_INET6);
207 return -EAGAIN; 199 return -EAGAIN;
208 } 200 }
@@ -212,9 +204,9 @@ static int __init ipcomp6_init(void)
212static void __exit ipcomp6_fini(void) 204static void __exit ipcomp6_fini(void)
213{ 205{
214 if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) 206 if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0)
215 pr_info("%s: can't remove protocol\n", __func__); 207 printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
216 if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0) 208 if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
217 pr_info("%s: can't remove xfrm type\n", __func__); 209 printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
218} 210}
219 211
220module_init(ipcomp6_init); 212module_init(ipcomp6_init);
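In the ipcomp6.c hunks, the left-hand column defines pr_fmt() before the includes and logs through pr_info(), while the right-hand column spells out printk(KERN_INFO ...) with hand-written prefixes. A small user-space sketch of how a pr_fmt() definition gets pasted onto every message at compile time; the pr_info() below is a printf stand-in, not the kernel macro:

#include <stdio.h>

/* defined before the logging macros, exactly like the kernel convention */
#define pr_fmt(fmt) "IPv6: " fmt

/* simplified stand-in: string-literal concatenation adds the prefix */
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
        /* prints "IPv6: ipcomp6_init: can't add xfrm type" */
        pr_info("%s: can't add xfrm type\n", "ipcomp6_init");
        return 0;
}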
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ee94d31c9d4..2fbda5fc4cc 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -343,8 +343,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
343 break; 343 break;
344 344
345 case IPV6_TRANSPARENT: 345 case IPV6_TRANSPARENT:
346 if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) && 346 if (!capable(CAP_NET_ADMIN)) {
347 !ns_capable(net->user_ns, CAP_NET_RAW)) {
348 retv = -EPERM; 347 retv = -EPERM;
349 break; 348 break;
350 } 349 }
@@ -382,7 +381,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
382 381
383 /* hop-by-hop / destination options are privileged option */ 382 /* hop-by-hop / destination options are privileged option */
384 retv = -EPERM; 383 retv = -EPERM;
385 if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) 384 if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
386 break; 385 break;
387 386
388 opt = ipv6_renew_options(sk, np->opt, optname, 387 opt = ipv6_renew_options(sk, np->opt, optname,
@@ -398,7 +397,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
398 if (optname == IPV6_RTHDR && opt && opt->srcrt) { 397 if (optname == IPV6_RTHDR && opt && opt->srcrt) {
399 struct ipv6_rt_hdr *rthdr = opt->srcrt; 398 struct ipv6_rt_hdr *rthdr = opt->srcrt;
400 switch (rthdr->type) { 399 switch (rthdr->type) {
401#if IS_ENABLED(CONFIG_IPV6_MIP6) 400#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
402 case IPV6_SRCRT_TYPE_2: 401 case IPV6_SRCRT_TYPE_2:
403 if (rthdr->hdrlen != 2 || 402 if (rthdr->hdrlen != 2 ||
404 rthdr->segments_left != 1) 403 rthdr->segments_left != 1)
@@ -436,7 +435,7 @@ sticky_done:
436 goto e_inval; 435 goto e_inval;
437 436
438 np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; 437 np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
439 np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr; 438 ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr);
440 retv = 0; 439 retv = 0;
441 break; 440 break;
442 } 441 }
@@ -504,7 +503,7 @@ done:
504 goto e_inval; 503 goto e_inval;
505 if (val > 255 || val < -1) 504 if (val > 255 || val < -1)
506 goto e_inval; 505 goto e_inval;
507 np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); 506 np->mcast_hops = val;
508 retv = 0; 507 retv = 0;
509 break; 508 break;
510 509
@@ -517,36 +516,6 @@ done:
517 retv = 0; 516 retv = 0;
518 break; 517 break;
519 518
520 case IPV6_UNICAST_IF:
521 {
522 struct net_device *dev = NULL;
523 int ifindex;
524
525 if (optlen != sizeof(int))
526 goto e_inval;
527
528 ifindex = (__force int)ntohl((__force __be32)val);
529 if (ifindex == 0) {
530 np->ucast_oif = 0;
531 retv = 0;
532 break;
533 }
534
535 dev = dev_get_by_index(net, ifindex);
536 retv = -EADDRNOTAVAIL;
537 if (!dev)
538 break;
539 dev_put(dev);
540
541 retv = -EINVAL;
542 if (sk->sk_bound_dev_if)
543 break;
544
545 np->ucast_oif = ifindex;
546 retv = 0;
547 break;
548 }
549
550 case IPV6_MULTICAST_IF: 519 case IPV6_MULTICAST_IF:
551 if (sk->sk_type == SOCK_STREAM) 520 if (sk->sk_type == SOCK_STREAM)
552 break; 521 break;
@@ -679,6 +648,7 @@ done:
679 } 648 }
680 case MCAST_MSFILTER: 649 case MCAST_MSFILTER:
681 { 650 {
651 extern int sysctl_mld_max_msf;
682 struct group_filter *gsf; 652 struct group_filter *gsf;
683 653
684 if (optlen < GROUP_FILTER_SIZE(0)) 654 if (optlen < GROUP_FILTER_SIZE(0))
@@ -755,7 +725,7 @@ done:
755 case IPV6_IPSEC_POLICY: 725 case IPV6_IPSEC_POLICY:
756 case IPV6_XFRM_POLICY: 726 case IPV6_XFRM_POLICY:
757 retv = -EPERM; 727 retv = -EPERM;
758 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 728 if (!capable(CAP_NET_ADMIN))
759 break; 729 break;
760 retv = xfrm_user_policy(sk, optname, optval, optlen); 730 retv = xfrm_user_policy(sk, optname, optval, optlen);
761 break; 731 break;
@@ -828,7 +798,6 @@ pref_skip_coa:
828 if (val < 0 || val > 255) 798 if (val < 0 || val > 255)
829 goto e_inval; 799 goto e_inval;
830 np->min_hopcount = val; 800 np->min_hopcount = val;
831 retv = 0;
832 break; 801 break;
833 case IPV6_DONTFRAG: 802 case IPV6_DONTFRAG:
834 np->dontfrag = valbool; 803 np->dontfrag = valbool;
@@ -944,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
944} 913}
945 914
946static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, 915static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
947 char __user *optval, int __user *optlen, unsigned int flags) 916 char __user *optval, int __user *optlen, unsigned flags)
948{ 917{
949 struct ipv6_pinfo *np = inet6_sk(sk); 918 struct ipv6_pinfo *np = inet6_sk(sk);
950 int len; 919 int len;
@@ -1011,22 +980,20 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1011 struct in6_pktinfo src_info; 980 struct in6_pktinfo src_info;
1012 src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : 981 src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
1013 np->sticky_pktinfo.ipi6_ifindex; 982 np->sticky_pktinfo.ipi6_ifindex;
1014 src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; 983 np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
984 ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
1015 put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); 985 put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
1016 } 986 }
1017 if (np->rxopt.bits.rxhlim) { 987 if (np->rxopt.bits.rxhlim) {
1018 int hlim = np->mcast_hops; 988 int hlim = np->mcast_hops;
1019 put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); 989 put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
1020 } 990 }
1021 if (np->rxopt.bits.rxtclass) {
1022 int tclass = np->rcv_tclass;
1023 put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
1024 }
1025 if (np->rxopt.bits.rxoinfo) { 991 if (np->rxopt.bits.rxoinfo) {
1026 struct in6_pktinfo src_info; 992 struct in6_pktinfo src_info;
1027 src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : 993 src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
1028 np->sticky_pktinfo.ipi6_ifindex; 994 np->sticky_pktinfo.ipi6_ifindex;
1029 src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; 995 np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
996 ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
1030 put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); 997 put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
1031 } 998 }
1032 if (np->rxopt.bits.rxohlim) { 999 if (np->rxopt.bits.rxohlim) {
@@ -1195,10 +1162,6 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1195 val = np->mcast_oif; 1162 val = np->mcast_oif;
1196 break; 1163 break;
1197 1164
1198 case IPV6_UNICAST_IF:
1199 val = (__force int)htonl((__u32) np->ucast_oif);
1200 break;
1201
1202 case IPV6_MTU_DISCOVER: 1165 case IPV6_MTU_DISCOVER:
1203 val = np->pmtudisc; 1166 val = np->pmtudisc;
1204 break; 1167 break;
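Several ipv6_sockglue.c hunks above swap ipv6_addr_copy() for plain struct assignment (for example np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr). A tiny stand-alone example of why that works, using a simplified in6_addr_stub type rather than the kernel's struct in6_addr:

#include <stdio.h>
#include <string.h>

struct in6_addr_stub { unsigned char s6_addr[16]; };

/* what the old helper boiled down to */
static void ipv6_addr_copy_stub(struct in6_addr_stub *dst,
                                const struct in6_addr_stub *src)
{
        memcpy(dst, src, sizeof(*dst));
}

int main(void)
{
        struct in6_addr_stub a = { .s6_addr = { 0xfe, 0x80, [15] = 0x01 } };
        struct in6_addr_stub b, c;

        ipv6_addr_copy_stub(&b, &a);    /* helper style */
        c = a;                          /* plain struct assignment copies all 16 bytes */

        printf("identical: %d\n", memcmp(&b, &c, sizeof(b)) == 0);
        return 0;
}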
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 28dfa5f3801..ee7839f4d6e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -155,15 +155,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
155 return -ENOMEM; 155 return -ENOMEM;
156 156
157 mc_lst->next = NULL; 157 mc_lst->next = NULL;
158 mc_lst->addr = *addr; 158 ipv6_addr_copy(&mc_lst->addr, addr);
159 159
160 rcu_read_lock(); 160 rcu_read_lock();
161 if (ifindex == 0) { 161 if (ifindex == 0) {
162 struct rt6_info *rt; 162 struct rt6_info *rt;
163 rt = rt6_lookup(net, addr, NULL, 0, 0); 163 rt = rt6_lookup(net, addr, NULL, 0, 0);
164 if (rt) { 164 if (rt) {
165 dev = rt->dst.dev; 165 dev = rt->rt6i_dev;
166 ip6_rt_put(rt); 166 dst_release(&rt->dst);
167 } 167 }
168 } else 168 } else
169 dev = dev_get_by_index_rcu(net, ifindex); 169 dev = dev_get_by_index_rcu(net, ifindex);
@@ -211,9 +211,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
211 struct ipv6_mc_socklist __rcu **lnk; 211 struct ipv6_mc_socklist __rcu **lnk;
212 struct net *net = sock_net(sk); 212 struct net *net = sock_net(sk);
213 213
214 if (!ipv6_addr_is_multicast(addr))
215 return -EINVAL;
216
217 spin_lock(&ipv6_sk_mc_lock); 214 spin_lock(&ipv6_sk_mc_lock);
218 for (lnk = &np->ipv6_mc_list; 215 for (lnk = &np->ipv6_mc_list;
219 (mc_lst = rcu_dereference_protected(*lnk, 216 (mc_lst = rcu_dereference_protected(*lnk,
@@ -259,8 +256,9 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
259 struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); 256 struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
260 257
261 if (rt) { 258 if (rt) {
262 dev = rt->dst.dev; 259 dev = rt->rt6i_dev;
263 ip6_rt_put(rt); 260 dev_hold(dev);
261 dst_release(&rt->dst);
264 } 262 }
265 } else 263 } else
266 dev = dev_get_by_index_rcu(net, ifindex); 264 dev = dev_get_by_index_rcu(net, ifindex);
@@ -284,9 +282,6 @@ void ipv6_sock_mc_close(struct sock *sk)
284 struct ipv6_mc_socklist *mc_lst; 282 struct ipv6_mc_socklist *mc_lst;
285 struct net *net = sock_net(sk); 283 struct net *net = sock_net(sk);
286 284
287 if (!rcu_access_pointer(np->ipv6_mc_list))
288 return;
289
290 spin_lock(&ipv6_sk_mc_lock); 285 spin_lock(&ipv6_sk_mc_lock);
291 while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, 286 while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
292 lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { 287 lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
@@ -612,13 +607,13 @@ done:
612 return err; 607 return err;
613} 608}
614 609
615bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, 610int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
616 const struct in6_addr *src_addr) 611 const struct in6_addr *src_addr)
617{ 612{
618 struct ipv6_pinfo *np = inet6_sk(sk); 613 struct ipv6_pinfo *np = inet6_sk(sk);
619 struct ipv6_mc_socklist *mc; 614 struct ipv6_mc_socklist *mc;
620 struct ip6_sf_socklist *psl; 615 struct ip6_sf_socklist *psl;
621 bool rv = true; 616 int rv = 1;
622 617
623 rcu_read_lock(); 618 rcu_read_lock();
624 for_each_pmc_rcu(np, mc) { 619 for_each_pmc_rcu(np, mc) {
@@ -627,7 +622,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
627 } 622 }
628 if (!mc) { 623 if (!mc) {
629 rcu_read_unlock(); 624 rcu_read_unlock();
630 return true; 625 return 1;
631 } 626 }
632 read_lock(&mc->sflock); 627 read_lock(&mc->sflock);
633 psl = mc->sflist; 628 psl = mc->sflist;
@@ -641,9 +636,9 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
641 break; 636 break;
642 } 637 }
643 if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) 638 if (mc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
644 rv = false; 639 rv = 0;
645 if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) 640 if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
646 rv = false; 641 rv = 0;
647 } 642 }
648 read_unlock(&mc->sflock); 643 read_unlock(&mc->sflock);
649 rcu_read_unlock(); 644 rcu_read_unlock();
@@ -863,7 +858,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
863 858
864 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); 859 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
865 860
866 mc->mca_addr = *addr; 861 ipv6_addr_copy(&mc->mca_addr, addr);
867 mc->idev = idev; /* (reference taken) */ 862 mc->idev = idev; /* (reference taken) */
868 mc->mca_users = 1; 863 mc->mca_users = 1;
869 /* mca_stamp should be updated upon changes */ 864 /* mca_stamp should be updated upon changes */
@@ -937,15 +932,15 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
937/* 932/*
938 * identify MLD packets for MLD filter exceptions 933 * identify MLD packets for MLD filter exceptions
939 */ 934 */
940bool ipv6_is_mld(struct sk_buff *skb, int nexthdr) 935int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
941{ 936{
942 struct icmp6hdr *pic; 937 struct icmp6hdr *pic;
943 938
944 if (nexthdr != IPPROTO_ICMPV6) 939 if (nexthdr != IPPROTO_ICMPV6)
945 return false; 940 return 0;
946 941
947 if (!pskb_may_pull(skb, sizeof(struct icmp6hdr))) 942 if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
948 return false; 943 return 0;
949 944
950 pic = icmp6_hdr(skb); 945 pic = icmp6_hdr(skb);
951 946
@@ -954,22 +949,22 @@ bool ipv6_is_mld(struct sk_buff *skb, int nexthdr)
954 case ICMPV6_MGM_REPORT: 949 case ICMPV6_MGM_REPORT:
955 case ICMPV6_MGM_REDUCTION: 950 case ICMPV6_MGM_REDUCTION:
956 case ICMPV6_MLD2_REPORT: 951 case ICMPV6_MLD2_REPORT:
957 return true; 952 return 1;
958 default: 953 default:
959 break; 954 break;
960 } 955 }
961 return false; 956 return 0;
962} 957}
963 958
964/* 959/*
965 * check if the interface/address pair is valid 960 * check if the interface/address pair is valid
966 */ 961 */
967bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, 962int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
968 const struct in6_addr *src_addr) 963 const struct in6_addr *src_addr)
969{ 964{
970 struct inet6_dev *idev; 965 struct inet6_dev *idev;
971 struct ifmcaddr6 *mc; 966 struct ifmcaddr6 *mc;
972 bool rv = false; 967 int rv = 0;
973 968
974 rcu_read_lock(); 969 rcu_read_lock();
975 idev = __in6_dev_get(dev); 970 idev = __in6_dev_get(dev);
@@ -996,7 +991,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
996 rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0; 991 rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
997 spin_unlock_bh(&mc->mca_lock); 992 spin_unlock_bh(&mc->mca_lock);
998 } else 993 } else
999 rv = true; /* don't filter unspecified source */ 994 rv = 1; /* don't filter unspecified source */
1000 } 995 }
1001 read_unlock_bh(&idev->lock); 996 read_unlock_bh(&idev->lock);
1002 } 997 }
@@ -1052,8 +1047,8 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
1052} 1047}
1053 1048
1054/* mark EXCLUDE-mode sources */ 1049/* mark EXCLUDE-mode sources */
1055static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, 1050static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
1056 const struct in6_addr *srcs) 1051 const struct in6_addr *srcs)
1057{ 1052{
1058 struct ip6_sf_list *psf; 1053 struct ip6_sf_list *psf;
1059 int i, scount; 1054 int i, scount;
@@ -1067,7 +1062,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
1067 if (psf->sf_count[MCAST_INCLUDE] || 1062 if (psf->sf_count[MCAST_INCLUDE] ||
1068 pmc->mca_sfcount[MCAST_EXCLUDE] != 1063 pmc->mca_sfcount[MCAST_EXCLUDE] !=
1069 psf->sf_count[MCAST_EXCLUDE]) 1064 psf->sf_count[MCAST_EXCLUDE])
1070 break; 1065 continue;
1071 if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { 1066 if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
1072 scount++; 1067 scount++;
1073 break; 1068 break;
@@ -1076,12 +1071,12 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
1076 } 1071 }
1077 pmc->mca_flags &= ~MAF_GSQUERY; 1072 pmc->mca_flags &= ~MAF_GSQUERY;
1078 if (scount == nsrcs) /* all sources excluded */ 1073 if (scount == nsrcs) /* all sources excluded */
1079 return false; 1074 return 0;
1080 return true; 1075 return 1;
1081} 1076}
1082 1077
1083static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, 1078static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1084 const struct in6_addr *srcs) 1079 const struct in6_addr *srcs)
1085{ 1080{
1086 struct ip6_sf_list *psf; 1081 struct ip6_sf_list *psf;
1087 int i, scount; 1082 int i, scount;
@@ -1105,10 +1100,10 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1105 } 1100 }
1106 if (!scount) { 1101 if (!scount) {
1107 pmc->mca_flags &= ~MAF_GSQUERY; 1102 pmc->mca_flags &= ~MAF_GSQUERY;
1108 return false; 1103 return 0;
1109 } 1104 }
1110 pmc->mca_flags |= MAF_GSQUERY; 1105 pmc->mca_flags |= MAF_GSQUERY;
1111 return true; 1106 return 1;
1112} 1107}
1113 1108
1114/* called with rcu_read_lock() */ 1109/* called with rcu_read_lock() */
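The surrounding mcast.c hunks convert predicate helpers such as mld_xmarksources() and ipv6_is_mld() between int/0/1 and bool/false/true. A trivial stand-alone illustration of the bool style; the helper below is a toy, not the kernel function:

#include <stdio.h>
#include <stdbool.h>

static bool any_source_marked(const int counts[], int n)
{
        int i;

        for (i = 0; i < n; i++)
                if (counts[i] > 0)
                        return true;    /* old style: return 1; */
        return false;                   /* old style: return 0; */
}

int main(void)
{
        int counts[3] = { 0, 0, 2 };

        printf("marked: %s\n", any_source_marked(counts, 3) ? "yes" : "no");
        return 0;
}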
@@ -1282,17 +1277,17 @@ int igmp6_event_report(struct sk_buff *skb)
1282 return 0; 1277 return 0;
1283} 1278}
1284 1279
1285static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, 1280static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
1286 int gdeleted, int sdeleted) 1281 int gdeleted, int sdeleted)
1287{ 1282{
1288 switch (type) { 1283 switch (type) {
1289 case MLD2_MODE_IS_INCLUDE: 1284 case MLD2_MODE_IS_INCLUDE:
1290 case MLD2_MODE_IS_EXCLUDE: 1285 case MLD2_MODE_IS_EXCLUDE:
1291 if (gdeleted || sdeleted) 1286 if (gdeleted || sdeleted)
1292 return false; 1287 return 0;
1293 if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) { 1288 if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
1294 if (pmc->mca_sfmode == MCAST_INCLUDE) 1289 if (pmc->mca_sfmode == MCAST_INCLUDE)
1295 return true; 1290 return 1;
1296 /* don't include if this source is excluded 1291 /* don't include if this source is excluded
1297 * in all filters 1292 * in all filters
1298 */ 1293 */
@@ -1301,29 +1296,29 @@ static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
1301 return pmc->mca_sfcount[MCAST_EXCLUDE] == 1296 return pmc->mca_sfcount[MCAST_EXCLUDE] ==
1302 psf->sf_count[MCAST_EXCLUDE]; 1297 psf->sf_count[MCAST_EXCLUDE];
1303 } 1298 }
1304 return false; 1299 return 0;
1305 case MLD2_CHANGE_TO_INCLUDE: 1300 case MLD2_CHANGE_TO_INCLUDE:
1306 if (gdeleted || sdeleted) 1301 if (gdeleted || sdeleted)
1307 return false; 1302 return 0;
1308 return psf->sf_count[MCAST_INCLUDE] != 0; 1303 return psf->sf_count[MCAST_INCLUDE] != 0;
1309 case MLD2_CHANGE_TO_EXCLUDE: 1304 case MLD2_CHANGE_TO_EXCLUDE:
1310 if (gdeleted || sdeleted) 1305 if (gdeleted || sdeleted)
1311 return false; 1306 return 0;
1312 if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 || 1307 if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 ||
1313 psf->sf_count[MCAST_INCLUDE]) 1308 psf->sf_count[MCAST_INCLUDE])
1314 return false; 1309 return 0;
1315 return pmc->mca_sfcount[MCAST_EXCLUDE] == 1310 return pmc->mca_sfcount[MCAST_EXCLUDE] ==
1316 psf->sf_count[MCAST_EXCLUDE]; 1311 psf->sf_count[MCAST_EXCLUDE];
1317 case MLD2_ALLOW_NEW_SOURCES: 1312 case MLD2_ALLOW_NEW_SOURCES:
1318 if (gdeleted || !psf->sf_crcount) 1313 if (gdeleted || !psf->sf_crcount)
1319 return false; 1314 return 0;
1320 return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted; 1315 return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted;
1321 case MLD2_BLOCK_OLD_SOURCES: 1316 case MLD2_BLOCK_OLD_SOURCES:
1322 if (pmc->mca_sfmode == MCAST_INCLUDE) 1317 if (pmc->mca_sfmode == MCAST_INCLUDE)
1323 return gdeleted || (psf->sf_crcount && sdeleted); 1318 return gdeleted || (psf->sf_crcount && sdeleted);
1324 return psf->sf_crcount && !gdeleted && !sdeleted; 1319 return psf->sf_crcount && !gdeleted && !sdeleted;
1325 } 1320 }
1326 return false; 1321 return 0;
1327} 1322}
1328 1323
1329static int 1324static int
@@ -1348,15 +1343,13 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1348 struct mld2_report *pmr; 1343 struct mld2_report *pmr;
1349 struct in6_addr addr_buf; 1344 struct in6_addr addr_buf;
1350 const struct in6_addr *saddr; 1345 const struct in6_addr *saddr;
1351 int hlen = LL_RESERVED_SPACE(dev);
1352 int tlen = dev->needed_tailroom;
1353 int err; 1346 int err;
1354 u8 ra[8] = { IPPROTO_ICMPV6, 0, 1347 u8 ra[8] = { IPPROTO_ICMPV6, 0,
1355 IPV6_TLV_ROUTERALERT, 2, 0, 0, 1348 IPV6_TLV_ROUTERALERT, 2, 0, 0,
1356 IPV6_TLV_PADN, 0 }; 1349 IPV6_TLV_PADN, 0 };
1357 1350
1358 /* we assume size > sizeof(ra) here */ 1351 /* we assume size > sizeof(ra) here */
1359 size += hlen + tlen; 1352 size += LL_ALLOCATED_SPACE(dev);
1360 /* limit our allocations to order-0 page */ 1353 /* limit our allocations to order-0 page */
1361 size = min_t(int, size, SKB_MAX_ORDER(0, 0)); 1354 size = min_t(int, size, SKB_MAX_ORDER(0, 0));
1362 skb = sock_alloc_send_skb(sk, size, 1, &err); 1355 skb = sock_alloc_send_skb(sk, size, 1, &err);
@@ -1364,7 +1357,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1364 if (!skb) 1357 if (!skb)
1365 return NULL; 1358 return NULL;
1366 1359
1367 skb_reserve(skb, hlen); 1360 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1368 1361
1369 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { 1362 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
1370 /* <draft-ietf-magma-mld-source-05.txt>: 1363 /* <draft-ietf-magma-mld-source-05.txt>:
@@ -1415,11 +1408,18 @@ static void mld_sendpack(struct sk_buff *skb)
1415 csum_partial(skb_transport_header(skb), 1408 csum_partial(skb_transport_header(skb),
1416 mldlen, 0)); 1409 mldlen, 0));
1417 1410
1411 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
1412
1413 if (!dst) {
1414 err = -ENOMEM;
1415 goto err_out;
1416 }
1417
1418 icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, 1418 icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT,
1419 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 1419 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
1420 skb->dev->ifindex); 1420 skb->dev->ifindex);
1421 dst = icmp6_dst_alloc(skb->dev, NULL, &fl6);
1422 1421
1422 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1423 err = 0; 1423 err = 0;
1424 if (IS_ERR(dst)) { 1424 if (IS_ERR(dst)) {
1425 err = PTR_ERR(dst); 1425 err = PTR_ERR(dst);
@@ -1723,8 +1723,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1723 struct mld_msg *hdr; 1723 struct mld_msg *hdr;
1724 const struct in6_addr *snd_addr, *saddr; 1724 const struct in6_addr *snd_addr, *saddr;
1725 struct in6_addr addr_buf; 1725 struct in6_addr addr_buf;
1726 int hlen = LL_RESERVED_SPACE(dev);
1727 int tlen = dev->needed_tailroom;
1728 int err, len, payload_len, full_len; 1726 int err, len, payload_len, full_len;
1729 u8 ra[8] = { IPPROTO_ICMPV6, 0, 1727 u8 ra[8] = { IPPROTO_ICMPV6, 0,
1730 IPV6_TLV_ROUTERALERT, 2, 0, 0, 1728 IPV6_TLV_ROUTERALERT, 2, 0, 0,
@@ -1746,7 +1744,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1746 IPSTATS_MIB_OUT, full_len); 1744 IPSTATS_MIB_OUT, full_len);
1747 rcu_read_unlock(); 1745 rcu_read_unlock();
1748 1746
1749 skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); 1747 skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err);
1750 1748
1751 if (skb == NULL) { 1749 if (skb == NULL) {
1752 rcu_read_lock(); 1750 rcu_read_lock();
@@ -1756,7 +1754,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1756 return; 1754 return;
1757 } 1755 }
1758 1756
1759 skb_reserve(skb, hlen); 1757 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1760 1758
1761 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { 1759 if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
1762 /* <draft-ietf-magma-mld-source-05.txt>: 1760 /* <draft-ietf-magma-mld-source-05.txt>:
@@ -1774,7 +1772,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1774 hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg)); 1772 hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
1775 memset(hdr, 0, sizeof(struct mld_msg)); 1773 memset(hdr, 0, sizeof(struct mld_msg));
1776 hdr->mld_type = type; 1774 hdr->mld_type = type;
1777 hdr->mld_mca = *addr; 1775 ipv6_addr_copy(&hdr->mld_mca, addr);
1778 1776
1779 hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len, 1777 hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
1780 IPPROTO_ICMPV6, 1778 IPPROTO_ICMPV6,
@@ -1783,10 +1781,17 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1783 rcu_read_lock(); 1781 rcu_read_lock();
1784 idev = __in6_dev_get(skb->dev); 1782 idev = __in6_dev_get(skb->dev);
1785 1783
1784 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
1785 if (!dst) {
1786 err = -ENOMEM;
1787 goto err_out;
1788 }
1789
1786 icmpv6_flow_init(sk, &fl6, type, 1790 icmpv6_flow_init(sk, &fl6, type,
1787 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 1791 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
1788 skb->dev->ifindex); 1792 skb->dev->ifindex);
1789 dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); 1793
1794 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1790 if (IS_ERR(dst)) { 1795 if (IS_ERR(dst)) {
1791 err = PTR_ERR(dst); 1796 err = PTR_ERR(dst);
1792 goto err_out; 1797 goto err_out;
@@ -1909,7 +1914,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
1909 * Add multicast single-source filter to the interface list 1914 * Add multicast single-source filter to the interface list
1910 */ 1915 */
1911static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, 1916static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
1912 const struct in6_addr *psfsrc) 1917 const struct in6_addr *psfsrc, int delta)
1913{ 1918{
1914 struct ip6_sf_list *psf, *psf_prev; 1919 struct ip6_sf_list *psf, *psf_prev;
1915 1920
@@ -2040,7 +2045,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
2040 pmc->mca_sfcount[sfmode]++; 2045 pmc->mca_sfcount[sfmode]++;
2041 err = 0; 2046 err = 0;
2042 for (i=0; i<sfcount; i++) { 2047 for (i=0; i<sfcount; i++) {
2043 err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]); 2048 err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
2044 if (err) 2049 if (err)
2045 break; 2050 break;
2046 } 2051 }
@@ -2050,7 +2055,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
2050 if (!delta) 2055 if (!delta)
2051 pmc->mca_sfcount[sfmode]--; 2056 pmc->mca_sfcount[sfmode]--;
2052 for (j=0; j<i; j++) 2057 for (j=0; j<i; j++)
2053 ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); 2058 (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
2054 } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { 2059 } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
2055 struct ip6_sf_list *psf; 2060 struct ip6_sf_list *psf;
2056 2061
@@ -2633,7 +2638,8 @@ static int __net_init igmp6_net_init(struct net *net)
2633 err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6, 2638 err = inet_ctl_sock_create(&net->ipv6.igmp_sk, PF_INET6,
2634 SOCK_RAW, IPPROTO_ICMPV6, net); 2639 SOCK_RAW, IPPROTO_ICMPV6, net);
2635 if (err < 0) { 2640 if (err < 0) {
2636 pr_err("Failed to initialize the IGMP6 control socket (err %d)\n", 2641 printk(KERN_ERR
2642 "Failed to initialize the IGMP6 control socket (err %d).\n",
2637 err); 2643 err);
2638 goto out; 2644 goto out;
2639 } 2645 }
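The mld_newpack() and igmp6_send() hunks above differ in how the skb is sized: one side budgets LL_RESERVED_SPACE(dev) of headroom plus dev->needed_tailroom of tailroom and then reserves the headroom, the other uses the single LL_ALLOCATED_SPACE(dev) value. A self-contained sketch of that reserve-headroom pattern with a made-up pktbuf type, not struct sk_buff:

#include <stdio.h>
#include <stdlib.h>

struct pktbuf {
        unsigned char *head;    /* start of the allocation */
        unsigned char *data;    /* where the payload currently begins */
        size_t size;
};

static struct pktbuf *pkt_alloc(size_t payload, size_t hlen, size_t tlen)
{
        struct pktbuf *p = malloc(sizeof(*p));

        if (!p)
                return NULL;
        p->size = payload + hlen + tlen;        /* payload + head room + tail room */
        p->head = malloc(p->size);
        if (!p->head) {
                free(p);
                return NULL;
        }
        p->data = p->head + hlen;               /* the skb_reserve(skb, hlen) step */
        return p;
}

int main(void)
{
        /* hlen and tlen play the roles of LL_RESERVED_SPACE(dev) and
         * dev->needed_tailroom in the hunks above */
        struct pktbuf *p = pkt_alloc(128, 16, 8);

        if (!p)
                return 1;
        printf("headroom = %td bytes\n", p->data - p->head);
        free(p->head);
        free(p);
        return 0;
}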
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0f9bdc5ee9f..43242e6e610 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -22,8 +22,6 @@
22 * Masahide NAKAMURA @USAGI 22 * Masahide NAKAMURA @USAGI
23 */ 23 */
24 24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
27#include <linux/module.h> 25#include <linux/module.h>
28#include <linux/skbuff.h> 26#include <linux/skbuff.h>
29#include <linux/time.h> 27#include <linux/time.h>
@@ -46,7 +44,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
46 if (!data) 44 if (!data)
47 return NULL; 45 return NULL;
48 if (padlen == 1) { 46 if (padlen == 1) {
49 data[0] = IPV6_TLV_PAD1; 47 data[0] = IPV6_TLV_PAD0;
50 } else if (padlen > 1) { 48 } else if (padlen > 1) {
51 data[0] = IPV6_TLV_PADN; 49 data[0] = IPV6_TLV_PADN;
52 data[1] = padlen - 2; 50 data[1] = padlen - 2;
@@ -86,30 +84,28 @@ static int mip6_mh_len(int type)
86 84
87static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) 85static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
88{ 86{
89 struct ip6_mh _hdr; 87 struct ip6_mh *mh;
90 const struct ip6_mh *mh;
91 88
92 mh = skb_header_pointer(skb, skb_transport_offset(skb), 89 if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
93 sizeof(_hdr), &_hdr); 90 !pskb_may_pull(skb, (skb_transport_offset(skb) +
94 if (!mh) 91 ((skb_transport_header(skb)[1] + 1) << 3))))
95 return -1; 92 return -1;
96 93
97 if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) 94 mh = (struct ip6_mh *)skb_transport_header(skb);
98 return -1;
99 95
100 if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { 96 if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
101 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", 97 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
102 mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); 98 mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
103 mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + 99 mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
104 skb_network_header_len(skb)); 100 skb_network_header(skb)));
105 return -1; 101 return -1;
106 } 102 }
107 103
108 if (mh->ip6mh_proto != IPPROTO_NONE) { 104 if (mh->ip6mh_proto != IPPROTO_NONE) {
109 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", 105 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
110 mh->ip6mh_proto); 106 mh->ip6mh_proto);
111 mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + 107 mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
112 skb_network_header_len(skb)); 108 skb_network_header(skb)));
113 return -1; 109 return -1;
114 } 110 }
115 111
@@ -199,8 +195,8 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
199 mip6_report_rl.stamp.tv_sec = stamp->tv_sec; 195 mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
200 mip6_report_rl.stamp.tv_usec = stamp->tv_usec; 196 mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
201 mip6_report_rl.iif = iif; 197 mip6_report_rl.iif = iif;
202 mip6_report_rl.src = *src; 198 ipv6_addr_copy(&mip6_report_rl.src, src);
203 mip6_report_rl.dst = *dst; 199 ipv6_addr_copy(&mip6_report_rl.dst, dst);
204 allow = 1; 200 allow = 1;
205 } 201 }
206 spin_unlock_bh(&mip6_report_rl.lock); 202 spin_unlock_bh(&mip6_report_rl.lock);
@@ -311,12 +307,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
311static int mip6_destopt_init_state(struct xfrm_state *x) 307static int mip6_destopt_init_state(struct xfrm_state *x)
312{ 308{
313 if (x->id.spi) { 309 if (x->id.spi) {
314 pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); 310 printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
311 x->id.spi);
315 return -EINVAL; 312 return -EINVAL;
316 } 313 }
317 if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { 314 if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
318 pr_info("%s: state's mode is not %u: %u\n", 315 printk(KERN_INFO "%s: state's mode is not %u: %u\n",
319 __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); 316 __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
320 return -EINVAL; 317 return -EINVAL;
321 } 318 }
322 319
@@ -446,12 +443,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
446static int mip6_rthdr_init_state(struct xfrm_state *x) 443static int mip6_rthdr_init_state(struct xfrm_state *x)
447{ 444{
448 if (x->id.spi) { 445 if (x->id.spi) {
449 pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); 446 printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
447 x->id.spi);
450 return -EINVAL; 448 return -EINVAL;
451 } 449 }
452 if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { 450 if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
453 pr_info("%s: state's mode is not %u: %u\n", 451 printk(KERN_INFO "%s: state's mode is not %u: %u\n",
454 __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); 452 __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
455 return -EINVAL; 453 return -EINVAL;
456 } 454 }
457 455
@@ -483,18 +481,18 @@ static const struct xfrm_type mip6_rthdr_type =
483 481
484static int __init mip6_init(void) 482static int __init mip6_init(void)
485{ 483{
486 pr_info("Mobile IPv6\n"); 484 printk(KERN_INFO "Mobile IPv6\n");
487 485
488 if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { 486 if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
489 pr_info("%s: can't add xfrm type(destopt)\n", __func__); 487 printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __func__);
490 goto mip6_destopt_xfrm_fail; 488 goto mip6_destopt_xfrm_fail;
491 } 489 }
492 if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { 490 if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
493 pr_info("%s: can't add xfrm type(rthdr)\n", __func__); 491 printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __func__);
494 goto mip6_rthdr_xfrm_fail; 492 goto mip6_rthdr_xfrm_fail;
495 } 493 }
496 if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { 494 if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
497 pr_info("%s: can't add rawv6 mh filter\n", __func__); 495 printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __func__);
498 goto mip6_rawv6_mh_fail; 496 goto mip6_rawv6_mh_fail;
499 } 497 }
500 498
@@ -512,11 +510,11 @@ static int __init mip6_init(void)
512static void __exit mip6_fini(void) 510static void __exit mip6_fini(void)
513{ 511{
514 if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) 512 if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
515 pr_info("%s: can't remove rawv6 mh filter\n", __func__); 513 printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __func__);
516 if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) 514 if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
517 pr_info("%s: can't remove xfrm type(rthdr)\n", __func__); 515 printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __func__);
518 if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) 516 if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
519 pr_info("%s: can't remove xfrm type(destopt)\n", __func__); 517 printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __func__);
520} 518}
521 519
522module_init(mip6_init); 520module_init(mip6_init);
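In mip6_mh_filter() above, one side reads the mobility header through skb_header_pointer() and reports errors at offsets computed with offsetof(), while the other pulls the header with pskb_may_pull() and subtracts raw pointers. A short stand-alone example of the offsetof() idiom, with a simplified ip6_mh_stub layout that only approximates the real struct ip6_mh:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct ip6_mh_stub {
        uint8_t  ip6mh_proto;
        uint8_t  ip6mh_hdrlen;
        uint8_t  ip6mh_type;
        uint8_t  ip6mh_reserved;
        uint16_t ip6mh_cksum;
};

int main(void)
{
        size_t network_header_len = 40; /* e.g. a bare IPv6 header before the MH */

        /* byte position of the offending field, as passed to mip6_param_prob() */
        size_t ptr = offsetof(struct ip6_mh_stub, ip6mh_hdrlen) + network_header_len;

        printf("param-problem pointer = %zu\n", ptr);
        return 0;
}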
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 6574175795d..9da6e02eaae 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -15,7 +15,6 @@
15/* 15/*
16 * Changes: 16 * Changes:
17 * 17 *
18 * Alexey I. Froloff : RFC6106 (DNSSL) support
19 * Pierre Ynard : export userland ND options 18 * Pierre Ynard : export userland ND options
20 * through netlink (RDNSS support) 19 * through netlink (RDNSS support)
21 * Lars Fenneberg : fixed MTU setting on receipt 20 * Lars Fenneberg : fixed MTU setting on receipt
@@ -27,7 +26,27 @@
27 * YOSHIFUJI Hideaki @USAGI : Verify ND options properly 26 * YOSHIFUJI Hideaki @USAGI : Verify ND options properly
28 */ 27 */
29 28
30#define pr_fmt(fmt) "ICMPv6: " fmt 29/* Set to 3 to get tracing... */
30#define ND_DEBUG 1
31
32#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
33#define ND_NOPRINTK(x...) do { ; } while(0)
34#define ND_PRINTK0 ND_PRINTK
35#define ND_PRINTK1 ND_NOPRINTK
36#define ND_PRINTK2 ND_NOPRINTK
37#define ND_PRINTK3 ND_NOPRINTK
38#if ND_DEBUG >= 1
39#undef ND_PRINTK1
40#define ND_PRINTK1 ND_PRINTK
41#endif
42#if ND_DEBUG >= 2
43#undef ND_PRINTK2
44#define ND_PRINTK2 ND_PRINTK
45#endif
46#if ND_DEBUG >= 3
47#undef ND_PRINTK3
48#define ND_PRINTK3 ND_PRINTK
49#endif
31 50
32#include <linux/module.h> 51#include <linux/module.h>
33#include <linux/errno.h> 52#include <linux/errno.h>
@@ -72,18 +91,9 @@
72#include <linux/netfilter.h> 91#include <linux/netfilter.h>
73#include <linux/netfilter_ipv6.h> 92#include <linux/netfilter_ipv6.h>
74 93
75/* Set to 3 to get tracing... */
76#define ND_DEBUG 1
77
78#define ND_PRINTK(val, level, fmt, ...) \
79do { \
80 if (val <= ND_DEBUG) \
81 net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
82} while (0)
83
84static u32 ndisc_hash(const void *pkey, 94static u32 ndisc_hash(const void *pkey,
85 const struct net_device *dev, 95 const struct net_device *dev,
86 __u32 *hash_rnd); 96 __u32 rnd);
87static int ndisc_constructor(struct neighbour *neigh); 97static int ndisc_constructor(struct neighbour *neigh);
88static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); 98static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
89static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); 99static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
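The two ndisc.c hunks above show the ND_PRINTK debug macros: one family selects a printk level through ND_PRINTK0..ND_PRINTK3 and net_ratelimit(), the other is a single ND_PRINTK(val, level, fmt, ...) that pastes the level name into a rate-limited helper. A simplified, self-contained sketch of that token-pasting trick; the log_*_ratelimited functions are stand-ins, and format arguments are dropped for brevity:

#include <stdio.h>

#define ND_DEBUG 1

static void log_warn_ratelimited(const char *msg) { printf("warn: %s", msg); }
static void log_dbg_ratelimited(const char *msg)  { printf("dbg: %s", msg); }

/* "level" is pasted into the helper name, so ND_PRINTK(1, warn, ...) becomes
 * a call to log_warn_ratelimited() guarded by the debug threshold */
#define ND_PRINTK(val, level, msg)                      \
do {                                                    \
        if ((val) <= ND_DEBUG)                          \
                log_##level##_ratelimited(msg);         \
} while (0)

int main(void)
{
        ND_PRINTK(1, warn, "NS: multicast target address\n");    /* printed */
        ND_PRINTK(2, dbg, "suppressed at this ND_DEBUG level\n"); /* val > ND_DEBUG */
        return 0;
}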
@@ -116,6 +126,7 @@ static const struct neigh_ops ndisc_direct_ops = {
116 126
117struct neigh_table nd_tbl = { 127struct neigh_table nd_tbl = {
118 .family = AF_INET6, 128 .family = AF_INET6,
129 .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr),
119 .key_len = sizeof(struct in6_addr), 130 .key_len = sizeof(struct in6_addr),
120 .hash = ndisc_hash, 131 .hash = ndisc_hash,
121 .constructor = ndisc_constructor, 132 .constructor = ndisc_constructor,
@@ -130,7 +141,7 @@ struct neigh_table nd_tbl = {
130 .gc_staletime = 60 * HZ, 141 .gc_staletime = 60 * HZ,
131 .reachable_time = ND_REACHABLE_TIME, 142 .reachable_time = ND_REACHABLE_TIME,
132 .delay_probe_time = 5 * HZ, 143 .delay_probe_time = 5 * HZ,
133 .queue_len_bytes = 64*1024, 144 .queue_len = 3,
134 .ucast_probes = 3, 145 .ucast_probes = 3,
135 .mcast_probes = 3, 146 .mcast_probes = 3,
136 .anycast_delay = 1 * HZ, 147 .anycast_delay = 1 * HZ,
@@ -143,6 +154,40 @@ struct neigh_table nd_tbl = {
143 .gc_thresh3 = 1024, 154 .gc_thresh3 = 1024,
144}; 155};
145 156
157/* ND options */
158struct ndisc_options {
159 struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
160#ifdef CONFIG_IPV6_ROUTE_INFO
161 struct nd_opt_hdr *nd_opts_ri;
162 struct nd_opt_hdr *nd_opts_ri_end;
163#endif
164 struct nd_opt_hdr *nd_useropts;
165 struct nd_opt_hdr *nd_useropts_end;
166};
167
168#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
169#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
170#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
171#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
172#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
173#define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
174
175#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
176
177/*
178 * Return the padding between the option length and the start of the
179 * link addr. Currently only IP-over-InfiniBand needs this, although
180 * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
181 * also need a pad of 2.
182 */
183static int ndisc_addr_option_pad(unsigned short type)
184{
185 switch (type) {
186 case ARPHRD_INFINIBAND: return 2;
187 default: return 0;
188 }
189}
190
146static inline int ndisc_opt_addr_space(struct net_device *dev) 191static inline int ndisc_opt_addr_space(struct net_device *dev)
147{ 192{
148 return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); 193 return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
@@ -151,8 +196,8 @@ static inline int ndisc_opt_addr_space(struct net_device *dev)
151static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, 196static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
152 unsigned short addr_type) 197 unsigned short addr_type)
153{ 198{
199 int space = NDISC_OPT_SPACE(data_len);
154 int pad = ndisc_addr_option_pad(addr_type); 200 int pad = ndisc_addr_option_pad(addr_type);
155 int space = NDISC_OPT_SPACE(data_len + pad);
156 201
157 opt[0] = type; 202 opt[0] = type;
158 opt[1] = space>>3; 203 opt[1] = space>>3;
@@ -184,8 +229,7 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
184 229
185static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) 230static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
186{ 231{
187 return opt->nd_opt_type == ND_OPT_RDNSS || 232 return opt->nd_opt_type == ND_OPT_RDNSS;
188 opt->nd_opt_type == ND_OPT_DNSSL;
189} 233}
190 234
191static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, 235static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
@@ -199,8 +243,8 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
199 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; 243 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
200} 244}
201 245
202struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, 246static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
203 struct ndisc_options *ndopts) 247 struct ndisc_options *ndopts)
204{ 248{
205 struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; 249 struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
206 250
@@ -220,9 +264,10 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
220 case ND_OPT_MTU: 264 case ND_OPT_MTU:
221 case ND_OPT_REDIRECT_HDR: 265 case ND_OPT_REDIRECT_HDR:
222 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { 266 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
223 ND_PRINTK(2, warn, 267 ND_PRINTK2(KERN_WARNING
224 "%s: duplicated ND6 option found: type=%d\n", 268 "%s(): duplicated ND6 option found: type=%d\n",
225 __func__, nd_opt->nd_opt_type); 269 __func__,
270 nd_opt->nd_opt_type);
226 } else { 271 } else {
227 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; 272 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
228 } 273 }
@@ -250,11 +295,10 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
250 * to accommodate future extension to the 295 * to accommodate future extension to the
251 * protocol. 296 * protocol.
252 */ 297 */
253 ND_PRINTK(2, notice, 298 ND_PRINTK2(KERN_NOTICE
254 "%s: ignored unsupported option; type=%d, len=%d\n", 299 "%s(): ignored unsupported option; type=%d, len=%d\n",
255 __func__, 300 __func__,
256 nd_opt->nd_opt_type, 301 nd_opt->nd_opt_type, nd_opt->nd_opt_len);
257 nd_opt->nd_opt_len);
258 } 302 }
259 } 303 }
260 opt_len -= l; 304 opt_len -= l;
@@ -263,6 +307,17 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
263 return ndopts; 307 return ndopts;
264} 308}
265 309
310static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
311 struct net_device *dev)
312{
313 u8 *lladdr = (u8 *)(p + 1);
314 int lladdrlen = p->nd_opt_len << 3;
315 int prepad = ndisc_addr_option_pad(dev->type);
316 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
317 return NULL;
318 return lladdr + prepad;
319}
320
266int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) 321int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
267{ 322{
268 switch (dev->type) { 323 switch (dev->type) {
@@ -271,6 +326,9 @@ int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev,
271 case ARPHRD_FDDI: 326 case ARPHRD_FDDI:
272 ipv6_eth_mc_map(addr, buf); 327 ipv6_eth_mc_map(addr, buf);
273 return 0; 328 return 0;
329 case ARPHRD_IEEE802_TR:
330 ipv6_tr_mc_map(addr,buf);
331 return 0;
274 case ARPHRD_ARCNET: 332 case ARPHRD_ARCNET:
275 ipv6_arcnet_mc_map(addr, buf); 333 ipv6_arcnet_mc_map(addr, buf);
276 return 0; 334 return 0;
@@ -292,9 +350,16 @@ EXPORT_SYMBOL(ndisc_mc_map);
292 350
293static u32 ndisc_hash(const void *pkey, 351static u32 ndisc_hash(const void *pkey,
294 const struct net_device *dev, 352 const struct net_device *dev,
295 __u32 *hash_rnd) 353 __u32 hash_rnd)
296{ 354{
297 return ndisc_hashfn(pkey, dev, hash_rnd); 355 const u32 *p32 = pkey;
356 u32 addr_hash, i;
357
358 addr_hash = 0;
359 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
360 addr_hash ^= *p32++;
361
362 return jhash_2words(addr_hash, dev->ifindex, hash_rnd);
298} 363}
299 364
300static int ndisc_constructor(struct neighbour *neigh) 365static int ndisc_constructor(struct neighbour *neigh)
@@ -303,16 +368,19 @@ static int ndisc_constructor(struct neighbour *neigh)
303 struct net_device *dev = neigh->dev; 368 struct net_device *dev = neigh->dev;
304 struct inet6_dev *in6_dev; 369 struct inet6_dev *in6_dev;
305 struct neigh_parms *parms; 370 struct neigh_parms *parms;
306 bool is_multicast = ipv6_addr_is_multicast(addr); 371 int is_multicast = ipv6_addr_is_multicast(addr);
307 372
373 rcu_read_lock();
308 in6_dev = in6_dev_get(dev); 374 in6_dev = in6_dev_get(dev);
309 if (in6_dev == NULL) { 375 if (in6_dev == NULL) {
376 rcu_read_unlock();
310 return -EINVAL; 377 return -EINVAL;
311 } 378 }
312 379
313 parms = in6_dev->nd_parms; 380 parms = in6_dev->nd_parms;
314 __neigh_parms_put(neigh->parms); 381 __neigh_parms_put(neigh->parms);
315 neigh->parms = neigh_parms_clone(parms); 382 neigh->parms = neigh_parms_clone(parms);
383 rcu_read_unlock();
316 384
317 neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; 385 neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
318 if (!dev->header_ops) { 386 if (!dev->header_ops) {
@@ -370,19 +438,17 @@ static void pndisc_destructor(struct pneigh_entry *n)
370 ipv6_dev_mc_dec(dev, &maddr); 438 ipv6_dev_mc_dec(dev, &maddr);
371} 439}
372 440
373static struct sk_buff *ndisc_build_skb(struct net_device *dev, 441struct sk_buff *ndisc_build_skb(struct net_device *dev,
374 const struct in6_addr *daddr, 442 const struct in6_addr *daddr,
375 const struct in6_addr *saddr, 443 const struct in6_addr *saddr,
376 struct icmp6hdr *icmp6h, 444 struct icmp6hdr *icmp6h,
377 const struct in6_addr *target, 445 const struct in6_addr *target,
378 int llinfo) 446 int llinfo)
379{ 447{
380 struct net *net = dev_net(dev); 448 struct net *net = dev_net(dev);
381 struct sock *sk = net->ipv6.ndisc_sk; 449 struct sock *sk = net->ipv6.ndisc_sk;
382 struct sk_buff *skb; 450 struct sk_buff *skb;
383 struct icmp6hdr *hdr; 451 struct icmp6hdr *hdr;
384 int hlen = LL_RESERVED_SPACE(dev);
385 int tlen = dev->needed_tailroom;
386 int len; 452 int len;
387 int err; 453 int err;
388 u8 *opt; 454 u8 *opt;
@@ -396,15 +462,16 @@ static struct sk_buff *ndisc_build_skb(struct net_device *dev,
396 462
397 skb = sock_alloc_send_skb(sk, 463 skb = sock_alloc_send_skb(sk,
398 (MAX_HEADER + sizeof(struct ipv6hdr) + 464 (MAX_HEADER + sizeof(struct ipv6hdr) +
399 len + hlen + tlen), 465 len + LL_ALLOCATED_SPACE(dev)),
400 1, &err); 466 1, &err);
401 if (!skb) { 467 if (!skb) {
402 ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n", 468 ND_PRINTK0(KERN_ERR
403 __func__, err); 469 "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n",
470 __func__, err);
404 return NULL; 471 return NULL;
405 } 472 }
406 473
407 skb_reserve(skb, hlen); 474 skb_reserve(skb, LL_RESERVED_SPACE(dev));
408 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); 475 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
409 476
410 skb->transport_header = skb->tail; 477 skb->transport_header = skb->tail;
@@ -415,7 +482,7 @@ static struct sk_buff *ndisc_build_skb(struct net_device *dev,
415 482
416 opt = skb_transport_header(skb) + sizeof(struct icmp6hdr); 483 opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
417 if (target) { 484 if (target) {
418 *(struct in6_addr *)opt = *target; 485 ipv6_addr_copy((struct in6_addr *)opt, target);
419 opt += sizeof(*target); 486 opt += sizeof(*target);
420 } 487 }
421 488
@@ -431,11 +498,14 @@ static struct sk_buff *ndisc_build_skb(struct net_device *dev,
431 return skb; 498 return skb;
432} 499}
433 500
434static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, 501EXPORT_SYMBOL(ndisc_build_skb);
435 struct neighbour *neigh, 502
436 const struct in6_addr *daddr, 503void ndisc_send_skb(struct sk_buff *skb,
437 const struct in6_addr *saddr, 504 struct net_device *dev,
438 struct icmp6hdr *icmp6h) 505 struct neighbour *neigh,
506 const struct in6_addr *daddr,
507 const struct in6_addr *saddr,
508 struct icmp6hdr *icmp6h)
439{ 509{
440 struct flowi6 fl6; 510 struct flowi6 fl6;
441 struct dst_entry *dst; 511 struct dst_entry *dst;
@@ -448,7 +518,14 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev,
448 type = icmp6h->icmp6_type; 518 type = icmp6h->icmp6_type;
449 519
450 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); 520 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
451 dst = icmp6_dst_alloc(dev, neigh, &fl6); 521
522 dst = icmp6_dst_alloc(dev, neigh, daddr);
523 if (!dst) {
524 kfree_skb(skb);
525 return;
526 }
527
528 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
452 if (IS_ERR(dst)) { 529 if (IS_ERR(dst)) {
453 kfree_skb(skb); 530 kfree_skb(skb);
454 return; 531 return;
@@ -456,8 +533,7 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev,
456 533
457 skb_dst_set(skb, dst); 534 skb_dst_set(skb, dst);
458 535
459 rcu_read_lock(); 536 idev = in6_dev_get(dst->dev);
460 idev = __in6_dev_get(dst->dev);
461 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 537 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
462 538
463 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, 539 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
@@ -467,9 +543,12 @@ static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev,
467 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 543 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
468 } 544 }
469 545
470 rcu_read_unlock(); 546 if (likely(idev != NULL))
547 in6_dev_put(idev);
471} 548}
472 549
550EXPORT_SYMBOL(ndisc_send_skb);
551
473/* 552/*
474 * Send a Neighbour Discover packet 553 * Send a Neighbour Discover packet
475 */ 554 */
@@ -530,6 +609,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
530{ 609{
531 struct inet6_dev *idev; 610 struct inet6_dev *idev;
532 struct inet6_ifaddr *ifa; 611 struct inet6_ifaddr *ifa;
612 struct in6_addr mcaddr;
533 613
534 idev = in6_dev_get(dev); 614 idev = in6_dev_get(dev);
535 if (!idev) 615 if (!idev)
@@ -537,7 +617,8 @@ static void ndisc_send_unsol_na(struct net_device *dev)
537 617
538 read_lock_bh(&idev->lock); 618 read_lock_bh(&idev->lock);
539 list_for_each_entry(ifa, &idev->addr_list, if_list) { 619 list_for_each_entry(ifa, &idev->addr_list, if_list) {
540 ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr, 620 addrconf_addr_solict_mult(&ifa->addr, &mcaddr);
621 ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr,
541 /*router=*/ !!idev->cnf.forwarding, 622 /*router=*/ !!idev->cnf.forwarding,
542 /*solicited=*/ false, /*override=*/ true, 623 /*solicited=*/ false, /*override=*/ true,
543 /*inc_opt=*/ true); 624 /*inc_opt=*/ true);
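In ndisc_send_unsol_na() above, one side sends the unsolicited NA to the all-nodes link-local group while the other derives a solicited-node multicast group from each address via addrconf_addr_solict_mult(). A small stand-alone helper showing the RFC 4291 mapping it computes: a fixed ff02::1:ff00:0/104 prefix followed by the low 24 bits of the unicast address; in6_stub is a simplified stand-in for struct in6_addr:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct in6_stub { uint8_t s6_addr[16]; };

static void solicited_node_mcast(const struct in6_stub *addr, struct in6_stub *mc)
{
        /* ff02:0:0:0:0:1:ff00::/104, then the low 24 bits of the unicast address */
        static const uint8_t prefix[13] = {
                0xff, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, 0xff
        };

        memcpy(mc->s6_addr, prefix, sizeof(prefix));
        memcpy(mc->s6_addr + 13, addr->s6_addr + 13, 3);
}

int main(void)
{
        struct in6_stub a = { { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
                                0x02, 0x0c, 0x29, 0xff, 0xfe, 0x12, 0x34, 0x56 } };
        struct in6_stub mc;
        int i;

        solicited_node_mcast(&a, &mc);          /* expect ff02::1:ff12:3456 */
        for (i = 0; i < 16; i += 2)
                printf("%02x%02x%c", mc.s6_addr[i], mc.s6_addr[i + 1],
                       i < 14 ? ':' : '\n');
        return 0;
}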
@@ -629,9 +710,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
629 710
630 if ((probes -= neigh->parms->ucast_probes) < 0) { 711 if ((probes -= neigh->parms->ucast_probes) < 0) {
631 if (!(neigh->nud_state & NUD_VALID)) { 712 if (!(neigh->nud_state & NUD_VALID)) {
632 ND_PRINTK(1, dbg, 713 ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n",
633 "%s: trying to ucast probe in NUD_INVALID: %pI6\n", 714 __func__, target);
634 __func__, target);
635 } 715 }
636 ndisc_send_ns(dev, neigh, target, target, saddr); 716 ndisc_send_ns(dev, neigh, target, target, saddr);
637 } else if ((probes -= neigh->parms->app_probes) < 0) { 717 } else if ((probes -= neigh->parms->app_probes) < 0) {
@@ -673,11 +753,12 @@ static void ndisc_recv_ns(struct sk_buff *skb)
673 struct inet6_dev *idev = NULL; 753 struct inet6_dev *idev = NULL;
674 struct neighbour *neigh; 754 struct neighbour *neigh;
675 int dad = ipv6_addr_any(saddr); 755 int dad = ipv6_addr_any(saddr);
676 bool inc; 756 int inc;
677 int is_router = -1; 757 int is_router = -1;
678 758
679 if (ipv6_addr_is_multicast(&msg->target)) { 759 if (ipv6_addr_is_multicast(&msg->target)) {
680 ND_PRINTK(2, warn, "NS: multicast target address\n"); 760 ND_PRINTK2(KERN_WARNING
761 "ICMPv6 NS: multicast target address");
681 return; 762 return;
682 } 763 }
683 764
@@ -690,20 +771,22 @@ static void ndisc_recv_ns(struct sk_buff *skb)
690 daddr->s6_addr32[1] == htonl(0x00000000) && 771 daddr->s6_addr32[1] == htonl(0x00000000) &&
691 daddr->s6_addr32[2] == htonl(0x00000001) && 772 daddr->s6_addr32[2] == htonl(0x00000001) &&
692 daddr->s6_addr [12] == 0xff )) { 773 daddr->s6_addr [12] == 0xff )) {
693 ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); 774 ND_PRINTK2(KERN_WARNING
775 "ICMPv6 NS: bad DAD packet (wrong destination)\n");
694 return; 776 return;
695 } 777 }
696 778
697 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { 779 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
698 ND_PRINTK(2, warn, "NS: invalid ND options\n"); 780 ND_PRINTK2(KERN_WARNING
781 "ICMPv6 NS: invalid ND options\n");
699 return; 782 return;
700 } 783 }
701 784
702 if (ndopts.nd_opts_src_lladdr) { 785 if (ndopts.nd_opts_src_lladdr) {
703 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); 786 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
704 if (!lladdr) { 787 if (!lladdr) {
705 ND_PRINTK(2, warn, 788 ND_PRINTK2(KERN_WARNING
706 "NS: invalid link-layer address length\n"); 789 "ICMPv6 NS: invalid link-layer address length\n");
707 return; 790 return;
708 } 791 }
709 792
@@ -713,8 +796,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
713 * in the message. 796 * in the message.
714 */ 797 */
715 if (dad) { 798 if (dad) {
716 ND_PRINTK(2, warn, 799 ND_PRINTK2(KERN_WARNING
717 "NS: bad DAD packet (link-layer address option)\n"); 800 "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
718 return; 801 return;
719 } 802 }
720 } 803 }
@@ -726,6 +809,20 @@ static void ndisc_recv_ns(struct sk_buff *skb)
726 809
727 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { 810 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
728 if (dad) { 811 if (dad) {
812 if (dev->type == ARPHRD_IEEE802_TR) {
813 const unsigned char *sadr;
814 sadr = skb_mac_header(skb);
815 if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
816 sadr[9] == dev->dev_addr[1] &&
817 sadr[10] == dev->dev_addr[2] &&
818 sadr[11] == dev->dev_addr[3] &&
819 sadr[12] == dev->dev_addr[4] &&
820 sadr[13] == dev->dev_addr[5]) {
821 /* looped-back to us */
822 goto out;
823 }
824 }
825
729 /* 826 /*
730 * We are colliding with another node 827 * We are colliding with another node
731 * who is doing DAD 828 * who is doing DAD
@@ -832,30 +929,34 @@ static void ndisc_recv_na(struct sk_buff *skb)
832 struct neighbour *neigh; 929 struct neighbour *neigh;
833 930
834 if (skb->len < sizeof(struct nd_msg)) { 931 if (skb->len < sizeof(struct nd_msg)) {
835 ND_PRINTK(2, warn, "NA: packet too short\n"); 932 ND_PRINTK2(KERN_WARNING
933 "ICMPv6 NA: packet too short\n");
836 return; 934 return;
837 } 935 }
838 936
839 if (ipv6_addr_is_multicast(&msg->target)) { 937 if (ipv6_addr_is_multicast(&msg->target)) {
840 ND_PRINTK(2, warn, "NA: target address is multicast\n"); 938 ND_PRINTK2(KERN_WARNING
939 "ICMPv6 NA: target address is multicast.\n");
841 return; 940 return;
842 } 941 }
843 942
844 if (ipv6_addr_is_multicast(daddr) && 943 if (ipv6_addr_is_multicast(daddr) &&
845 msg->icmph.icmp6_solicited) { 944 msg->icmph.icmp6_solicited) {
846 ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); 945 ND_PRINTK2(KERN_WARNING
946 "ICMPv6 NA: solicited NA is multicasted.\n");
847 return; 947 return;
848 } 948 }
849 949
850 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { 950 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
851 ND_PRINTK(2, warn, "NS: invalid ND option\n"); 951 ND_PRINTK2(KERN_WARNING
952 "ICMPv6 NS: invalid ND option\n");
852 return; 953 return;
853 } 954 }
854 if (ndopts.nd_opts_tgt_lladdr) { 955 if (ndopts.nd_opts_tgt_lladdr) {
855 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); 956 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
856 if (!lladdr) { 957 if (!lladdr) {
857 ND_PRINTK(2, warn, 958 ND_PRINTK2(KERN_WARNING
858 "NA: invalid link-layer address length\n"); 959 "ICMPv6 NA: invalid link-layer address length\n");
859 return; 960 return;
860 } 961 }
861 } 962 }
@@ -876,9 +977,9 @@ static void ndisc_recv_na(struct sk_buff *skb)
876 unsolicited advertisement. 977 unsolicited advertisement.
877 */ 978 */
878 if (skb->pkt_type != PACKET_LOOPBACK) 979 if (skb->pkt_type != PACKET_LOOPBACK)
879 ND_PRINTK(1, warn, 980 ND_PRINTK1(KERN_WARNING
880 "NA: someone advertises our address %pI6 on %s!\n", 981 "ICMPv6 NA: someone advertises our address %pI6 on %s!\n",
881 &ifp->addr, ifp->idev->dev->name); 982 &ifp->addr, ifp->idev->dev->name);
882 in6_ifa_put(ifp); 983 in6_ifa_put(ifp);
883 return; 984 return;
884 } 985 }
@@ -899,7 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
899 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && 1000 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
900 net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && 1001 net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
901 pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { 1002 pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
902 /* XXX: idev->cnf.proxy_ndp */ 1003 /* XXX: idev->cnf.prixy_ndp */
903 goto out; 1004 goto out;
904 } 1005 }
905 1006
@@ -938,9 +1039,10 @@ static void ndisc_recv_rs(struct sk_buff *skb)
938 if (skb->len < sizeof(*rs_msg)) 1039 if (skb->len < sizeof(*rs_msg))
939 return; 1040 return;
940 1041
941 idev = __in6_dev_get(skb->dev); 1042 idev = in6_dev_get(skb->dev);
942 if (!idev) { 1043 if (!idev) {
943 ND_PRINTK(1, err, "RS: can't find in6 device\n"); 1044 if (net_ratelimit())
1045 ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
944 return; 1046 return;
945 } 1047 }
946 1048
@@ -957,7 +1059,8 @@ static void ndisc_recv_rs(struct sk_buff *skb)
957 1059
958 /* Parse ND options */ 1060 /* Parse ND options */
959 if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) { 1061 if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
960 ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n"); 1062 if (net_ratelimit())
1063 ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
961 goto out; 1064 goto out;
962 } 1065 }
963 1066
@@ -977,7 +1080,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
977 neigh_release(neigh); 1080 neigh_release(neigh);
978 } 1081 }
979out: 1082out:
980 return; 1083 in6_dev_put(idev);
981} 1084}
982 1085
983static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) 1086static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
@@ -1012,9 +1115,8 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1012 1115
1013 memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); 1116 memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1014 1117
1015 if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), 1118 NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1016 &ipv6_hdr(ra)->saddr)) 1119 &ipv6_hdr(ra)->saddr);
1017 goto nla_put_failure;
1018 nlmsg_end(skb, nlh); 1120 nlmsg_end(skb, nlh);
1019 1121
1020 rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); 1122 rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
@@ -1027,6 +1129,18 @@ errout:
1027 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); 1129 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1028} 1130}
1029 1131
1132static inline int accept_ra(struct inet6_dev *in6_dev)
1133{
1134 /*
1135 * If forwarding is enabled, RA are not accepted unless the special
1136 * hybrid mode (accept_ra=2) is enabled.
1137 */
1138 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1139 return 0;
1140
1141 return in6_dev->cnf.accept_ra;
1142}
1143
1030static void ndisc_router_discovery(struct sk_buff *skb) 1144static void ndisc_router_discovery(struct sk_buff *skb)
1031{ 1145{
1032 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); 1146 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1043,17 +1157,20 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1043 optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); 1157 optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1044 1158
1045 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { 1159 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1046 ND_PRINTK(2, warn, "RA: source address is not link-local\n"); 1160 ND_PRINTK2(KERN_WARNING
1161 "ICMPv6 RA: source address is not link-local.\n");
1047 return; 1162 return;
1048 } 1163 }
1049 if (optlen < 0) { 1164 if (optlen < 0) {
1050 ND_PRINTK(2, warn, "RA: packet too short\n"); 1165 ND_PRINTK2(KERN_WARNING
1166 "ICMPv6 RA: packet too short\n");
1051 return; 1167 return;
1052 } 1168 }
1053 1169
1054#ifdef CONFIG_IPV6_NDISC_NODETYPE 1170#ifdef CONFIG_IPV6_NDISC_NODETYPE
1055 if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { 1171 if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1056 ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); 1172 ND_PRINTK2(KERN_WARNING
1173 "ICMPv6 RA: from host or unauthorized router\n");
1057 return; 1174 return;
1058 } 1175 }
1059#endif 1176#endif
@@ -1062,19 +1179,22 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1062 * set the RA_RECV flag in the interface 1179 * set the RA_RECV flag in the interface
1063 */ 1180 */
1064 1181
1065 in6_dev = __in6_dev_get(skb->dev); 1182 in6_dev = in6_dev_get(skb->dev);
1066 if (in6_dev == NULL) { 1183 if (in6_dev == NULL) {
1067 ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", 1184 ND_PRINTK0(KERN_ERR
1068 skb->dev->name); 1185 "ICMPv6 RA: can't find inet6 device for %s.\n",
1186 skb->dev->name);
1069 return; 1187 return;
1070 } 1188 }
1071 1189
1072 if (!ndisc_parse_options(opt, optlen, &ndopts)) { 1190 if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1073 ND_PRINTK(2, warn, "RA: invalid ND options\n"); 1191 in6_dev_put(in6_dev);
1192 ND_PRINTK2(KERN_WARNING
1193 "ICMP6 RA: invalid ND options\n");
1074 return; 1194 return;
1075 } 1195 }
1076 1196
1077 if (!ipv6_accept_ra(in6_dev)) 1197 if (!accept_ra(in6_dev))
1078 goto skip_linkparms; 1198 goto skip_linkparms;
1079 1199
1080#ifdef CONFIG_IPV6_NDISC_NODETYPE 1200#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1105,9 +1225,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1105 if (!in6_dev->cnf.accept_ra_defrtr) 1225 if (!in6_dev->cnf.accept_ra_defrtr)
1106 goto skip_defrtr; 1226 goto skip_defrtr;
1107 1227
1108 if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
1109 goto skip_defrtr;
1110
1111 lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); 1228 lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1112 1229
1113#ifdef CONFIG_IPV6_ROUTER_PREF 1230#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -1120,38 +1237,35 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1120 1237
1121 rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); 1238 rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1122 1239
1123 if (rt) { 1240 if (rt)
1124 neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); 1241 neigh = dst_get_neighbour(&rt->dst);
1125 if (!neigh) { 1242
1126 ND_PRINTK(0, err,
1127 "RA: %s got default router without neighbour\n",
1128 __func__);
1129 ip6_rt_put(rt);
1130 return;
1131 }
1132 }
1133 if (rt && lifetime == 0) { 1243 if (rt && lifetime == 0) {
1244 neigh_clone(neigh);
1134 ip6_del_rt(rt); 1245 ip6_del_rt(rt);
1135 rt = NULL; 1246 rt = NULL;
1136 } 1247 }
1137 1248
1138 if (rt == NULL && lifetime) { 1249 if (rt == NULL && lifetime) {
1139 ND_PRINTK(3, dbg, "RA: adding default router\n"); 1250 ND_PRINTK3(KERN_DEBUG
1251 "ICMPv6 RA: adding default router.\n");
1140 1252
1141 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); 1253 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1142 if (rt == NULL) { 1254 if (rt == NULL) {
1143 ND_PRINTK(0, err, 1255 ND_PRINTK0(KERN_ERR
1144 "RA: %s failed to add default route\n", 1256 "ICMPv6 RA: %s() failed to add default route.\n",
1145 __func__); 1257 __func__);
1258 in6_dev_put(in6_dev);
1146 return; 1259 return;
1147 } 1260 }
1148 1261
1149 neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); 1262 neigh = dst_get_neighbour(&rt->dst);
1150 if (neigh == NULL) { 1263 if (neigh == NULL) {
1151 ND_PRINTK(0, err, 1264 ND_PRINTK0(KERN_ERR
1152 "RA: %s got default router without neighbour\n", 1265 "ICMPv6 RA: %s() got default router without neighbour.\n",
1153 __func__); 1266 __func__);
1154 ip6_rt_put(rt); 1267 dst_release(&rt->dst);
1268 in6_dev_put(in6_dev);
1155 return; 1269 return;
1156 } 1270 }
1157 neigh->flags |= NTF_ROUTER; 1271 neigh->flags |= NTF_ROUTER;
@@ -1160,7 +1274,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1160 } 1274 }
1161 1275
1162 if (rt) 1276 if (rt)
1163 rt6_set_expires(rt, jiffies + (HZ * lifetime)); 1277 rt->rt6i_expires = jiffies + (HZ * lifetime);
1278
1164 if (ra_msg->icmph.icmp6_hop_limit) { 1279 if (ra_msg->icmph.icmp6_hop_limit) {
1165 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; 1280 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1166 if (rt) 1281 if (rt)
@@ -1218,8 +1333,8 @@ skip_linkparms:
1218 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, 1333 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1219 skb->dev); 1334 skb->dev);
1220 if (!lladdr) { 1335 if (!lladdr) {
1221 ND_PRINTK(2, warn, 1336 ND_PRINTK2(KERN_WARNING
1222 "RA: invalid link-layer address length\n"); 1337 "ICMPv6 RA: invalid link-layer address length\n");
1223 goto out; 1338 goto out;
1224 } 1339 }
1225 } 1340 }
@@ -1230,13 +1345,10 @@ skip_linkparms:
1230 NEIGH_UPDATE_F_ISROUTER); 1345 NEIGH_UPDATE_F_ISROUTER);
1231 } 1346 }
1232 1347
1233 if (!ipv6_accept_ra(in6_dev)) 1348 if (!accept_ra(in6_dev))
1234 goto out; 1349 goto out;
1235 1350
1236#ifdef CONFIG_IPV6_ROUTE_INFO 1351#ifdef CONFIG_IPV6_ROUTE_INFO
1237 if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
1238 goto skip_routeinfo;
1239
1240 if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { 1352 if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1241 struct nd_opt_hdr *p; 1353 struct nd_opt_hdr *p;
1242 for (p = ndopts.nd_opts_ri; 1354 for (p = ndopts.nd_opts_ri;
@@ -1254,8 +1366,6 @@ skip_linkparms:
1254 &ipv6_hdr(skb)->saddr); 1366 &ipv6_hdr(skb)->saddr);
1255 } 1367 }
1256 } 1368 }
1257
1258skip_routeinfo:
1259#endif 1369#endif
1260 1370
1261#ifdef CONFIG_IPV6_NDISC_NODETYPE 1371#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1269,9 +1379,7 @@ skip_routeinfo:
1269 for (p = ndopts.nd_opts_pi; 1379 for (p = ndopts.nd_opts_pi;
1270 p; 1380 p;
1271 p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) { 1381 p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1272 addrconf_prefix_rcv(skb->dev, (u8 *)p, 1382 addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1273 (p->nd_opt_len) << 3,
1274 ndopts.nd_opts_src_lladdr != NULL);
1275 } 1383 }
1276 } 1384 }
1277 1385
@@ -1283,7 +1391,9 @@ skip_routeinfo:
1283 mtu = ntohl(n); 1391 mtu = ntohl(n);
1284 1392
1285 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { 1393 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1286 ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); 1394 ND_PRINTK2(KERN_WARNING
1395 "ICMPv6 RA: invalid mtu: %d\n",
1396 mtu);
1287 } else if (in6_dev->cnf.mtu6 != mtu) { 1397 } else if (in6_dev->cnf.mtu6 != mtu) {
1288 in6_dev->cnf.mtu6 = mtu; 1398 in6_dev->cnf.mtu6 = mtu;
1289 1399
@@ -1304,59 +1414,120 @@ skip_routeinfo:
1304 } 1414 }
1305 1415
1306 if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { 1416 if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1307 ND_PRINTK(2, warn, "RA: invalid RA options\n"); 1417 ND_PRINTK2(KERN_WARNING
1418 "ICMPv6 RA: invalid RA options");
1308 } 1419 }
1309out: 1420out:
1310 ip6_rt_put(rt); 1421 if (rt)
1311 if (neigh) 1422 dst_release(&rt->dst);
1423 else if (neigh)
1312 neigh_release(neigh); 1424 neigh_release(neigh);
1425 in6_dev_put(in6_dev);
1313} 1426}
1314 1427
1315static void ndisc_redirect_rcv(struct sk_buff *skb) 1428static void ndisc_redirect_rcv(struct sk_buff *skb)
1316{ 1429{
1317 u8 *hdr; 1430 struct inet6_dev *in6_dev;
1431 struct icmp6hdr *icmph;
1432 const struct in6_addr *dest;
1433 const struct in6_addr *target; /* new first hop to destination */
1434 struct neighbour *neigh;
1435 int on_link = 0;
1318 struct ndisc_options ndopts; 1436 struct ndisc_options ndopts;
1319 struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); 1437 int optlen;
1320 u32 ndoptlen = skb->tail - (skb->transport_header + 1438 u8 *lladdr = NULL;
1321 offsetof(struct rd_msg, opt));
1322 1439
1323#ifdef CONFIG_IPV6_NDISC_NODETYPE 1440#ifdef CONFIG_IPV6_NDISC_NODETYPE
1324 switch (skb->ndisc_nodetype) { 1441 switch (skb->ndisc_nodetype) {
1325 case NDISC_NODETYPE_HOST: 1442 case NDISC_NODETYPE_HOST:
1326 case NDISC_NODETYPE_NODEFAULT: 1443 case NDISC_NODETYPE_NODEFAULT:
1327 ND_PRINTK(2, warn, 1444 ND_PRINTK2(KERN_WARNING
1328 "Redirect: from host or unauthorized router\n"); 1445 "ICMPv6 Redirect: from host or unauthorized router\n");
1329 return; 1446 return;
1330 } 1447 }
1331#endif 1448#endif
1332 1449
1333 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { 1450 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1334 ND_PRINTK(2, warn, 1451 ND_PRINTK2(KERN_WARNING
1335 "Redirect: source address is not link-local\n"); 1452 "ICMPv6 Redirect: source address is not link-local.\n");
1453 return;
1454 }
1455
1456 optlen = skb->tail - skb->transport_header;
1457 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1458
1459 if (optlen < 0) {
1460 ND_PRINTK2(KERN_WARNING
1461 "ICMPv6 Redirect: packet too short\n");
1336 return; 1462 return;
1337 } 1463 }
1338 1464
1339 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) 1465 icmph = icmp6_hdr(skb);
1466 target = (const struct in6_addr *) (icmph + 1);
1467 dest = target + 1;
1468
1469 if (ipv6_addr_is_multicast(dest)) {
1470 ND_PRINTK2(KERN_WARNING
1471 "ICMPv6 Redirect: destination address is multicast.\n");
1340 return; 1472 return;
1473 }
1341 1474
1342 if (!ndopts.nd_opts_rh) 1475 if (ipv6_addr_equal(dest, target)) {
1476 on_link = 1;
1477 } else if (ipv6_addr_type(target) !=
1478 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1479 ND_PRINTK2(KERN_WARNING
1480 "ICMPv6 Redirect: target address is not link-local unicast.\n");
1343 return; 1481 return;
1482 }
1344 1483
1345 hdr = (u8 *)ndopts.nd_opts_rh; 1484 in6_dev = in6_dev_get(skb->dev);
1346 hdr += 8; 1485 if (!in6_dev)
1347 if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
1348 return; 1486 return;
1487 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1488 in6_dev_put(in6_dev);
1489 return;
1490 }
1349 1491
1350 icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); 1492 /* RFC2461 8.1:
1493 * The IP source address of the Redirect MUST be the same as the current
1494 * first-hop router for the specified ICMP Destination Address.
1495 */
1496
1497 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1498 ND_PRINTK2(KERN_WARNING
1499 "ICMPv6 Redirect: invalid ND options\n");
1500 in6_dev_put(in6_dev);
1501 return;
1502 }
1503 if (ndopts.nd_opts_tgt_lladdr) {
1504 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1505 skb->dev);
1506 if (!lladdr) {
1507 ND_PRINTK2(KERN_WARNING
1508 "ICMPv6 Redirect: invalid link-layer address length\n");
1509 in6_dev_put(in6_dev);
1510 return;
1511 }
1512 }
1513
1514 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1515 if (neigh) {
1516 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1517 &ipv6_hdr(skb)->saddr, neigh, lladdr,
1518 on_link);
1519 neigh_release(neigh);
1520 }
1521 in6_dev_put(in6_dev);
1351} 1522}
1352 1523
1353void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) 1524void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1525 const struct in6_addr *target)
1354{ 1526{
1355 struct net_device *dev = skb->dev; 1527 struct net_device *dev = skb->dev;
1356 struct net *net = dev_net(dev); 1528 struct net *net = dev_net(dev);
1357 struct sock *sk = net->ipv6.ndisc_sk; 1529 struct sock *sk = net->ipv6.ndisc_sk;
1358 int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); 1530 int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1359 struct inet_peer *peer;
1360 struct sk_buff *buff; 1531 struct sk_buff *buff;
1361 struct icmp6hdr *icmph; 1532 struct icmp6hdr *icmph;
1362 struct in6_addr saddr_buf; 1533 struct in6_addr saddr_buf;
@@ -1366,22 +1537,21 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1366 struct inet6_dev *idev; 1537 struct inet6_dev *idev;
1367 struct flowi6 fl6; 1538 struct flowi6 fl6;
1368 u8 *opt; 1539 u8 *opt;
1369 int hlen, tlen;
1370 int rd_len; 1540 int rd_len;
1371 int err; 1541 int err;
1372 u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; 1542 u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1373 bool ret;
1374 1543
1375 if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { 1544 if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1376 ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", 1545 ND_PRINTK2(KERN_WARNING
1377 dev->name); 1546 "ICMPv6 Redirect: no link-local address on %s\n",
1547 dev->name);
1378 return; 1548 return;
1379 } 1549 }
1380 1550
1381 if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && 1551 if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1382 ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 1552 ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1383 ND_PRINTK(2, warn, 1553 ND_PRINTK2(KERN_WARNING
1384 "Redirect: target address is not link-local unicast\n"); 1554 "ICMPv6 Redirect: target address is not link-local unicast.\n");
1385 return; 1555 return;
1386 } 1556 }
1387 1557
@@ -1389,10 +1559,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1389 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); 1559 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1390 1560
1391 dst = ip6_route_output(net, NULL, &fl6); 1561 dst = ip6_route_output(net, NULL, &fl6);
1392 if (dst->error) { 1562 if (dst == NULL)
1393 dst_release(dst);
1394 return; 1563 return;
1395 } 1564
1396 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); 1565 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1397 if (IS_ERR(dst)) 1566 if (IS_ERR(dst))
1398 return; 1567 return;
@@ -1400,25 +1569,16 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1400 rt = (struct rt6_info *) dst; 1569 rt = (struct rt6_info *) dst;
1401 1570
1402 if (rt->rt6i_flags & RTF_GATEWAY) { 1571 if (rt->rt6i_flags & RTF_GATEWAY) {
1403 ND_PRINTK(2, warn, 1572 ND_PRINTK2(KERN_WARNING
1404 "Redirect: destination is not a neighbour\n"); 1573 "ICMPv6 Redirect: destination is not a neighbour.\n");
1405 goto release; 1574 goto release;
1406 } 1575 }
1407 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); 1576 if (!rt->rt6i_peer)
1408 ret = inet_peer_xrlim_allow(peer, 1*HZ); 1577 rt6_bind_peer(rt, 1);
1409 if (peer) 1578 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1410 inet_putpeer(peer);
1411 if (!ret)
1412 goto release; 1579 goto release;
1413 1580
1414 if (dev->addr_len) { 1581 if (dev->addr_len) {
1415 struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
1416 if (!neigh) {
1417 ND_PRINTK(2, warn,
1418 "Redirect: no neigh for target address\n");
1419 goto release;
1420 }
1421
1422 read_lock_bh(&neigh->lock); 1582 read_lock_bh(&neigh->lock);
1423 if (neigh->nud_state & NUD_VALID) { 1583 if (neigh->nud_state & NUD_VALID) {
1424 memcpy(ha_buf, neigh->ha, dev->addr_len); 1584 memcpy(ha_buf, neigh->ha, dev->addr_len);
@@ -1427,8 +1587,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1427 len += ndisc_opt_addr_space(dev); 1587 len += ndisc_opt_addr_space(dev);
1428 } else 1588 } else
1429 read_unlock_bh(&neigh->lock); 1589 read_unlock_bh(&neigh->lock);
1430
1431 neigh_release(neigh);
1432 } 1590 }
1433 1591
1434 rd_len = min_t(unsigned int, 1592 rd_len = min_t(unsigned int,
@@ -1436,20 +1594,18 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1436 rd_len &= ~0x7; 1594 rd_len &= ~0x7;
1437 len += rd_len; 1595 len += rd_len;
1438 1596
1439 hlen = LL_RESERVED_SPACE(dev);
1440 tlen = dev->needed_tailroom;
1441 buff = sock_alloc_send_skb(sk, 1597 buff = sock_alloc_send_skb(sk,
1442 (MAX_HEADER + sizeof(struct ipv6hdr) + 1598 (MAX_HEADER + sizeof(struct ipv6hdr) +
1443 len + hlen + tlen), 1599 len + LL_ALLOCATED_SPACE(dev)),
1444 1, &err); 1600 1, &err);
1445 if (buff == NULL) { 1601 if (buff == NULL) {
1446 ND_PRINTK(0, err, 1602 ND_PRINTK0(KERN_ERR
1447 "Redirect: %s failed to allocate an skb, err=%d\n", 1603 "ICMPv6 Redirect: %s() failed to allocate an skb, err=%d.\n",
1448 __func__, err); 1604 __func__, err);
1449 goto release; 1605 goto release;
1450 } 1606 }
1451 1607
1452 skb_reserve(buff, hlen); 1608 skb_reserve(buff, LL_RESERVED_SPACE(dev));
1453 ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, 1609 ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1454 IPPROTO_ICMPV6, len); 1610 IPPROTO_ICMPV6, len);
1455 1611
@@ -1465,9 +1621,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1465 */ 1621 */
1466 1622
1467 addrp = (struct in6_addr *)(icmph + 1); 1623 addrp = (struct in6_addr *)(icmph + 1);
1468 *addrp = *target; 1624 ipv6_addr_copy(addrp, target);
1469 addrp++; 1625 addrp++;
1470 *addrp = ipv6_hdr(skb)->daddr; 1626 ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1471 1627
1472 opt = (u8*) (addrp + 1); 1628 opt = (u8*) (addrp + 1);
1473 1629
@@ -1495,8 +1651,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1495 csum_partial(icmph, len, 0)); 1651 csum_partial(icmph, len, 0));
1496 1652
1497 skb_dst_set(buff, dst); 1653 skb_dst_set(buff, dst);
1498 rcu_read_lock(); 1654 idev = in6_dev_get(dst->dev);
1499 idev = __in6_dev_get(dst->dev);
1500 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 1655 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1501 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, 1656 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1502 dst_output); 1657 dst_output);
@@ -1505,7 +1660,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
1505 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1660 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1506 } 1661 }
1507 1662
1508 rcu_read_unlock(); 1663 if (likely(idev != NULL))
1664 in6_dev_put(idev);
1509 return; 1665 return;
1510 1666
1511release: 1667release:
@@ -1530,14 +1686,16 @@ int ndisc_rcv(struct sk_buff *skb)
1530 __skb_push(skb, skb->data - skb_transport_header(skb)); 1686 __skb_push(skb, skb->data - skb_transport_header(skb));
1531 1687
1532 if (ipv6_hdr(skb)->hop_limit != 255) { 1688 if (ipv6_hdr(skb)->hop_limit != 255) {
1533 ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", 1689 ND_PRINTK2(KERN_WARNING
1534 ipv6_hdr(skb)->hop_limit); 1690 "ICMPv6 NDISC: invalid hop-limit: %d\n",
1691 ipv6_hdr(skb)->hop_limit);
1535 return 0; 1692 return 0;
1536 } 1693 }
1537 1694
1538 if (msg->icmph.icmp6_code != 0) { 1695 if (msg->icmph.icmp6_code != 0) {
1539 ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", 1696 ND_PRINTK2(KERN_WARNING
1540 msg->icmph.icmp6_code); 1697 "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1698 msg->icmph.icmp6_code);
1541 return 0; 1699 return 0;
1542 } 1700 }
1543 1701
@@ -1572,18 +1730,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
1572{ 1730{
1573 struct net_device *dev = ptr; 1731 struct net_device *dev = ptr;
1574 struct net *net = dev_net(dev); 1732 struct net *net = dev_net(dev);
1575 struct inet6_dev *idev;
1576 1733
1577 switch (event) { 1734 switch (event) {
1578 case NETDEV_CHANGEADDR: 1735 case NETDEV_CHANGEADDR:
1579 neigh_changeaddr(&nd_tbl, dev); 1736 neigh_changeaddr(&nd_tbl, dev);
1580 fib6_run_gc(~0UL, net); 1737 fib6_run_gc(~0UL, net);
1581 idev = in6_dev_get(dev);
1582 if (!idev)
1583 break;
1584 if (idev->cnf.ndisc_notify)
1585 ndisc_send_unsol_na(dev);
1586 in6_dev_put(idev);
1587 break; 1738 break;
1588 case NETDEV_DOWN: 1739 case NETDEV_DOWN:
1589 neigh_ifdown(&nd_tbl, dev); 1740 neigh_ifdown(&nd_tbl, dev);
@@ -1611,7 +1762,11 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1611 static int warned; 1762 static int warned;
1612 if (strcmp(warncomm, current->comm) && warned < 5) { 1763 if (strcmp(warncomm, current->comm) && warned < 5) {
1613 strcpy(warncomm, current->comm); 1764 strcpy(warncomm, current->comm);
1614 pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n", 1765 printk(KERN_WARNING
1766 "process `%s' is using deprecated sysctl (%s) "
1767 "net.ipv6.neigh.%s.%s; "
1768 "Use net.ipv6.neigh.%s.%s_ms "
1769 "instead.\n",
1615 warncomm, func, 1770 warncomm, func,
1616 dev_name, ctl->procname, 1771 dev_name, ctl->procname,
1617 dev_name, ctl->procname); 1772 dev_name, ctl->procname);
@@ -1665,9 +1820,9 @@ static int __net_init ndisc_net_init(struct net *net)
1665 err = inet_ctl_sock_create(&sk, PF_INET6, 1820 err = inet_ctl_sock_create(&sk, PF_INET6,
1666 SOCK_RAW, IPPROTO_ICMPV6, net); 1821 SOCK_RAW, IPPROTO_ICMPV6, net);
1667 if (err < 0) { 1822 if (err < 0) {
1668 ND_PRINTK(0, err, 1823 ND_PRINTK0(KERN_ERR
1669 "NDISC: Failed to initialize the control socket (err %d)\n", 1824 "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1670 err); 1825 err);
1671 return err; 1826 return err;
1672 } 1827 }
1673 1828
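Note on the ndisc.c hunks above: the revert reinstates a file-local accept_ra() helper (see the added lines in the @@ -1027,6 +1129,18 @@ hunk) in place of the shared ipv6_accept_ra(). A minimal standalone sketch of that acceptance rule follows; struct ra_cnf and its fields are stand-ins for the idev->cnf fields used in the hunk, not real kernel types.

/* Sketch of the RA-acceptance policy restored above: when forwarding is
 * enabled, Router Advertisements are honoured only in the special hybrid
 * mode (accept_ra == 2); otherwise the per-interface accept_ra value
 * decides. struct ra_cnf is illustrative only.
 */
struct ra_cnf {
	int forwarding;
	int accept_ra;	/* 0 = never, 1 = host behaviour, 2 = hybrid router */
};

static int accept_ra(const struct ra_cnf *cnf)
{
	if (cnf->forwarding && cnf->accept_ra < 2)
		return 0;	/* acting as a router: ignore RAs unless hybrid */

	return cnf->accept_ra;
}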
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 429089cb073..30fcee46544 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -3,7 +3,6 @@
3#include <linux/ipv6.h> 3#include <linux/ipv6.h>
4#include <linux/netfilter.h> 4#include <linux/netfilter.h>
5#include <linux/netfilter_ipv6.h> 5#include <linux/netfilter_ipv6.h>
6#include <linux/export.h>
7#include <net/dst.h> 6#include <net/dst.h>
8#include <net/ipv6.h> 7#include <net/ipv6.h>
9#include <net/ip6_route.h> 8#include <net/ip6_route.h>
@@ -15,7 +14,6 @@ int ip6_route_me_harder(struct sk_buff *skb)
15{ 14{
16 struct net *net = dev_net(skb_dst(skb)->dev); 15 struct net *net = dev_net(skb_dst(skb)->dev);
17 const struct ipv6hdr *iph = ipv6_hdr(skb); 16 const struct ipv6hdr *iph = ipv6_hdr(skb);
18 unsigned int hh_len;
19 struct dst_entry *dst; 17 struct dst_entry *dst;
20 struct flowi6 fl6 = { 18 struct flowi6 fl6 = {
21 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 19 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -48,13 +46,6 @@ int ip6_route_me_harder(struct sk_buff *skb)
48 } 46 }
49#endif 47#endif
50 48
51 /* Change in oif may mean change in hh_len. */
52 hh_len = skb_dst(skb)->dev->hard_header_len;
53 if (skb_headroom(skb) < hh_len &&
54 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
55 0, GFP_ATOMIC))
56 return -1;
57
58 return 0; 49 return 0;
59} 50}
60EXPORT_SYMBOL(ip6_route_me_harder); 51EXPORT_SYMBOL(ip6_route_me_harder);
@@ -109,16 +100,9 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
109 .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, 100 .pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
110 }; 101 };
111 const void *sk = strict ? &fake_sk : NULL; 102 const void *sk = strict ? &fake_sk : NULL;
112 struct dst_entry *result; 103
113 int err; 104 *dst = ip6_route_output(net, sk, &fl->u.ip6);
114 105 return (*dst)->error;
115 result = ip6_route_output(net, sk, &fl->u.ip6);
116 err = result->error;
117 if (err)
118 dst_release(result);
119 else
120 *dst = result;
121 return err;
122} 106}
123 107
124__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 108__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
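The nf_ip6_route() hunk above drops the newer error-handling pattern in which the route lookup signals failure through dst->error and the caller releases the reference on error. A minimal sketch of that removed pattern is below; lookup_v6_route() is a hypothetical name, and the call mirrors the ip6_route_output(net, NULL, ...) usage visible in the removed lines.

#include <net/dst.h>
#include <net/ip6_route.h>

/* Sketch of the lookup pattern in the lines removed above: the result is
 * checked via dst->error and dropped on failure instead of being handed
 * back to the caller.
 */
static int lookup_v6_route(struct net *net, struct flowi6 *fl6,
			   struct dst_entry **dstp)
{
	struct dst_entry *dst = ip6_route_output(net, NULL, fl6);

	if (dst->error) {
		int err = dst->error;

		dst_release(dst);	/* do not leak the error route */
		return err;
	}

	*dstp = dst;
	return 0;
}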
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index c72532a60d8..5bbf5316920 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,28 @@ config NF_CONNTRACK_IPV6
25 25
26 To compile it as a module, choose M here. If unsure, say N. 26 To compile it as a module, choose M here. If unsure, say N.
27 27
28config IP6_NF_QUEUE
29 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
30 depends on INET && IPV6 && NETFILTER
31 depends on NETFILTER_ADVANCED
32 ---help---
33
34 This option adds a queue handler to the kernel for IPv6
35 packets which enables users to receive the filtered packets
36 with QUEUE target using libipq.
37
38 This option enables the old IPv6-only "ip6_queue" implementation
39 which has been obsoleted by the new "nfnetlink_queue" code (see
40 CONFIG_NETFILTER_NETLINK_QUEUE).
41
42 (C) Fernando Anton 2001
43 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
44 Universidad Carlos III de Madrid
45 Universidad Politecnica de Alcala de Henares
46 email: <fanton@it.uc3m.es>.
47
48 To compile it as a module, choose M here. If unsure, say N.
49
28config IP6_NF_IPTABLES 50config IP6_NF_IPTABLES
29 tristate "IP6 tables support (required for filtering)" 51 tristate "IP6 tables support (required for filtering)"
30 depends on INET && IPV6 52 depends on INET && IPV6
@@ -103,16 +125,6 @@ config IP6_NF_MATCH_MH
103 125
104 To compile it as a module, choose M here. If unsure, say N. 126 To compile it as a module, choose M here. If unsure, say N.
105 127
106config IP6_NF_MATCH_RPFILTER
107 tristate '"rpfilter" reverse path filter match support'
108 depends on NETFILTER_ADVANCED
109 ---help---
110 This option allows you to match packets whose replies would
111 go out via the interface the packet came in.
112
113 To compile it as a module, choose M here. If unsure, say N.
114 The module will be called ip6t_rpfilter.
115
116config IP6_NF_MATCH_RT 128config IP6_NF_MATCH_RT
117 tristate '"rt" Routing header match support' 129 tristate '"rt" Routing header match support'
118 depends on NETFILTER_ADVANCED 130 depends on NETFILTER_ADVANCED
@@ -132,6 +144,15 @@ config IP6_NF_TARGET_HL
132 (e.g. when running oldconfig). It selects 144 (e.g. when running oldconfig). It selects
133 CONFIG_NETFILTER_XT_TARGET_HL. 145 CONFIG_NETFILTER_XT_TARGET_HL.
134 146
147config IP6_NF_TARGET_LOG
148 tristate "LOG target support"
149 default m if NETFILTER_ADVANCED=n
150 help
151 This option adds a `LOG' target, which allows you to create rules in
152 any iptables table which records the packet header to the syslog.
153
154 To compile it as a module, choose M here. If unsure, say N.
155
135config IP6_NF_FILTER 156config IP6_NF_FILTER
136 tristate "Packet filtering" 157 tristate "Packet filtering"
137 default m if NETFILTER_ADVANCED=n 158 default m if NETFILTER_ADVANCED=n
@@ -153,6 +174,18 @@ config IP6_NF_TARGET_REJECT
153 174
154 To compile it as a module, choose M here. If unsure, say N. 175 To compile it as a module, choose M here. If unsure, say N.
155 176
177config IP6_NF_TARGET_REJECT_SKERR
178 bool "Force socket error when rejecting with icmp*"
179 depends on IP6_NF_TARGET_REJECT
180 default n
181 help
182 This option enables turning a "--reject-with icmp*" into a matching
183 socket error also.
184 The REJECT target normally allows sending an ICMP message. But it
185 leaves the local socket unaware of any ingress rejects.
186
187 If unsure, say N.
188
156config IP6_NF_MANGLE 189config IP6_NF_MANGLE
157 tristate "Packet mangling" 190 tristate "Packet mangling"
158 default m if NETFILTER_ADVANCED=n 191 default m if NETFILTER_ADVANCED=n
@@ -165,6 +198,7 @@ config IP6_NF_MANGLE
165 198
166config IP6_NF_RAW 199config IP6_NF_RAW
167 tristate 'raw table support (required for TRACE)' 200 tristate 'raw table support (required for TRACE)'
201 depends on NETFILTER_ADVANCED
168 help 202 help
169 This option adds a `raw' table to ip6tables. This table is the very 203 This option adds a `raw' table to ip6tables. This table is the very
170 first in the netfilter framework and hooks in at the PREROUTING 204 first in the netfilter framework and hooks in at the PREROUTING
@@ -181,44 +215,9 @@ config IP6_NF_SECURITY
181 help 215 help
182 This option adds a `security' table to iptables, for use 216 This option adds a `security' table to iptables, for use
183 with Mandatory Access Control (MAC) policy. 217 with Mandatory Access Control (MAC) policy.
184 218
185 If unsure, say N. 219 If unsure, say N.
186 220
187config NF_NAT_IPV6
188 tristate "IPv6 NAT"
189 depends on NF_CONNTRACK_IPV6
190 depends on NETFILTER_ADVANCED
191 select NF_NAT
192 help
193 The IPv6 NAT option allows masquerading, port forwarding and other
194 forms of full Network Address Port Translation. It is controlled by
195 the `nat' table in ip6tables, see the man page for ip6tables(8).
196
197 To compile it as a module, choose M here. If unsure, say N.
198
199if NF_NAT_IPV6
200
201config IP6_NF_TARGET_MASQUERADE
202 tristate "MASQUERADE target support"
203 help
204 Masquerading is a special case of NAT: all outgoing connections are
205 changed to seem to come from a particular interface's address, and
206 if the interface goes down, those connections are lost. This is
207 only useful for dialup accounts with dynamic IP address (ie. your IP
208 address will be different on next dialup).
209
210 To compile it as a module, choose M here. If unsure, say N.
211
212config IP6_NF_TARGET_NPT
213 tristate "NPT (Network Prefix translation) target support"
214 help
215 This option adds the `SNPT' and `DNPT' target, which perform
216 stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
217
218 To compile it as a module, choose M here. If unsure, say N.
219
220endif # NF_NAT_IPV6
221
222endif # IP6_NF_IPTABLES 221endif # IP6_NF_IPTABLES
223 222
224endmenu 223endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2d11fcc2cf3..abfee91ce81 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -6,9 +6,9 @@
6obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o 6obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
7obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o 7obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
8obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o 8obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
9obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
9obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o 10obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
10obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 11obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
11obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
12 12
13# objects for l3 independent conntrack 13# objects for l3 independent conntrack
14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -16,9 +16,6 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
16# l3 independent conntrack 16# l3 independent conntrack
17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
18 18
19nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
20obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
21
22# defrag 19# defrag
23nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o 20nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
24obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o 21obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
@@ -30,10 +27,8 @@ obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
30obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o 27obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
31obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o 28obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
32obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o 29obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
33obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
34obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o 30obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
35 31
36# targets 32# targets
37obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o 33obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
38obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
39obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 34obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 125a90d6a79..14cb310064f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -78,6 +78,19 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
78 78
79 Hence the start of any table is given by get_table() below. */ 79 Hence the start of any table is given by get_table() below. */
80 80
81/* Check for an extension */
82int
83ip6t_ext_hdr(u8 nexthdr)
84{
85 return (nexthdr == IPPROTO_HOPOPTS) ||
86 (nexthdr == IPPROTO_ROUTING) ||
87 (nexthdr == IPPROTO_FRAGMENT) ||
88 (nexthdr == IPPROTO_ESP) ||
89 (nexthdr == IPPROTO_AH) ||
90 (nexthdr == IPPROTO_NONE) ||
91 (nexthdr == IPPROTO_DSTOPTS);
92}
93
81/* Returns whether matches rule or not. */ 94/* Returns whether matches rule or not. */
82/* Performance critical - called for every packet */ 95/* Performance critical - called for every packet */
83static inline bool 96static inline bool
@@ -133,7 +146,7 @@ ip6_packet_match(const struct sk_buff *skb,
133 int protohdr; 146 int protohdr;
134 unsigned short _frag_off; 147 unsigned short _frag_off;
135 148
136 protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL); 149 protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
137 if (protohdr < 0) { 150 if (protohdr < 0) {
138 if (_frag_off == 0) 151 if (_frag_off == 0)
139 *hotdrop = true; 152 *hotdrop = true;
@@ -181,7 +194,8 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
181static unsigned int 194static unsigned int
182ip6t_error(struct sk_buff *skb, const struct xt_action_param *par) 195ip6t_error(struct sk_buff *skb, const struct xt_action_param *par)
183{ 196{
184 net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); 197 if (net_ratelimit())
198 pr_info("error: `%s'\n", (const char *)par->targinfo);
185 199
186 return NF_DROP; 200 return NF_DROP;
187} 201}
@@ -207,7 +221,8 @@ ip6t_get_target_c(const struct ip6t_entry *e)
207 return ip6t_get_target((struct ip6t_entry *)e); 221 return ip6t_get_target((struct ip6t_entry *)e);
208} 222}
209 223
210#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 224#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
225 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
211/* This cries for unification! */ 226/* This cries for unification! */
212static const char *const hooknames[] = { 227static const char *const hooknames[] = {
213 [NF_INET_PRE_ROUTING] = "PREROUTING", 228 [NF_INET_PRE_ROUTING] = "PREROUTING",
@@ -360,7 +375,6 @@ ip6t_do_table(struct sk_buff *skb,
360 const struct xt_entry_match *ematch; 375 const struct xt_entry_match *ematch;
361 376
362 IP_NF_ASSERT(e); 377 IP_NF_ASSERT(e);
363 acpar.thoff = 0;
364 if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, 378 if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
365 &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { 379 &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
366 no_match: 380 no_match:
@@ -380,7 +394,8 @@ ip6t_do_table(struct sk_buff *skb,
380 t = ip6t_get_target_c(e); 394 t = ip6t_get_target_c(e);
381 IP_NF_ASSERT(t->u.kernel.target); 395 IP_NF_ASSERT(t->u.kernel.target);
382 396
383#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 397#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
398 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
384 /* The packet is traced: log it */ 399 /* The packet is traced: log it */
385 if (unlikely(skb->nf_trace)) 400 if (unlikely(skb->nf_trace))
386 trace_packet(skb, hook, in, out, 401 trace_packet(skb, hook, in, out,
@@ -394,7 +409,7 @@ ip6t_do_table(struct sk_buff *skb,
394 if (v < 0) { 409 if (v < 0) {
395 /* Pop from stack? */ 410 /* Pop from stack? */
396 if (v != XT_RETURN) { 411 if (v != XT_RETURN) {
397 verdict = (unsigned int)(-v) - 1; 412 verdict = (unsigned)(-v) - 1;
398 break; 413 break;
399 } 414 }
400 if (*stackptr <= origptr) 415 if (*stackptr <= origptr)
@@ -1854,7 +1869,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1854{ 1869{
1855 int ret; 1870 int ret;
1856 1871
1857 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 1872 if (!capable(CAP_NET_ADMIN))
1858 return -EPERM; 1873 return -EPERM;
1859 1874
1860 switch (cmd) { 1875 switch (cmd) {
@@ -1969,7 +1984,7 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1969{ 1984{
1970 int ret; 1985 int ret;
1971 1986
1972 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 1987 if (!capable(CAP_NET_ADMIN))
1973 return -EPERM; 1988 return -EPERM;
1974 1989
1975 switch (cmd) { 1990 switch (cmd) {
@@ -1991,7 +2006,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1991{ 2006{
1992 int ret; 2007 int ret;
1993 2008
1994 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2009 if (!capable(CAP_NET_ADMIN))
1995 return -EPERM; 2010 return -EPERM;
1996 2011
1997 switch (cmd) { 2012 switch (cmd) {
@@ -2016,7 +2031,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2016{ 2031{
2017 int ret; 2032 int ret;
2018 2033
2019 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2034 if (!capable(CAP_NET_ADMIN))
2020 return -EPERM; 2035 return -EPERM;
2021 2036
2022 switch (cmd) { 2037 switch (cmd) {
@@ -2271,9 +2286,90 @@ static void __exit ip6_tables_fini(void)
2271 unregister_pernet_subsys(&ip6_tables_net_ops); 2286 unregister_pernet_subsys(&ip6_tables_net_ops);
2272} 2287}
2273 2288
2289/*
2290 * find the offset to specified header or the protocol number of last header
2291 * if target < 0. "last header" is transport protocol header, ESP, or
2292 * "No next header".
2293 *
2294 * If target header is found, its offset is set in *offset and return protocol
2295 * number. Otherwise, return -ENOENT or -EBADMSG.
2296 *
2297 * If the first fragment doesn't contain the final protocol header or
2298 * NEXTHDR_NONE it is considered invalid.
2299 *
2300 * Note that non-1st fragment is special case that "the protocol number
2301 * of last header" is "next header" field in Fragment header. In this case,
2302 * *offset is meaningless. If fragoff is not NULL, the fragment offset is
2303 * stored in *fragoff; if it is NULL, return -EINVAL.
2304 */
2305int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
2306 int target, unsigned short *fragoff)
2307{
2308 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
2309 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
2310 unsigned int len = skb->len - start;
2311
2312 if (fragoff)
2313 *fragoff = 0;
2314
2315 while (nexthdr != target) {
2316 struct ipv6_opt_hdr _hdr, *hp;
2317 unsigned int hdrlen;
2318
2319 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
2320 if (target < 0)
2321 break;
2322 return -ENOENT;
2323 }
2324
2325 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
2326 if (hp == NULL)
2327 return -EBADMSG;
2328 if (nexthdr == NEXTHDR_FRAGMENT) {
2329 unsigned short _frag_off;
2330 __be16 *fp;
2331 fp = skb_header_pointer(skb,
2332 start+offsetof(struct frag_hdr,
2333 frag_off),
2334 sizeof(_frag_off),
2335 &_frag_off);
2336 if (fp == NULL)
2337 return -EBADMSG;
2338
2339 _frag_off = ntohs(*fp) & ~0x7;
2340 if (_frag_off) {
2341 if (target < 0 &&
2342 ((!ipv6_ext_hdr(hp->nexthdr)) ||
2343 hp->nexthdr == NEXTHDR_NONE)) {
2344 if (fragoff) {
2345 *fragoff = _frag_off;
2346 return hp->nexthdr;
2347 } else {
2348 return -EINVAL;
2349 }
2350 }
2351 return -ENOENT;
2352 }
2353 hdrlen = 8;
2354 } else if (nexthdr == NEXTHDR_AUTH)
2355 hdrlen = (hp->hdrlen + 2) << 2;
2356 else
2357 hdrlen = ipv6_optlen(hp);
2358
2359 nexthdr = hp->nexthdr;
2360 len -= hdrlen;
2361 start += hdrlen;
2362 }
2363
2364 *offset = start;
2365 return nexthdr;
2366}
2367
2274EXPORT_SYMBOL(ip6t_register_table); 2368EXPORT_SYMBOL(ip6t_register_table);
2275EXPORT_SYMBOL(ip6t_unregister_table); 2369EXPORT_SYMBOL(ip6t_unregister_table);
2276EXPORT_SYMBOL(ip6t_do_table); 2370EXPORT_SYMBOL(ip6t_do_table);
2371EXPORT_SYMBOL(ip6t_ext_hdr);
2372EXPORT_SYMBOL(ipv6_find_hdr);
2277 2373
2278module_init(ip6_tables_init); 2374module_init(ip6_tables_init);
2279module_exit(ip6_tables_fini); 2375module_exit(ip6_tables_fini);
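The ipv6_find_hdr() helper added back into ip6_tables.c above is documented by its block comment: it returns the protocol number and sets *offset when the target header is found, or -ENOENT / -EBADMSG / -EINVAL otherwise. A short usage sketch in the four-argument form used by this tree (the same call pattern appears in the ip6t_ah and ip6t_frag hunks further down); packet_has_ah() is an illustrative name, and the header providing the prototype depends on the tree.

#include <linux/skbuff.h>
#include <net/ipv6.h>

/* Usage sketch: locate the AH extension header and bail out cleanly when
 * it is absent or the packet cannot be parsed.
 */
static bool packet_has_ah(const struct sk_buff *skb)
{
	unsigned int ptr = 0;
	int err;

	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
	if (err < 0)
		return false;	/* -ENOENT: no AH; -EBADMSG/-EINVAL: malformed */

	/* on success, ptr holds the offset of the AH header in the skb */
	return true;
}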
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
deleted file mode 100644
index 60e9053bab0..00000000000
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ /dev/null
@@ -1,135 +0,0 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
9 * NAT funded by Astaro.
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/netdevice.h>
15#include <linux/ipv6.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter_ipv6.h>
18#include <linux/netfilter/x_tables.h>
19#include <net/netfilter/nf_nat.h>
20#include <net/addrconf.h>
21#include <net/ipv6.h>
22
23static unsigned int
24masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
25{
26 const struct nf_nat_range *range = par->targinfo;
27 enum ip_conntrack_info ctinfo;
28 struct in6_addr src;
29 struct nf_conn *ct;
30 struct nf_nat_range newrange;
31
32 ct = nf_ct_get(skb, &ctinfo);
33 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
34 ctinfo == IP_CT_RELATED_REPLY));
35
36 if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
37 &ipv6_hdr(skb)->daddr, 0, &src) < 0)
38 return NF_DROP;
39
40 nfct_nat(ct)->masq_index = par->out->ifindex;
41
42 newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
43 newrange.min_addr.in6 = src;
44 newrange.max_addr.in6 = src;
45 newrange.min_proto = range->min_proto;
46 newrange.max_proto = range->max_proto;
47
48 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
49}
50
51static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
52{
53 const struct nf_nat_range *range = par->targinfo;
54
55 if (range->flags & NF_NAT_RANGE_MAP_IPS)
56 return -EINVAL;
57 return 0;
58}
59
60static int device_cmp(struct nf_conn *ct, void *ifindex)
61{
62 const struct nf_conn_nat *nat = nfct_nat(ct);
63
64 if (!nat)
65 return 0;
66 if (nf_ct_l3num(ct) != NFPROTO_IPV6)
67 return 0;
68 return nat->masq_index == (int)(long)ifindex;
69}
70
71static int masq_device_event(struct notifier_block *this,
72 unsigned long event, void *ptr)
73{
74 const struct net_device *dev = ptr;
75 struct net *net = dev_net(dev);
76
77 if (event == NETDEV_DOWN)
78 nf_ct_iterate_cleanup(net, device_cmp,
79 (void *)(long)dev->ifindex);
80
81 return NOTIFY_DONE;
82}
83
84static struct notifier_block masq_dev_notifier = {
85 .notifier_call = masq_device_event,
86};
87
88static int masq_inet_event(struct notifier_block *this,
89 unsigned long event, void *ptr)
90{
91 struct inet6_ifaddr *ifa = ptr;
92
93 return masq_device_event(this, event, ifa->idev->dev);
94}
95
96static struct notifier_block masq_inet_notifier = {
97 .notifier_call = masq_inet_event,
98};
99
100static struct xt_target masquerade_tg6_reg __read_mostly = {
101 .name = "MASQUERADE",
102 .family = NFPROTO_IPV6,
103 .checkentry = masquerade_tg6_checkentry,
104 .target = masquerade_tg6,
105 .targetsize = sizeof(struct nf_nat_range),
106 .table = "nat",
107 .hooks = 1 << NF_INET_POST_ROUTING,
108 .me = THIS_MODULE,
109};
110
111static int __init masquerade_tg6_init(void)
112{
113 int err;
114
115 err = xt_register_target(&masquerade_tg6_reg);
116 if (err == 0) {
117 register_netdevice_notifier(&masq_dev_notifier);
118 register_inet6addr_notifier(&masq_inet_notifier);
119 }
120
121 return err;
122}
123static void __exit masquerade_tg6_exit(void)
124{
125 unregister_inet6addr_notifier(&masq_inet_notifier);
126 unregister_netdevice_notifier(&masq_dev_notifier);
127 xt_unregister_target(&masquerade_tg6_reg);
128}
129
130module_init(masquerade_tg6_init);
131module_exit(masquerade_tg6_exit);
132
133MODULE_LICENSE("GPL");
134MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
135MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
deleted file mode 100644
index 7302b0b7b64..00000000000
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ /dev/null
@@ -1,146 +0,0 @@
1/*
2 * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/ipv6.h>
12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv6.h>
14#include <linux/netfilter_ipv6/ip6t_NPT.h>
15#include <linux/netfilter/x_tables.h>
16
17static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
18{
19 struct ip6t_npt_tginfo *npt = par->targinfo;
20 __wsum src_sum = 0, dst_sum = 0;
21 unsigned int i;
22
23 if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
24 return -EINVAL;
25
26 for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
27 src_sum = csum_add(src_sum,
28 (__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
29 dst_sum = csum_add(dst_sum,
30 (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
31 }
32
33 npt->adjustment = (__force __sum16) csum_sub(src_sum, dst_sum);
34 return 0;
35}
36
37static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
38 struct in6_addr *addr)
39{
40 unsigned int pfx_len;
41 unsigned int i, idx;
42 __be32 mask;
43 __sum16 sum;
44
45 pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
46 for (i = 0; i < pfx_len; i += 32) {
47 if (pfx_len - i >= 32)
48 mask = 0;
49 else
50 mask = htonl(~((1 << (pfx_len - i)) - 1));
51
52 idx = i / 32;
53 addr->s6_addr32[idx] &= mask;
54 addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
55 }
56
57 if (pfx_len <= 48)
58 idx = 3;
59 else {
60 for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
61 if ((__force __sum16)addr->s6_addr16[idx] !=
62 CSUM_MANGLED_0)
63 break;
64 }
65 if (idx == ARRAY_SIZE(addr->s6_addr16))
66 return false;
67 }
68
69 sum = (__force __sum16) csum_add((__force __wsum)addr->s6_addr16[idx],
70 npt->adjustment);
71 if (sum == CSUM_MANGLED_0)
72 sum = 0;
73 *(__force __sum16 *)&addr->s6_addr16[idx] = sum;
74
75 return true;
76}
77
78static unsigned int
79ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
80{
81 const struct ip6t_npt_tginfo *npt = par->targinfo;
82
83 if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
84 icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
85 offsetof(struct ipv6hdr, saddr));
86 return NF_DROP;
87 }
88 return XT_CONTINUE;
89}
90
91static unsigned int
92ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
93{
94 const struct ip6t_npt_tginfo *npt = par->targinfo;
95
96 if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
97 icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
98 offsetof(struct ipv6hdr, daddr));
99 return NF_DROP;
100 }
101 return XT_CONTINUE;
102}
103
104static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
105 {
106 .name = "SNPT",
107 .target = ip6t_snpt_tg,
108 .targetsize = sizeof(struct ip6t_npt_tginfo),
109 .checkentry = ip6t_npt_checkentry,
110 .family = NFPROTO_IPV6,
111 .hooks = (1 << NF_INET_LOCAL_IN) |
112 (1 << NF_INET_POST_ROUTING),
113 .me = THIS_MODULE,
114 },
115 {
116 .name = "DNPT",
117 .target = ip6t_dnpt_tg,
118 .targetsize = sizeof(struct ip6t_npt_tginfo),
119 .checkentry = ip6t_npt_checkentry,
120 .family = NFPROTO_IPV6,
121 .hooks = (1 << NF_INET_PRE_ROUTING) |
122 (1 << NF_INET_LOCAL_OUT),
123 .me = THIS_MODULE,
124 },
125};
126
127static int __init ip6t_npt_init(void)
128{
129 return xt_register_targets(ip6t_npt_target_reg,
130 ARRAY_SIZE(ip6t_npt_target_reg));
131}
132
133static void __exit ip6t_npt_exit(void)
134{
135 xt_unregister_targets(ip6t_npt_target_reg,
136 ARRAY_SIZE(ip6t_npt_target_reg));
137}
138
139module_init(ip6t_npt_init);
140module_exit(ip6t_npt_exit);
141
142MODULE_LICENSE("GPL");
143MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
144MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
145MODULE_ALIAS("ip6t_SNPT");
146MODULE_ALIAS("ip6t_DNPT");
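The deleted ip6t_NPT target above implements RFC 6296 checksum-neutral prefix translation: checkentry precomputes a 16-bit one's-complement adjustment between the source and destination prefixes, and ip6t_npt_map_pfx() folds it into one word of the rewritten address so transport checksums stay valid. The sketch below is an illustrative userspace rendering of that arithmetic, not the deleted kernel code; function names are mine, and the prefix words are assumed to be in network byte order.

#include <stdint.h>

/* 16-bit one's-complement sum with end-around carry */
static uint16_t ones_sum16(const uint16_t *words, int n)
{
	uint32_t sum = 0;
	int i;

	for (i = 0; i < n; i++) {
		sum += words[i];
		sum = (sum & 0xffff) + (sum >> 16);
	}
	return (uint16_t)sum;
}

/* One's-complement difference between the old and new /64 prefixes
 * (4 x 16-bit words each); adding this to one word of the translated
 * address keeps TCP/UDP pseudo-header checksums unchanged.
 */
static uint16_t npt_adjustment(const uint16_t old_pfx[4],
			       const uint16_t new_pfx[4])
{
	uint32_t diff = ones_sum16(old_pfx, 4) +
			(uint16_t)~ones_sum16(new_pfx, 4);

	diff = (diff & 0xffff) + (diff >> 16);
	return (uint16_t)diff;
}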
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 029623dbd41..09d30498c92 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -49,7 +49,6 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
49 const __u8 tclass = DEFAULT_TOS_VALUE; 49 const __u8 tclass = DEFAULT_TOS_VALUE;
50 struct dst_entry *dst = NULL; 50 struct dst_entry *dst = NULL;
51 u8 proto; 51 u8 proto;
52 __be16 frag_off;
53 struct flowi6 fl6; 52 struct flowi6 fl6;
54 53
55 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || 54 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
@@ -59,7 +58,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
59 } 58 }
60 59
61 proto = oip6h->nexthdr; 60 proto = oip6h->nexthdr;
62 tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); 61 tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
63 62
64 if ((tcphoff < 0) || (tcphoff > oldskb->len)) { 63 if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
65 pr_debug("Cannot get TCP header.\n"); 64 pr_debug("Cannot get TCP header.\n");
@@ -94,8 +93,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
94 93
95 memset(&fl6, 0, sizeof(fl6)); 94 memset(&fl6, 0, sizeof(fl6));
96 fl6.flowi6_proto = IPPROTO_TCP; 95 fl6.flowi6_proto = IPPROTO_TCP;
97 fl6.saddr = oip6h->daddr; 96 ipv6_addr_copy(&fl6.saddr, &oip6h->daddr);
98 fl6.daddr = oip6h->saddr; 97 ipv6_addr_copy(&fl6.daddr, &oip6h->saddr);
99 fl6.fl6_sport = otcph.dest; 98 fl6.fl6_sport = otcph.dest;
100 fl6.fl6_dport = otcph.source; 99 fl6.fl6_dport = otcph.source;
101 security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); 100 security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
@@ -114,7 +113,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
114 GFP_ATOMIC); 113 GFP_ATOMIC);
115 114
116 if (!nskb) { 115 if (!nskb) {
117 net_dbg_ratelimited("cannot alloc skb\n"); 116 if (net_ratelimit())
117 pr_debug("cannot alloc skb\n");
118 dst_release(dst); 118 dst_release(dst);
119 return; 119 return;
120 } 120 }
@@ -129,10 +129,9 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
129 *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); 129 *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20));
130 ip6h->hop_limit = ip6_dst_hoplimit(dst); 130 ip6h->hop_limit = ip6_dst_hoplimit(dst);
131 ip6h->nexthdr = IPPROTO_TCP; 131 ip6h->nexthdr = IPPROTO_TCP;
132 ip6h->saddr = oip6h->daddr; 132 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
133 ip6h->daddr = oip6h->saddr; 133 ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
134 134
135 skb_reset_transport_header(nskb);
136 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); 135 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
137 /* Truncate to length (no data) */ 136 /* Truncate to length (no data) */
138 tcph->doff = sizeof(struct tcphdr)/4; 137 tcph->doff = sizeof(struct tcphdr)/4;
@@ -178,6 +177,15 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
178 skb_in->dev = net->loopback_dev; 177 skb_in->dev = net->loopback_dev;
179 178
180 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); 179 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
180#ifdef CONFIG_IP6_NF_TARGET_REJECT_SKERR
181 if (skb_in->sk) {
182 icmpv6_err_convert(ICMPV6_DEST_UNREACH, code,
183 &skb_in->sk->sk_err);
184 skb_in->sk->sk_error_report(skb_in->sk);
185 pr_debug("ip6t_REJECT: sk_err=%d for skb=%p sk=%p\n",
186 skb_in->sk->sk_err, skb_in, skb_in->sk);
187 }
188#endif
181} 189}
182 190
183static unsigned int 191static unsigned int
@@ -210,7 +218,8 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
210 send_reset(net, skb); 218 send_reset(net, skb);
211 break; 219 break;
212 default: 220 default:
213 net_info_ratelimited("case %u not handled yet\n", reject->with); 221 if (net_ratelimit())
222 pr_info("case %u not handled yet\n", reject->with);
214 break; 223 break;
215 } 224 }
216 225
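
Editor's note: send_reset() in the hunk above builds a TCP RST in reply to the offending segment; the sequence/ack choice follows the usual convention (reuse the peer's ACK as the RST's sequence number when an ACK was present, otherwise acknowledge everything the peer sent). A minimal userspace sketch of that selection logic follows; the struct and the rst_numbers() helper are illustrative stand-ins, not kernel API, and the kernel itself works on struct tcphdr inside an skb.

/* Sketch: choosing seq/ack for a TCP RST reply, in the spirit of ip6t_REJECT's
 * send_reset().  Field names and helper are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

struct tcp_fields {
    uint32_t seq;
    uint32_t ack_seq;
    int      ack;     /* ACK flag set on the offending segment? */
    int      syn;     /* SYN flag set? */
    int      fin;     /* FIN flag set? */
    uint32_t datalen; /* TCP payload length of the offending segment */
};

static void rst_numbers(const struct tcp_fields *orig,
                        uint32_t *rst_seq, uint32_t *rst_ack, int *rst_ack_flag)
{
    if (orig->ack) {
        /* RST takes over the sequence number the peer already acknowledged. */
        *rst_seq = orig->ack_seq;
        *rst_ack = 0;
        *rst_ack_flag = 0;
    } else {
        /* Otherwise acknowledge everything the peer sent; SYN/FIN count as one octet each. */
        *rst_seq = 0;
        *rst_ack = orig->seq + orig->syn + orig->fin + orig->datalen;
        *rst_ack_flag = 1;
    }
}

int main(void)
{
    struct tcp_fields orig = { .seq = 1000, .datalen = 20, .syn = 0, .fin = 0, .ack = 0 };
    uint32_t seq, ack;
    int ack_flag;

    rst_numbers(&orig, &seq, &ack, &ack_flag);
    printf("RST: seq=%u ack=%u ACK-flag=%d\n", (unsigned)seq, (unsigned)ack, ack_flag);
    return 0;
}
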
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 04099ab7d2e..89cccc5a9c9 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -41,11 +41,11 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
41 struct ip_auth_hdr _ah; 41 struct ip_auth_hdr _ah;
42 const struct ip_auth_hdr *ah; 42 const struct ip_auth_hdr *ah;
43 const struct ip6t_ah *ahinfo = par->matchinfo; 43 const struct ip6t_ah *ahinfo = par->matchinfo;
44 unsigned int ptr = 0; 44 unsigned int ptr;
45 unsigned int hdrlen = 0; 45 unsigned int hdrlen = 0;
46 int err; 46 int err;
47 47
48 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL, NULL); 48 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
49 if (err < 0) { 49 if (err < 0) {
50 if (err != -ENOENT) 50 if (err != -ENOENT)
51 par->hotdrop = true; 51 par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 3b5735e56bf..eda898fda6c 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -40,10 +40,10 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
40 struct frag_hdr _frag; 40 struct frag_hdr _frag;
41 const struct frag_hdr *fh; 41 const struct frag_hdr *fh;
42 const struct ip6t_frag *fraginfo = par->matchinfo; 42 const struct ip6t_frag *fraginfo = par->matchinfo;
43 unsigned int ptr = 0; 43 unsigned int ptr;
44 int err; 44 int err;
45 45
46 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL); 46 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
47 if (err < 0) { 47 if (err < 0) {
48 if (err != -ENOENT) 48 if (err != -ENOENT)
49 par->hotdrop = true; 49 par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 01df142bb02..59df051eaef 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -50,7 +50,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
50 const struct ipv6_opt_hdr *oh; 50 const struct ipv6_opt_hdr *oh;
51 const struct ip6t_opts *optinfo = par->matchinfo; 51 const struct ip6t_opts *optinfo = par->matchinfo;
52 unsigned int temp; 52 unsigned int temp;
53 unsigned int ptr = 0; 53 unsigned int ptr;
54 unsigned int hdrlen = 0; 54 unsigned int hdrlen = 0;
55 bool ret = false; 55 bool ret = false;
56 u8 _opttype; 56 u8 _opttype;
@@ -62,7 +62,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
62 62
63 err = ipv6_find_hdr(skb, &ptr, 63 err = ipv6_find_hdr(skb, &ptr,
64 (par->match == &hbh_mt6_reg[0]) ? 64 (par->match == &hbh_mt6_reg[0]) ?
65 NEXTHDR_HOP : NEXTHDR_DEST, NULL, NULL); 65 NEXTHDR_HOP : NEXTHDR_DEST, NULL);
66 if (err < 0) { 66 if (err < 0) {
67 if (err != -ENOENT) 67 if (err != -ENOENT)
68 par->hotdrop = true; 68 par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
deleted file mode 100644
index 5060d54199a..00000000000
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ /dev/null
@@ -1,133 +0,0 @@
1/*
2 * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/netdevice.h>
12#include <linux/route.h>
13#include <net/ip6_fib.h>
14#include <net/ip6_route.h>
15
16#include <linux/netfilter/xt_rpfilter.h>
17#include <linux/netfilter/x_tables.h>
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
21MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match");
22
23static bool rpfilter_addr_unicast(const struct in6_addr *addr)
24{
25 int addr_type = ipv6_addr_type(addr);
26 return addr_type & IPV6_ADDR_UNICAST;
27}
28
29static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
30 const struct net_device *dev, u8 flags)
31{
32 struct rt6_info *rt;
33 struct ipv6hdr *iph = ipv6_hdr(skb);
34 bool ret = false;
35 struct flowi6 fl6 = {
36 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
37 .flowi6_proto = iph->nexthdr,
38 .daddr = iph->saddr,
39 };
40 int lookup_flags;
41
42 if (rpfilter_addr_unicast(&iph->daddr)) {
43 memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr));
44 lookup_flags = RT6_LOOKUP_F_HAS_SADDR;
45 } else {
46 lookup_flags = 0;
47 }
48
49 fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
50 if ((flags & XT_RPFILTER_LOOSE) == 0) {
51 fl6.flowi6_oif = dev->ifindex;
52 lookup_flags |= RT6_LOOKUP_F_IFACE;
53 }
54
55 rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags);
56 if (rt->dst.error)
57 goto out;
58
59 if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST))
60 goto out;
61
62 if (rt->rt6i_flags & RTF_LOCAL) {
63 ret = flags & XT_RPFILTER_ACCEPT_LOCAL;
64 goto out;
65 }
66
67 if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
68 ret = true;
69 out:
70 ip6_rt_put(rt);
71 return ret;
72}
73
74static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
75{
76 const struct xt_rpfilter_info *info = par->matchinfo;
77 int saddrtype;
78 struct ipv6hdr *iph;
79 bool invert = info->flags & XT_RPFILTER_INVERT;
80
81 if (par->in->flags & IFF_LOOPBACK)
82 return true ^ invert;
83
84 iph = ipv6_hdr(skb);
85 saddrtype = ipv6_addr_type(&iph->saddr);
86 if (unlikely(saddrtype == IPV6_ADDR_ANY))
87 return true ^ invert; /* not routable: forward path will drop it */
88
89 return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert;
90}
91
92static int rpfilter_check(const struct xt_mtchk_param *par)
93{
94 const struct xt_rpfilter_info *info = par->matchinfo;
95 unsigned int options = ~XT_RPFILTER_OPTION_MASK;
96
97 if (info->flags & options) {
98 pr_info("unknown options encountered");
99 return -EINVAL;
100 }
101
102 if (strcmp(par->table, "mangle") != 0 &&
103 strcmp(par->table, "raw") != 0) {
 104 pr_info("match only valid in the 'raw' "
 105 "or 'mangle' tables, not '%s'.\n", par->table);
106 return -EINVAL;
107 }
108
109 return 0;
110}
111
112static struct xt_match rpfilter_mt_reg __read_mostly = {
113 .name = "rpfilter",
114 .family = NFPROTO_IPV6,
115 .checkentry = rpfilter_check,
116 .match = rpfilter_mt,
117 .matchsize = sizeof(struct xt_rpfilter_info),
118 .hooks = (1 << NF_INET_PRE_ROUTING),
119 .me = THIS_MODULE
120};
121
122static int __init rpfilter_mt_init(void)
123{
124 return xt_register_match(&rpfilter_mt_reg);
125}
126
127static void __exit rpfilter_mt_exit(void)
128{
129 xt_unregister_match(&rpfilter_mt_reg);
130}
131
132module_init(rpfilter_mt_init);
133module_exit(rpfilter_mt_exit);
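
Editor's note: the rpfilter match removed above implements the reverse-path test: look up a route back to the packet's source address (optionally constrained to the arrival interface) and accept only if the route would leave through the same device. A hedged userspace sketch of that decision is given below; lookup_route6(), struct route_result and the RPF_* flags are hypothetical stand-ins for ip6_route_lookup(), struct rt6_info and XT_RPFILTER_*.

/* Sketch of the reverse-path check done by ip6t_rpfilter (illustrative only). */
#include <stdbool.h>

#define RPF_LOOSE        0x1  /* don't require the same interface */
#define RPF_ACCEPT_LOCAL 0x2  /* accept if the source is one of our own addresses */

struct route_result {
    bool found;        /* a usable (non-reject) route exists */
    bool local;        /* the "source" is a local address */
    int  out_ifindex;  /* interface the route would use */
};

/* Hypothetical resolver: route towards 'addr', preferring 'ifindex' when strict. */
struct route_result lookup_route6(const unsigned char addr[16], int ifindex, bool strict);

static bool reverse_path_ok(const unsigned char saddr[16], int in_ifindex,
                            unsigned int flags)
{
    bool strict = !(flags & RPF_LOOSE);
    struct route_result r = lookup_route6(saddr, in_ifindex, strict);

    if (!r.found)
        return false;                       /* no route back: spoofed or unroutable */
    if (r.local)
        return flags & RPF_ACCEPT_LOCAL;    /* our own address used as source */
    if (flags & RPF_LOOSE)
        return true;                        /* any route back is good enough */
    return r.out_ifindex == in_ifindex;     /* strict: must leave via the arrival device */
}

/* Stub so the sketch links; a real implementation would consult the FIB. */
struct route_result lookup_route6(const unsigned char addr[16], int ifindex, bool strict)
{
    (void)addr; (void)strict;
    struct route_result r = { .found = true, .local = false, .out_ifindex = ifindex };
    return r;
}

int main(void)
{
    unsigned char saddr[16] = { 0x20, 0x01, 0x0d, 0xb8 }; /* 2001:db8::... */
    return reverse_path_ok(saddr, 2, 0) ? 0 : 1;          /* strict check on ifindex 2 */
}
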
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2c99b94eeca..d8488c50a8e 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -42,14 +42,14 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
42 const struct ipv6_rt_hdr *rh; 42 const struct ipv6_rt_hdr *rh;
43 const struct ip6t_rt *rtinfo = par->matchinfo; 43 const struct ip6t_rt *rtinfo = par->matchinfo;
44 unsigned int temp; 44 unsigned int temp;
45 unsigned int ptr = 0; 45 unsigned int ptr;
46 unsigned int hdrlen = 0; 46 unsigned int hdrlen = 0;
47 bool ret = false; 47 bool ret = false;
48 struct in6_addr _addr; 48 struct in6_addr _addr;
49 const struct in6_addr *ap; 49 const struct in6_addr *ap;
50 int err; 50 int err;
51 51
52 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL, NULL); 52 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
53 if (err < 0) { 53 if (err < 0) {
54 if (err != -ENOENT) 54 if (err != -ENOENT)
55 par->hotdrop = true; 55 par->hotdrop = true;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index beb5777d204..c9e37c8fd62 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -44,7 +44,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
44static struct nf_hook_ops *filter_ops __read_mostly; 44static struct nf_hook_ops *filter_ops __read_mostly;
45 45
46/* Default to forward because I got too much mail already. */ 46/* Default to forward because I got too much mail already. */
47static bool forward = true; 47static int forward = NF_ACCEPT;
48module_param(forward, bool, 0000); 48module_param(forward, bool, 0000);
49 49
50static int __net_init ip6table_filter_net_init(struct net *net) 50static int __net_init ip6table_filter_net_init(struct net *net)
@@ -56,12 +56,14 @@ static int __net_init ip6table_filter_net_init(struct net *net)
56 return -ENOMEM; 56 return -ENOMEM;
57 /* Entry 1 is the FORWARD hook */ 57 /* Entry 1 is the FORWARD hook */
58 ((struct ip6t_standard *)repl->entries)[1].target.verdict = 58 ((struct ip6t_standard *)repl->entries)[1].target.verdict =
59 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; 59 -forward - 1;
60 60
61 net->ipv6.ip6table_filter = 61 net->ipv6.ip6table_filter =
62 ip6t_register_table(net, &packet_filter, repl); 62 ip6t_register_table(net, &packet_filter, repl);
63 kfree(repl); 63 kfree(repl);
64 return PTR_RET(net->ipv6.ip6table_filter); 64 if (IS_ERR(net->ipv6.ip6table_filter))
65 return PTR_ERR(net->ipv6.ip6table_filter);
66 return 0;
65} 67}
66 68
67static void __net_exit ip6table_filter_net_exit(struct net *net) 69static void __net_exit ip6table_filter_net_exit(struct net *net)
@@ -78,6 +80,11 @@ static int __init ip6table_filter_init(void)
78{ 80{
79 int ret; 81 int ret;
80 82
83 if (forward < 0 || forward > NF_MAX_VERDICT) {
84 pr_err("iptables forward must be 0 or 1\n");
85 return -EINVAL;
86 }
87
81 ret = register_pernet_subsys(&ip6table_filter_net_ops); 88 ret = register_pernet_subsys(&ip6table_filter_net_ops);
82 if (ret < 0) 89 if (ret < 0)
83 return ret; 90 return ret;
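
Editor's note: both variants of the `forward` hunk above encode the FORWARD chain's default policy the same way: a standard-target verdict v is stored in the ruleset as -v - 1, so NF_DROP (0) becomes -1 and NF_ACCEPT (1) becomes -2. A small demonstration of that encoding follows; the NF_* values match linux/netfilter.h, while encode_verdict()/decode_verdict() are illustrative names, not kernel API.

/* Sketch: how xtables standard targets encode verdicts as negative numbers. */
#include <stdio.h>

enum { NF_DROP = 0, NF_ACCEPT = 1 };

static int encode_verdict(int verdict) { return -verdict - 1; }
static int decode_verdict(int stored)  { return -stored - 1; }

int main(void)
{
    int forward = NF_ACCEPT;    /* module parameter: default policy for FORWARD */
    int stored  = encode_verdict(forward);

    printf("policy %d stored as %d, decodes back to %d\n",
           forward, stored, decode_verdict(stored));
    /* forward ? -NF_ACCEPT - 1 : -NF_DROP - 1 yields the same mapping when
     * forward is restricted to 0/1, which is what the bool variant relies on. */
    return 0;
}
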
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 7431121b87d..00d19173db7 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -42,7 +42,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
42 /* root is playing with raw sockets. */ 42 /* root is playing with raw sockets. */
43 if (skb->len < sizeof(struct iphdr) || 43 if (skb->len < sizeof(struct iphdr) ||
44 ip_hdrlen(skb) < sizeof(struct iphdr)) { 44 ip_hdrlen(skb) < sizeof(struct iphdr)) {
45 net_warn_ratelimited("ip6t_hook: happy cracking\n"); 45 if (net_ratelimit())
46 pr_warning("ip6t_hook: happy cracking.\n");
46 return NF_ACCEPT; 47 return NF_ACCEPT;
47 } 48 }
48#endif 49#endif
@@ -97,7 +98,9 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
97 net->ipv6.ip6table_mangle = 98 net->ipv6.ip6table_mangle =
98 ip6t_register_table(net, &packet_mangler, repl); 99 ip6t_register_table(net, &packet_mangler, repl);
99 kfree(repl); 100 kfree(repl);
100 return PTR_RET(net->ipv6.ip6table_mangle); 101 if (IS_ERR(net->ipv6.ip6table_mangle))
102 return PTR_ERR(net->ipv6.ip6table_mangle);
103 return 0;
101} 104}
102 105
103static void __net_exit ip6table_mangle_net_exit(struct net *net) 106static void __net_exit ip6table_mangle_net_exit(struct net *net)
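
Editor's note: the logging hunks in this patch exchange net_warn_ratelimited() for the open-coded net_ratelimit() + pr_warning() pair (and similar pairs elsewhere); both forms stop emitting once a token budget for the current interval is spent, so a packet flood cannot also flood the log. A toy rate limiter in plain C is sketched below, assuming a simple "burst of N per interval" policy; the interval, burst and function names are illustrative, not the kernel's.

/* Sketch: a minimal message rate limiter in the spirit of net_ratelimit(). */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define RL_INTERVAL_SECS 5
#define RL_BURST         10

static bool ratelimit_ok(void)
{
    static time_t window_start;
    static int    used;
    time_t now = time(NULL);

    if (now - window_start >= RL_INTERVAL_SECS) {
        window_start = now;   /* new window: refill the burst budget */
        used = 0;
    }
    if (used < RL_BURST) {
        used++;
        return true;
    }
    return false;             /* budget exhausted: caller should stay quiet */
}

int main(void)
{
    for (int i = 0; i < 25; i++)
        if (ratelimit_ok())
            printf("warning %d: happy cracking\n", i);
    return 0;
}
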
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
deleted file mode 100644
index e0e788d25b1..00000000000
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ /dev/null
@@ -1,330 +0,0 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
9 * funded by Astaro.
10 */
11
12#include <linux/module.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter_ipv6.h>
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <linux/ipv6.h>
17#include <net/ipv6.h>
18
19#include <net/netfilter/nf_nat.h>
20#include <net/netfilter/nf_nat_core.h>
21#include <net/netfilter/nf_nat_l3proto.h>
22
23static const struct xt_table nf_nat_ipv6_table = {
24 .name = "nat",
25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
26 (1 << NF_INET_POST_ROUTING) |
27 (1 << NF_INET_LOCAL_OUT) |
28 (1 << NF_INET_LOCAL_IN),
29 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6,
31};
32
33static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
34{
35 /* Force range to this IP; let proto decide mapping for
36 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
37 */
38 struct nf_nat_range range;
39
40 range.flags = 0;
41 pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
42 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
43 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
44 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
45
46 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
47}
48
49static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
50 const struct net_device *in,
51 const struct net_device *out,
52 struct nf_conn *ct)
53{
54 struct net *net = nf_ct_net(ct);
55 unsigned int ret;
56
57 ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
58 if (ret == NF_ACCEPT) {
59 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
60 ret = alloc_null_binding(ct, hooknum);
61 }
62 return ret;
63}
64
65static unsigned int
66nf_nat_ipv6_fn(unsigned int hooknum,
67 struct sk_buff *skb,
68 const struct net_device *in,
69 const struct net_device *out,
70 int (*okfn)(struct sk_buff *))
71{
72 struct nf_conn *ct;
73 enum ip_conntrack_info ctinfo;
74 struct nf_conn_nat *nat;
75 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
76 __be16 frag_off;
77 int hdrlen;
78 u8 nexthdr;
79
80 ct = nf_ct_get(skb, &ctinfo);
81 /* Can't track? It's not due to stress, or conntrack would
 82 * have dropped it. Hence it's the user's responsibility to
83 * packet filter it out, or implement conntrack/NAT for that
84 * protocol. 8) --RR
85 */
86 if (!ct)
87 return NF_ACCEPT;
88
89 /* Don't try to NAT if this packet is not conntracked */
90 if (nf_ct_is_untracked(ct))
91 return NF_ACCEPT;
92
93 nat = nfct_nat(ct);
94 if (!nat) {
95 /* NAT module was loaded late. */
96 if (nf_ct_is_confirmed(ct))
97 return NF_ACCEPT;
98 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
99 if (nat == NULL) {
100 pr_debug("failed to add NAT extension\n");
101 return NF_ACCEPT;
102 }
103 }
104
105 switch (ctinfo) {
106 case IP_CT_RELATED:
107 case IP_CT_RELATED_REPLY:
108 nexthdr = ipv6_hdr(skb)->nexthdr;
109 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
110 &nexthdr, &frag_off);
111
112 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
113 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
114 hooknum, hdrlen))
115 return NF_DROP;
116 else
117 return NF_ACCEPT;
118 }
119 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
120 case IP_CT_NEW:
121 /* Seen it before? This can happen for loopback, retrans,
122 * or local packets.
123 */
124 if (!nf_nat_initialized(ct, maniptype)) {
125 unsigned int ret;
126
127 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
128 if (ret != NF_ACCEPT)
129 return ret;
130 } else {
131 pr_debug("Already setup manip %s for ct %p\n",
132 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
133 ct);
134 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
135 goto oif_changed;
136 }
137 break;
138
139 default:
140 /* ESTABLISHED */
141 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
142 ctinfo == IP_CT_ESTABLISHED_REPLY);
143 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
144 goto oif_changed;
145 }
146
147 return nf_nat_packet(ct, ctinfo, hooknum, skb);
148
149oif_changed:
150 nf_ct_kill_acct(ct, ctinfo, skb);
151 return NF_DROP;
152}
153
154static unsigned int
155nf_nat_ipv6_in(unsigned int hooknum,
156 struct sk_buff *skb,
157 const struct net_device *in,
158 const struct net_device *out,
159 int (*okfn)(struct sk_buff *))
160{
161 unsigned int ret;
162 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
163
164 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
165 if (ret != NF_DROP && ret != NF_STOLEN &&
166 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
167 skb_dst_drop(skb);
168
169 return ret;
170}
171
172static unsigned int
173nf_nat_ipv6_out(unsigned int hooknum,
174 struct sk_buff *skb,
175 const struct net_device *in,
176 const struct net_device *out,
177 int (*okfn)(struct sk_buff *))
178{
179#ifdef CONFIG_XFRM
180 const struct nf_conn *ct;
181 enum ip_conntrack_info ctinfo;
182#endif
183 unsigned int ret;
184
185 /* root is playing with raw sockets. */
186 if (skb->len < sizeof(struct ipv6hdr))
187 return NF_ACCEPT;
188
189 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
190#ifdef CONFIG_XFRM
191 if (ret != NF_DROP && ret != NF_STOLEN &&
192 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
193 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
194 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
195
196 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
197 &ct->tuplehash[!dir].tuple.dst.u3) ||
198 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
199 ct->tuplehash[dir].tuple.src.u.all !=
200 ct->tuplehash[!dir].tuple.dst.u.all))
201 if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
202 ret = NF_DROP;
203 }
204#endif
205 return ret;
206}
207
208static unsigned int
209nf_nat_ipv6_local_fn(unsigned int hooknum,
210 struct sk_buff *skb,
211 const struct net_device *in,
212 const struct net_device *out,
213 int (*okfn)(struct sk_buff *))
214{
215 const struct nf_conn *ct;
216 enum ip_conntrack_info ctinfo;
217 unsigned int ret;
218
219 /* root is playing with raw sockets. */
220 if (skb->len < sizeof(struct ipv6hdr))
221 return NF_ACCEPT;
222
223 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
224 if (ret != NF_DROP && ret != NF_STOLEN &&
225 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
226 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
227
228 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
229 &ct->tuplehash[!dir].tuple.src.u3)) {
230 if (ip6_route_me_harder(skb))
231 ret = NF_DROP;
232 }
233#ifdef CONFIG_XFRM
234 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
235 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
236 ct->tuplehash[dir].tuple.dst.u.all !=
237 ct->tuplehash[!dir].tuple.src.u.all)
238 if (nf_xfrm_me_harder(skb, AF_INET6))
239 ret = NF_DROP;
240#endif
241 }
242 return ret;
243}
244
245static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
246 /* Before packet filtering, change destination */
247 {
248 .hook = nf_nat_ipv6_in,
249 .owner = THIS_MODULE,
250 .pf = NFPROTO_IPV6,
251 .hooknum = NF_INET_PRE_ROUTING,
252 .priority = NF_IP6_PRI_NAT_DST,
253 },
254 /* After packet filtering, change source */
255 {
256 .hook = nf_nat_ipv6_out,
257 .owner = THIS_MODULE,
258 .pf = NFPROTO_IPV6,
259 .hooknum = NF_INET_POST_ROUTING,
260 .priority = NF_IP6_PRI_NAT_SRC,
261 },
262 /* Before packet filtering, change destination */
263 {
264 .hook = nf_nat_ipv6_local_fn,
265 .owner = THIS_MODULE,
266 .pf = NFPROTO_IPV6,
267 .hooknum = NF_INET_LOCAL_OUT,
268 .priority = NF_IP6_PRI_NAT_DST,
269 },
270 /* After packet filtering, change source */
271 {
272 .hook = nf_nat_ipv6_fn,
273 .owner = THIS_MODULE,
274 .pf = NFPROTO_IPV6,
275 .hooknum = NF_INET_LOCAL_IN,
276 .priority = NF_IP6_PRI_NAT_SRC,
277 },
278};
279
280static int __net_init ip6table_nat_net_init(struct net *net)
281{
282 struct ip6t_replace *repl;
283
284 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
285 if (repl == NULL)
286 return -ENOMEM;
287 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
288 kfree(repl);
289 return PTR_RET(net->ipv6.ip6table_nat);
290}
291
292static void __net_exit ip6table_nat_net_exit(struct net *net)
293{
294 ip6t_unregister_table(net, net->ipv6.ip6table_nat);
295}
296
297static struct pernet_operations ip6table_nat_net_ops = {
298 .init = ip6table_nat_net_init,
299 .exit = ip6table_nat_net_exit,
300};
301
302static int __init ip6table_nat_init(void)
303{
304 int err;
305
306 err = register_pernet_subsys(&ip6table_nat_net_ops);
307 if (err < 0)
308 goto err1;
309
310 err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
311 if (err < 0)
312 goto err2;
313 return 0;
314
315err2:
316 unregister_pernet_subsys(&ip6table_nat_net_ops);
317err1:
318 return err;
319}
320
321static void __exit ip6table_nat_exit(void)
322{
323 nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
324 unregister_pernet_subsys(&ip6table_nat_net_ops);
325}
326
327module_init(ip6table_nat_init);
328module_exit(ip6table_nat_exit);
329
330MODULE_LICENSE("GPL");
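
Editor's note: the NAT table removed above hooks all four chains and derives the manipulation from the hook point: the hooks that run before the routing decision (PRE_ROUTING, LOCAL_OUT) get the destination rewrite (DNAT), the remaining two (POST_ROUTING, LOCAL_IN) get the source rewrite (SNAT). A compact sketch of that mapping follows; the hook numbering matches enum nf_inet_hooks, while hook_to_manip() is an illustrative stand-in that mirrors, but does not quote, the kernel's HOOK2MANIP() macro.

/* Sketch: which NAT manipulation belongs to which netfilter hook. */
#include <stdio.h>

enum nf_inet_hooks {
    NF_INET_PRE_ROUTING,
    NF_INET_LOCAL_IN,
    NF_INET_FORWARD,
    NF_INET_LOCAL_OUT,
    NF_INET_POST_ROUTING,
};

enum nat_manip { MANIP_SRC, MANIP_DST };

static enum nat_manip hook_to_manip(enum nf_inet_hooks hook)
{
    /* Before the routing decision we may still change where the packet goes
     * (DNAT); afterwards only the source can be rewritten safely (SNAT). */
    return (hook == NF_INET_PRE_ROUTING || hook == NF_INET_LOCAL_OUT)
           ? MANIP_DST : MANIP_SRC;
}

int main(void)
{
    printf("PRE_ROUTING  -> %s\n", hook_to_manip(NF_INET_PRE_ROUTING)  == MANIP_DST ? "DNAT" : "SNAT");
    printf("LOCAL_OUT    -> %s\n", hook_to_manip(NF_INET_LOCAL_OUT)    == MANIP_DST ? "DNAT" : "SNAT");
    printf("POST_ROUTING -> %s\n", hook_to_manip(NF_INET_POST_ROUTING) == MANIP_DST ? "DNAT" : "SNAT");
    printf("LOCAL_IN     -> %s\n", hook_to_manip(NF_INET_LOCAL_IN)     == MANIP_DST ? "DNAT" : "SNAT");
    return 0;
}
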
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 60d1bddff7a..5b9926a011b 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,7 +40,9 @@ static int __net_init ip6table_raw_net_init(struct net *net)
40 net->ipv6.ip6table_raw = 40 net->ipv6.ip6table_raw =
41 ip6t_register_table(net, &packet_raw, repl); 41 ip6t_register_table(net, &packet_raw, repl);
42 kfree(repl); 42 kfree(repl);
43 return PTR_RET(net->ipv6.ip6table_raw); 43 if (IS_ERR(net->ipv6.ip6table_raw))
44 return PTR_ERR(net->ipv6.ip6table_raw);
45 return 0;
44} 46}
45 47
46static void __net_exit ip6table_raw_net_exit(struct net *net) 48static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index db155351339..91aa2b4d83c 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,7 +58,10 @@ static int __net_init ip6table_security_net_init(struct net *net)
58 net->ipv6.ip6table_security = 58 net->ipv6.ip6table_security =
59 ip6t_register_table(net, &security_table, repl); 59 ip6t_register_table(net, &security_table, repl);
60 kfree(repl); 60 kfree(repl);
61 return PTR_RET(net->ipv6.ip6table_security); 61 if (IS_ERR(net->ipv6.ip6table_security))
62 return PTR_ERR(net->ipv6.ip6table_security);
63
64 return 0;
62} 65}
63 66
64static void __net_exit ip6table_security_net_exit(struct net *net) 67static void __net_exit ip6table_security_net_exit(struct net *net)
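
Editor's note: several of the *_net_init hunks swap PTR_RET() for an explicit IS_ERR()/PTR_ERR() pair; both spellings rest on the kernel convention of encoding a negative errno inside an otherwise invalid pointer value returned by functions such as ip6t_register_table(). A userspace approximation of that convention is sketched below; the ERR_PTR/IS_ERR/PTR_ERR names mimic linux/err.h but are reimplemented here purely for illustration.

/* Sketch: the pointer-encodes-errno convention, reimplemented in userspace. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err)      { return (void *)(intptr_t)err; }
static long  PTR_ERR(const void *p) { return (long)(intptr_t)p; }
static int   IS_ERR(const void *p)  { return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO; }

/* A register function either hands back a real object or an encoded errno. */
static void *register_table(int fail)
{
    static int table = 42;
    return fail ? ERR_PTR(-ENOMEM) : &table;
}

int main(void)
{
    void *t = register_table(1);

    /* Equivalent of: if (IS_ERR(t)) return PTR_ERR(t); return 0; */
    if (IS_ERR(t))
        printf("registration failed: %ld\n", PTR_ERR(t));
    else
        printf("registered table at %p\n", t);
    return 0;
}
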
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 137e245860a..4111050a9fc 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -21,7 +21,6 @@
21 21
22#include <linux/netfilter_bridge.h> 22#include <linux/netfilter_bridge.h>
23#include <linux/netfilter_ipv6.h> 23#include <linux/netfilter_ipv6.h>
24#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/nf_conntrack.h> 24#include <net/netfilter/nf_conntrack.h>
26#include <net/netfilter/nf_conntrack_helper.h> 25#include <net/netfilter/nf_conntrack_helper.h>
27#include <net/netfilter/nf_conntrack_l4proto.h> 26#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -29,7 +28,6 @@
29#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_conntrack_zones.h> 29#include <net/netfilter/nf_conntrack_zones.h>
31#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
32#include <net/netfilter/nf_nat_helper.h>
33#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 31#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
34#include <net/netfilter/nf_log.h> 32#include <net/netfilter/nf_log.h>
35 33
@@ -66,112 +64,125 @@ static int ipv6_print_tuple(struct seq_file *s,
66 tuple->src.u3.ip6, tuple->dst.u3.ip6); 64 tuple->src.u3.ip6, tuple->dst.u3.ip6);
67} 65}
68 66
67/*
68 * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
69 *
70 * This function parses (probably truncated) exthdr set "hdr"
71 * of length "len". "nexthdrp" initially points to some place,
72 * where type of the first header can be found.
73 *
74 * It skips all well-known exthdrs, and returns pointer to the start
75 * of unparsable area i.e. the first header with unknown type.
76 * if success, *nexthdr is updated by type/protocol of this header.
77 *
78 * NOTES: - it may return pointer pointing beyond end of packet,
79 * if the last recognized header is truncated in the middle.
80 * - if packet is truncated, so that all parsed headers are skipped,
81 * it returns -1.
82 * - if packet is fragmented, return pointer of the fragment header.
83 * - ESP is unparsable for now and considered like
84 * normal payload protocol.
85 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
86 */
87
88static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
89 u8 *nexthdrp, int len)
90{
91 u8 nexthdr = *nexthdrp;
92
93 while (ipv6_ext_hdr(nexthdr)) {
94 struct ipv6_opt_hdr hdr;
95 int hdrlen;
96
97 if (len < (int)sizeof(struct ipv6_opt_hdr))
98 return -1;
99 if (nexthdr == NEXTHDR_NONE)
100 break;
101 if (nexthdr == NEXTHDR_FRAGMENT)
102 break;
103 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
104 BUG();
105 if (nexthdr == NEXTHDR_AUTH)
106 hdrlen = (hdr.hdrlen+2)<<2;
107 else
108 hdrlen = ipv6_optlen(&hdr);
109
110 nexthdr = hdr.nexthdr;
111 len -= hdrlen;
112 start += hdrlen;
113 }
114
115 *nexthdrp = nexthdr;
116 return start;
117}
118
69static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 119static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
70 unsigned int *dataoff, u_int8_t *protonum) 120 unsigned int *dataoff, u_int8_t *protonum)
71{ 121{
72 unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 122 unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
73 __be16 frag_off; 123 unsigned char pnum;
74 int protoff; 124 int protoff;
75 u8 nexthdr;
76 125
77 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 126 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
78 &nexthdr, sizeof(nexthdr)) != 0) { 127 &pnum, sizeof(pnum)) != 0) {
79 pr_debug("ip6_conntrack_core: can't get nexthdr\n"); 128 pr_debug("ip6_conntrack_core: can't get nexthdr\n");
80 return -NF_ACCEPT; 129 return -NF_ACCEPT;
81 } 130 }
82 protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); 131 protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff);
83 /* 132 /*
 84 * (protoff == skb->len) means the packet has no data, just 133 * (protoff == skb->len) means that the packet has no data
 85 * IPv6 and possibly extension headers, but it is tracked anyway 134 * except for IPv6 & ext headers, but it's tracked anyway. - YK
86 */ 135 */
87 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 136 if ((protoff < 0) || (protoff > skb->len)) {
88 pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); 137 pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
89 return -NF_ACCEPT; 138 return -NF_ACCEPT;
90 } 139 }
91 140
92 *dataoff = protoff; 141 *dataoff = protoff;
93 *protonum = nexthdr; 142 *protonum = pnum;
94 return NF_ACCEPT; 143 return NF_ACCEPT;
95} 144}
96 145
97static unsigned int ipv6_helper(unsigned int hooknum, 146static unsigned int ipv6_confirm(unsigned int hooknum,
98 struct sk_buff *skb, 147 struct sk_buff *skb,
99 const struct net_device *in, 148 const struct net_device *in,
100 const struct net_device *out, 149 const struct net_device *out,
101 int (*okfn)(struct sk_buff *)) 150 int (*okfn)(struct sk_buff *))
102{ 151{
103 struct nf_conn *ct; 152 struct nf_conn *ct;
104 const struct nf_conn_help *help; 153 const struct nf_conn_help *help;
105 const struct nf_conntrack_helper *helper; 154 const struct nf_conntrack_helper *helper;
106 enum ip_conntrack_info ctinfo; 155 enum ip_conntrack_info ctinfo;
107 unsigned int ret; 156 unsigned int ret, protoff;
108 __be16 frag_off; 157 unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
109 int protoff; 158 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
110 u8 nexthdr; 159
111 160
112 /* This is where we call the helper: as the packet goes out. */ 161 /* This is where we call the helper: as the packet goes out. */
113 ct = nf_ct_get(skb, &ctinfo); 162 ct = nf_ct_get(skb, &ctinfo);
114 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 163 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
115 return NF_ACCEPT; 164 goto out;
116 165
117 help = nfct_help(ct); 166 help = nfct_help(ct);
118 if (!help) 167 if (!help)
119 return NF_ACCEPT; 168 goto out;
120 /* rcu_read_lock()ed by nf_hook_slow */ 169 /* rcu_read_lock()ed by nf_hook_slow */
121 helper = rcu_dereference(help->helper); 170 helper = rcu_dereference(help->helper);
122 if (!helper) 171 if (!helper)
123 return NF_ACCEPT; 172 goto out;
124 173
125 nexthdr = ipv6_hdr(skb)->nexthdr; 174 protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
126 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 175 skb->len - extoff);
127 &frag_off); 176 if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
128 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
129 pr_debug("proto header not found\n"); 177 pr_debug("proto header not found\n");
130 return NF_ACCEPT; 178 return NF_ACCEPT;
131 } 179 }
132 180
133 ret = helper->help(skb, protoff, ct, ctinfo); 181 ret = helper->help(skb, protoff, ct, ctinfo);
134 if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { 182 if (ret != NF_ACCEPT) {
135 nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, 183 nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL,
136 "nf_ct_%s: dropping packet", helper->name); 184 "nf_ct_%s: dropping packet", helper->name);
137 } 185 return ret;
138 return ret;
139}
140
141static unsigned int ipv6_confirm(unsigned int hooknum,
142 struct sk_buff *skb,
143 const struct net_device *in,
144 const struct net_device *out,
145 int (*okfn)(struct sk_buff *))
146{
147 struct nf_conn *ct;
148 enum ip_conntrack_info ctinfo;
149 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
150 int protoff;
151 __be16 frag_off;
152
153 ct = nf_ct_get(skb, &ctinfo);
154 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
155 goto out;
156
157 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
158 &frag_off);
159 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
160 pr_debug("proto header not found\n");
161 goto out;
162 }
163
164 /* adjust seqs for loopback traffic only in outgoing direction */
165 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
166 !nf_is_loopback_packet(skb)) {
167 typeof(nf_nat_seq_adjust_hook) seq_adjust;
168
169 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
170 if (!seq_adjust ||
171 !seq_adjust(skb, ct, ctinfo, protoff)) {
172 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
173 return NF_DROP;
174 }
175 } 186 }
176out: 187out:
177 /* We've seen it coming out the other side: confirm it */ 188 /* We've seen it coming out the other side: confirm it */
@@ -181,14 +192,9 @@ out:
181static unsigned int __ipv6_conntrack_in(struct net *net, 192static unsigned int __ipv6_conntrack_in(struct net *net,
182 unsigned int hooknum, 193 unsigned int hooknum,
183 struct sk_buff *skb, 194 struct sk_buff *skb,
184 const struct net_device *in,
185 const struct net_device *out,
186 int (*okfn)(struct sk_buff *)) 195 int (*okfn)(struct sk_buff *))
187{ 196{
188 struct sk_buff *reasm = skb->nfct_reasm; 197 struct sk_buff *reasm = skb->nfct_reasm;
189 const struct nf_conn_help *help;
190 struct nf_conn *ct;
191 enum ip_conntrack_info ctinfo;
192 198
193 /* This packet is fragmented and has reassembled packet. */ 199 /* This packet is fragmented and has reassembled packet. */
194 if (reasm) { 200 if (reasm) {
@@ -200,25 +206,6 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
200 if (ret != NF_ACCEPT) 206 if (ret != NF_ACCEPT)
201 return ret; 207 return ret;
202 } 208 }
203
204 /* Conntrack helpers need the entire reassembled packet in the
205 * POST_ROUTING hook. In case of unconfirmed connections NAT
206 * might reassign a helper, so the entire packet is also
207 * required.
208 */
209 ct = nf_ct_get(reasm, &ctinfo);
210 if (ct != NULL && !nf_ct_is_untracked(ct)) {
211 help = nfct_help(ct);
212 if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
213 nf_conntrack_get_reasm(skb);
214 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
215 (struct net_device *)in,
216 (struct net_device *)out,
217 okfn, NF_IP6_PRI_CONNTRACK + 1);
218 return NF_DROP_ERR(-ECANCELED);
219 }
220 }
221
222 nf_conntrack_get(reasm->nfct); 209 nf_conntrack_get(reasm->nfct);
223 skb->nfct = reasm->nfct; 210 skb->nfct = reasm->nfct;
224 skb->nfctinfo = reasm->nfctinfo; 211 skb->nfctinfo = reasm->nfctinfo;
@@ -234,7 +221,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
234 const struct net_device *out, 221 const struct net_device *out,
235 int (*okfn)(struct sk_buff *)) 222 int (*okfn)(struct sk_buff *))
236{ 223{
237 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); 224 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
238} 225}
239 226
240static unsigned int ipv6_conntrack_local(unsigned int hooknum, 227static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -245,10 +232,11 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
245{ 232{
246 /* root is playing with raw sockets. */ 233 /* root is playing with raw sockets. */
247 if (skb->len < sizeof(struct ipv6hdr)) { 234 if (skb->len < sizeof(struct ipv6hdr)) {
248 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); 235 if (net_ratelimit())
236 pr_notice("ipv6_conntrack_local: packet too short\n");
249 return NF_ACCEPT; 237 return NF_ACCEPT;
250 } 238 }
251 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); 239 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
252} 240}
253 241
254static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 242static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
@@ -267,13 +255,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
267 .priority = NF_IP6_PRI_CONNTRACK, 255 .priority = NF_IP6_PRI_CONNTRACK,
268 }, 256 },
269 { 257 {
270 .hook = ipv6_helper,
271 .owner = THIS_MODULE,
272 .pf = NFPROTO_IPV6,
273 .hooknum = NF_INET_POST_ROUTING,
274 .priority = NF_IP6_PRI_CONNTRACK_HELPER,
275 },
276 {
277 .hook = ipv6_confirm, 258 .hook = ipv6_confirm,
278 .owner = THIS_MODULE, 259 .owner = THIS_MODULE,
279 .pf = NFPROTO_IPV6, 260 .pf = NFPROTO_IPV6,
@@ -281,13 +262,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
281 .priority = NF_IP6_PRI_LAST, 262 .priority = NF_IP6_PRI_LAST,
282 }, 263 },
283 { 264 {
284 .hook = ipv6_helper,
285 .owner = THIS_MODULE,
286 .pf = NFPROTO_IPV6,
287 .hooknum = NF_INET_LOCAL_IN,
288 .priority = NF_IP6_PRI_CONNTRACK_HELPER,
289 },
290 {
291 .hook = ipv6_confirm, 265 .hook = ipv6_confirm,
292 .owner = THIS_MODULE, 266 .owner = THIS_MODULE,
293 .pf = NFPROTO_IPV6, 267 .pf = NFPROTO_IPV6,
@@ -296,56 +270,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
296 }, 270 },
297}; 271};
298 272
299static int 273#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
300ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
301{
302 const struct inet_sock *inet = inet_sk(sk);
303 const struct ipv6_pinfo *inet6 = inet6_sk(sk);
304 const struct nf_conntrack_tuple_hash *h;
305 struct sockaddr_in6 sin6;
306 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
307 struct nf_conn *ct;
308
309 tuple.src.u3.in6 = inet6->rcv_saddr;
310 tuple.src.u.tcp.port = inet->inet_sport;
311 tuple.dst.u3.in6 = inet6->daddr;
312 tuple.dst.u.tcp.port = inet->inet_dport;
313 tuple.dst.protonum = sk->sk_protocol;
314
315 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
316 return -ENOPROTOOPT;
317
318 if (*len < 0 || (unsigned int) *len < sizeof(sin6))
319 return -EINVAL;
320
321 h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
322 if (!h) {
323 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
324 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
325 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
326 return -ENOENT;
327 }
328
329 ct = nf_ct_tuplehash_to_ctrack(h);
330
331 sin6.sin6_family = AF_INET6;
332 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
333 sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
334 memcpy(&sin6.sin6_addr,
335 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
336 sizeof(sin6.sin6_addr));
337
338 nf_ct_put(ct);
339
340 if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
341 sin6.sin6_scope_id = sk->sk_bound_dev_if;
342 else
343 sin6.sin6_scope_id = 0;
344
345 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
346}
347
348#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
349 274
350#include <linux/netfilter/nfnetlink.h> 275#include <linux/netfilter/nfnetlink.h>
351#include <linux/netfilter/nfnetlink_conntrack.h> 276#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -353,11 +278,10 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
353static int ipv6_tuple_to_nlattr(struct sk_buff *skb, 278static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
354 const struct nf_conntrack_tuple *tuple) 279 const struct nf_conntrack_tuple *tuple)
355{ 280{
356 if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, 281 NLA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
357 &tuple->src.u3.ip6) || 282 &tuple->src.u3.ip6);
358 nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, 283 NLA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
359 &tuple->dst.u3.ip6)) 284 &tuple->dst.u3.ip6);
360 goto nla_put_failure;
361 return 0; 285 return 0;
362 286
363nla_put_failure: 287nla_put_failure:
@@ -396,7 +320,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
396 .invert_tuple = ipv6_invert_tuple, 320 .invert_tuple = ipv6_invert_tuple,
397 .print_tuple = ipv6_print_tuple, 321 .print_tuple = ipv6_print_tuple,
398 .get_l4proto = ipv6_get_l4proto, 322 .get_l4proto = ipv6_get_l4proto,
399#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 323#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
400 .tuple_to_nlattr = ipv6_tuple_to_nlattr, 324 .tuple_to_nlattr = ipv6_tuple_to_nlattr,
401 .nlattr_tuple_size = ipv6_nlattr_tuple_size, 325 .nlattr_tuple_size = ipv6_nlattr_tuple_size,
402 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 326 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
@@ -409,89 +333,37 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
409MODULE_LICENSE("GPL"); 333MODULE_LICENSE("GPL");
410MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); 334MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
411 335
412static struct nf_sockopt_ops so_getorigdst6 = { 336static int __init nf_conntrack_l3proto_ipv6_init(void)
413 .pf = NFPROTO_IPV6,
414 .get_optmin = IP6T_SO_ORIGINAL_DST,
415 .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
416 .get = ipv6_getorigdst,
417 .owner = THIS_MODULE,
418};
419
420static int ipv6_net_init(struct net *net)
421{ 337{
422 int ret = 0; 338 int ret = 0;
423 339
424 ret = nf_conntrack_l4proto_register(net, 340 need_conntrack();
425 &nf_conntrack_l4proto_tcp6); 341 nf_defrag_ipv6_enable();
426 if (ret < 0) { 342
427 printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); 343 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
428 goto out;
429 }
430 ret = nf_conntrack_l4proto_register(net,
431 &nf_conntrack_l4proto_udp6);
432 if (ret < 0) { 344 if (ret < 0) {
433 printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); 345 pr_err("nf_conntrack_ipv6: can't register tcp.\n");
434 goto cleanup_tcp6; 346 return ret;
435 } 347 }
436 ret = nf_conntrack_l4proto_register(net, 348
437 &nf_conntrack_l4proto_icmpv6); 349 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
438 if (ret < 0) { 350 if (ret < 0) {
439 printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); 351 pr_err("nf_conntrack_ipv6: can't register udp.\n");
440 goto cleanup_udp6; 352 goto cleanup_tcp;
441 } 353 }
442 ret = nf_conntrack_l3proto_register(net, 354
443 &nf_conntrack_l3proto_ipv6); 355 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
444 if (ret < 0) { 356 if (ret < 0) {
445 printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); 357 pr_err("nf_conntrack_ipv6: can't register icmpv6.\n");
446 goto cleanup_icmpv6; 358 goto cleanup_udp;
447 } 359 }
448 return 0;
449 cleanup_icmpv6:
450 nf_conntrack_l4proto_unregister(net,
451 &nf_conntrack_l4proto_icmpv6);
452 cleanup_udp6:
453 nf_conntrack_l4proto_unregister(net,
454 &nf_conntrack_l4proto_udp6);
455 cleanup_tcp6:
456 nf_conntrack_l4proto_unregister(net,
457 &nf_conntrack_l4proto_tcp6);
458 out:
459 return ret;
460}
461
462static void ipv6_net_exit(struct net *net)
463{
464 nf_conntrack_l3proto_unregister(net,
465 &nf_conntrack_l3proto_ipv6);
466 nf_conntrack_l4proto_unregister(net,
467 &nf_conntrack_l4proto_icmpv6);
468 nf_conntrack_l4proto_unregister(net,
469 &nf_conntrack_l4proto_udp6);
470 nf_conntrack_l4proto_unregister(net,
471 &nf_conntrack_l4proto_tcp6);
472}
473
474static struct pernet_operations ipv6_net_ops = {
475 .init = ipv6_net_init,
476 .exit = ipv6_net_exit,
477};
478
479static int __init nf_conntrack_l3proto_ipv6_init(void)
480{
481 int ret = 0;
482
483 need_conntrack();
484 nf_defrag_ipv6_enable();
485 360
486 ret = nf_register_sockopt(&so_getorigdst6); 361 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
487 if (ret < 0) { 362 if (ret < 0) {
488 pr_err("Unable to register netfilter socket option\n"); 363 pr_err("nf_conntrack_ipv6: can't register ipv6\n");
489 return ret; 364 goto cleanup_icmpv6;
490 } 365 }
491 366
492 ret = register_pernet_subsys(&ipv6_net_ops);
493 if (ret < 0)
494 goto cleanup_pernet;
495 ret = nf_register_hooks(ipv6_conntrack_ops, 367 ret = nf_register_hooks(ipv6_conntrack_ops,
496 ARRAY_SIZE(ipv6_conntrack_ops)); 368 ARRAY_SIZE(ipv6_conntrack_ops));
497 if (ret < 0) { 369 if (ret < 0) {
@@ -502,9 +374,13 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
502 return ret; 374 return ret;
503 375
504 cleanup_ipv6: 376 cleanup_ipv6:
505 unregister_pernet_subsys(&ipv6_net_ops); 377 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
506 cleanup_pernet: 378 cleanup_icmpv6:
507 nf_unregister_sockopt(&so_getorigdst6); 379 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
380 cleanup_udp:
381 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
382 cleanup_tcp:
383 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
508 return ret; 384 return ret;
509} 385}
510 386
@@ -512,8 +388,10 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
512{ 388{
513 synchronize_net(); 389 synchronize_net();
514 nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); 390 nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
515 unregister_pernet_subsys(&ipv6_net_ops); 391 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
516 nf_unregister_sockopt(&so_getorigdst6); 392 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
393 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
394 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
517} 395}
518 396
519module_init(nf_conntrack_l3proto_ipv6_init); 397module_init(nf_conntrack_l3proto_ipv6_init);
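
Editor's note: the nf_ct_ipv6_skip_exthdr() comment in the hunk above describes the standard extension-header walk: starting after the fixed IPv6 header, follow each option header's nexthdr/length pair until a non-extension protocol (or a fragment header) is reached. A self-contained userspace sketch of the same walk over a flat packet buffer follows; the small header list and ext_hdr length rule are simplified (fragments stop the walk, ESP is treated as payload), and only the AH special case keeps its 4-byte length unit, as the kernel code above also notes.

/* Sketch: walking IPv6 extension headers, in the spirit of nf_ct_ipv6_skip_exthdr(). */
#include <stddef.h>
#include <stdint.h>

enum {
    NEXTHDR_HOP = 0, NEXTHDR_ROUTING = 43, NEXTHDR_FRAGMENT = 44,
    NEXTHDR_AUTH = 51, NEXTHDR_NONE = 59, NEXTHDR_DEST = 60,
};

static int is_ext_hdr(uint8_t nh)
{
    return nh == NEXTHDR_HOP || nh == NEXTHDR_ROUTING ||
           nh == NEXTHDR_FRAGMENT || nh == NEXTHDR_AUTH ||
           nh == NEXTHDR_NONE || nh == NEXTHDR_DEST;
}

/* Returns the offset of the upper-layer header, or -1 on truncation.
 * *nexthdrp holds the protocol found at 'start' on entry and the final
 * upper-layer protocol on exit.
 */
static int skip_exthdrs(const uint8_t *pkt, size_t len, int start, uint8_t *nexthdrp)
{
    uint8_t nexthdr = *nexthdrp;

    while (is_ext_hdr(nexthdr)) {
        int hdrlen;

        if (nexthdr == NEXTHDR_NONE || nexthdr == NEXTHDR_FRAGMENT)
            break;                            /* no payload / handled elsewhere */
        if ((size_t)start + 2 > len)
            return -1;                        /* truncated option header */

        /* Byte 0: next header, byte 1: length field.  AH counts in 4-byte
         * units (plus 2), every other option in 8-byte units (plus 1). */
        if (nexthdr == NEXTHDR_AUTH)
            hdrlen = (pkt[start + 1] + 2) << 2;
        else
            hdrlen = (pkt[start + 1] + 1) << 3;

        nexthdr = pkt[start];
        start  += hdrlen;
    }

    *nexthdrp = nexthdr;
    return start;
}

int main(void)
{
    /* 40-byte IPv6 header (zeroed) + 8-byte hop-by-hop header, next header = TCP (6). */
    uint8_t pkt[48] = {0};
    pkt[40] = 6;   /* HBH: next header */
    pkt[41] = 0;   /* HBH: length field 0 -> 8 bytes */

    uint8_t proto = NEXTHDR_HOP;
    int off = skip_exthdrs(pkt, sizeof(pkt), 40, &proto);
    return (off == 48 && proto == 6) ? 0 : 1;   /* expect offset 48, protocol TCP */
}
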
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 24df3dde007..7c05e7eacbc 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -29,11 +29,6 @@
29 29
30static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; 30static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
31 31
32static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
33{
34 return &net->ct.nf_ct_proto.icmpv6;
35}
36
37static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, 32static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
38 unsigned int dataoff, 33 unsigned int dataoff,
39 struct nf_conntrack_tuple *tuple) 34 struct nf_conntrack_tuple *tuple)
@@ -93,31 +88,25 @@ static int icmpv6_print_tuple(struct seq_file *s,
93 ntohs(tuple->src.u.icmp.id)); 88 ntohs(tuple->src.u.icmp.id));
94} 89}
95 90
96static unsigned int *icmpv6_get_timeouts(struct net *net)
97{
98 return &icmpv6_pernet(net)->timeout;
99}
100
101/* Returns verdict for packet, or -1 for invalid. */ 91/* Returns verdict for packet, or -1 for invalid. */
102static int icmpv6_packet(struct nf_conn *ct, 92static int icmpv6_packet(struct nf_conn *ct,
103 const struct sk_buff *skb, 93 const struct sk_buff *skb,
104 unsigned int dataoff, 94 unsigned int dataoff,
105 enum ip_conntrack_info ctinfo, 95 enum ip_conntrack_info ctinfo,
106 u_int8_t pf, 96 u_int8_t pf,
107 unsigned int hooknum, 97 unsigned int hooknum)
108 unsigned int *timeout)
109{ 98{
110 /* Do not immediately delete the connection after the first 99 /* Do not immediately delete the connection after the first
111 successful reply to avoid excessive conntrackd traffic 100 successful reply to avoid excessive conntrackd traffic
112 and also to handle correctly ICMP echo reply duplicates. */ 101 and also to handle correctly ICMP echo reply duplicates. */
113 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 102 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
114 103
115 return NF_ACCEPT; 104 return NF_ACCEPT;
116} 105}
117 106
118/* Called when a new connection for this protocol found. */ 107/* Called when a new connection for this protocol found. */
119static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, 108static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
120 unsigned int dataoff, unsigned int *timeouts) 109 unsigned int dataoff)
121{ 110{
122 static const u_int8_t valid_new[] = { 111 static const u_int8_t valid_new[] = {
123 [ICMPV6_ECHO_REQUEST - 128] = 1, 112 [ICMPV6_ECHO_REQUEST - 128] = 1,
@@ -232,17 +221,17 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
232 return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); 221 return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
233} 222}
234 223
235#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 224#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
236 225
237#include <linux/netfilter/nfnetlink.h> 226#include <linux/netfilter/nfnetlink.h>
238#include <linux/netfilter/nfnetlink_conntrack.h> 227#include <linux/netfilter/nfnetlink_conntrack.h>
239static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, 228static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
240 const struct nf_conntrack_tuple *t) 229 const struct nf_conntrack_tuple *t)
241{ 230{
242 if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) || 231 NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id);
243 nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) || 232 NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type);
244 nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code)) 233 NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code);
245 goto nla_put_failure; 234
246 return 0; 235 return 0;
247 236
248nla_put_failure: 237nla_put_failure:
@@ -281,50 +270,12 @@ static int icmpv6_nlattr_tuple_size(void)
281} 270}
282#endif 271#endif
283 272
284#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
285
286#include <linux/netfilter/nfnetlink.h>
287#include <linux/netfilter/nfnetlink_cttimeout.h>
288
289static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
290 struct net *net, void *data)
291{
292 unsigned int *timeout = data;
293 struct nf_icmp_net *in = icmpv6_pernet(net);
294
295 if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
296 *timeout =
297 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
298 } else {
299 /* Set default ICMPv6 timeout. */
300 *timeout = in->timeout;
301 }
302 return 0;
303}
304
305static int
306icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
307{
308 const unsigned int *timeout = data;
309
310 if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
311 goto nla_put_failure;
312 return 0;
313
314nla_put_failure:
315 return -ENOSPC;
316}
317
318static const struct nla_policy
319icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
320 [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 },
321};
322#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
323
324#ifdef CONFIG_SYSCTL 273#ifdef CONFIG_SYSCTL
274static struct ctl_table_header *icmpv6_sysctl_header;
325static struct ctl_table icmpv6_sysctl_table[] = { 275static struct ctl_table icmpv6_sysctl_table[] = {
326 { 276 {
327 .procname = "nf_conntrack_icmpv6_timeout", 277 .procname = "nf_conntrack_icmpv6_timeout",
278 .data = &nf_ct_icmpv6_timeout,
328 .maxlen = sizeof(unsigned int), 279 .maxlen = sizeof(unsigned int),
329 .mode = 0644, 280 .mode = 0644,
330 .proc_handler = proc_dointvec_jiffies, 281 .proc_handler = proc_dointvec_jiffies,
@@ -333,36 +284,6 @@ static struct ctl_table icmpv6_sysctl_table[] = {
333}; 284};
334#endif /* CONFIG_SYSCTL */ 285#endif /* CONFIG_SYSCTL */
335 286
336static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
337 struct nf_icmp_net *in)
338{
339#ifdef CONFIG_SYSCTL
340 pn->ctl_table = kmemdup(icmpv6_sysctl_table,
341 sizeof(icmpv6_sysctl_table),
342 GFP_KERNEL);
343 if (!pn->ctl_table)
344 return -ENOMEM;
345
346 pn->ctl_table[0].data = &in->timeout;
347#endif
348 return 0;
349}
350
351static int icmpv6_init_net(struct net *net, u_int16_t proto)
352{
353 struct nf_icmp_net *in = icmpv6_pernet(net);
354 struct nf_proto_net *pn = &in->pn;
355
356 in->timeout = nf_ct_icmpv6_timeout;
357
358 return icmpv6_kmemdup_sysctl_table(pn, in);
359}
360
361static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
362{
363 return &net->ct.nf_ct_proto.icmpv6.pn;
364}
365
366struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = 287struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
367{ 288{
368 .l3proto = PF_INET6, 289 .l3proto = PF_INET6,
@@ -372,24 +293,16 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
372 .invert_tuple = icmpv6_invert_tuple, 293 .invert_tuple = icmpv6_invert_tuple,
373 .print_tuple = icmpv6_print_tuple, 294 .print_tuple = icmpv6_print_tuple,
374 .packet = icmpv6_packet, 295 .packet = icmpv6_packet,
375 .get_timeouts = icmpv6_get_timeouts,
376 .new = icmpv6_new, 296 .new = icmpv6_new,
377 .error = icmpv6_error, 297 .error = icmpv6_error,
378#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 298#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
379 .tuple_to_nlattr = icmpv6_tuple_to_nlattr, 299 .tuple_to_nlattr = icmpv6_tuple_to_nlattr,
380 .nlattr_tuple_size = icmpv6_nlattr_tuple_size, 300 .nlattr_tuple_size = icmpv6_nlattr_tuple_size,
381 .nlattr_to_tuple = icmpv6_nlattr_to_tuple, 301 .nlattr_to_tuple = icmpv6_nlattr_to_tuple,
382 .nla_policy = icmpv6_nla_policy, 302 .nla_policy = icmpv6_nla_policy,
383#endif 303#endif
384#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 304#ifdef CONFIG_SYSCTL
385 .ctnl_timeout = { 305 .ctl_table_header = &icmpv6_sysctl_header,
386 .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, 306 .ctl_table = icmpv6_sysctl_table,
387 .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, 307#endif
388 .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
389 .obj_size = sizeof(unsigned int),
390 .nla_policy = icmpv6_timeout_nla_policy,
391 },
392#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
393 .init_net = icmpv6_init_net,
394 .get_net_proto = icmpv6_get_net_proto,
395}; 308};
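
Editor's note: the tuple_to_nlattr hunks in both conntrack files show the same netlink attribute fill written two ways: chained nla_put*() calls on one side, NLA_PUT*() macros that jump to a local nla_put_failure label on the other. The two styles are equivalent error-handling patterns; the small illustration below uses put_attr() and a local NLA_PUT() macro as stand-ins, not the real kernel or libnl symbols.

/* Sketch: goto-on-failure macro style vs. chained-call style for attribute fills. */
#include <stdio.h>

/* Pretend attribute writer: fails (returns nonzero) when the buffer is full. */
static int put_attr(int *space_left, int size)
{
    if (*space_left < size)
        return -1;
    *space_left -= size;
    return 0;
}

#define NLA_PUT(space, size)                \
    do {                                    \
        if (put_attr((space), (size)) != 0) \
            goto nla_put_failure;           \
    } while (0)

static int fill_goto_style(int space)       /* macro jumps to the failure label */
{
    NLA_PUT(&space, 16);
    NLA_PUT(&space, 16);
    return 0;
nla_put_failure:
    return -1;
}

static int fill_chained_style(int space)    /* explicit || chain of calls */
{
    if (put_attr(&space, 16) ||
        put_attr(&space, 16))
        return -1;
    return 0;
}

int main(void)
{
    printf("goto: %d chained: %d (space=24 -> both fail)\n",
           fill_goto_style(24), fill_chained_style(24));
    return 0;
}
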
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3dacecc9906..08572726381 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -57,27 +57,41 @@ struct nf_ct_frag6_skb_cb
57 57
58#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) 58#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
59 59
60struct nf_ct_frag6_queue
61{
62 struct inet_frag_queue q;
63
64 __be32 id; /* fragment id */
65 u32 user;
66 struct in6_addr saddr;
67 struct in6_addr daddr;
68
69 unsigned int csum;
70 __u16 nhoffset;
71};
72
60static struct inet_frags nf_frags; 73static struct inet_frags nf_frags;
74static struct netns_frags nf_init_frags;
61 75
62#ifdef CONFIG_SYSCTL 76#ifdef CONFIG_SYSCTL
63static struct ctl_table nf_ct_frag6_sysctl_table[] = { 77static struct ctl_table nf_ct_frag6_sysctl_table[] = {
64 { 78 {
65 .procname = "nf_conntrack_frag6_timeout", 79 .procname = "nf_conntrack_frag6_timeout",
66 .data = &init_net.nf_frag.frags.timeout, 80 .data = &nf_init_frags.timeout,
67 .maxlen = sizeof(unsigned int), 81 .maxlen = sizeof(unsigned int),
68 .mode = 0644, 82 .mode = 0644,
69 .proc_handler = proc_dointvec_jiffies, 83 .proc_handler = proc_dointvec_jiffies,
70 }, 84 },
71 { 85 {
72 .procname = "nf_conntrack_frag6_low_thresh", 86 .procname = "nf_conntrack_frag6_low_thresh",
73 .data = &init_net.nf_frag.frags.low_thresh, 87 .data = &nf_init_frags.low_thresh,
74 .maxlen = sizeof(unsigned int), 88 .maxlen = sizeof(unsigned int),
75 .mode = 0644, 89 .mode = 0644,
76 .proc_handler = proc_dointvec, 90 .proc_handler = proc_dointvec,
77 }, 91 },
78 { 92 {
79 .procname = "nf_conntrack_frag6_high_thresh", 93 .procname = "nf_conntrack_frag6_high_thresh",
80 .data = &init_net.nf_frag.frags.high_thresh, 94 .data = &nf_init_frags.high_thresh,
81 .maxlen = sizeof(unsigned int), 95 .maxlen = sizeof(unsigned int),
82 .mode = 0644, 96 .mode = 0644,
83 .proc_handler = proc_dointvec, 97 .proc_handler = proc_dointvec,
@@ -85,86 +99,68 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
85 { } 99 { }
86}; 100};
87 101
88static int nf_ct_frag6_sysctl_register(struct net *net) 102static struct ctl_table_header *nf_ct_frag6_sysctl_header;
89{ 103#endif
90 struct ctl_table *table;
91 struct ctl_table_header *hdr;
92
93 table = nf_ct_frag6_sysctl_table;
94 if (!net_eq(net, &init_net)) {
95 table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
96 GFP_KERNEL);
97 if (table == NULL)
98 goto err_alloc;
99
100 table[0].data = &net->ipv6.frags.high_thresh;
101 table[1].data = &net->ipv6.frags.low_thresh;
102 table[2].data = &net->ipv6.frags.timeout;
103 }
104
105 hdr = register_net_sysctl(net, "net/netfilter", table);
106 if (hdr == NULL)
107 goto err_reg;
108
109 net->nf_frag.sysctl.frags_hdr = hdr;
110 return 0;
111
112err_reg:
113 if (!net_eq(net, &init_net))
114 kfree(table);
115err_alloc:
116 return -ENOMEM;
117}
118 104
119static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) 105static unsigned int nf_hashfn(struct inet_frag_queue *q)
120{ 106{
121 struct ctl_table *table; 107 const struct nf_ct_frag6_queue *nq;
122 108
123 table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; 109 nq = container_of(q, struct nf_ct_frag6_queue, q);
124 unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); 110 return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
125 if (!net_eq(net, &init_net))
126 kfree(table);
127} 111}
128 112
129#else 113static void nf_skb_free(struct sk_buff *skb)
130static int nf_ct_frag6_sysctl_register(struct net *net)
131{ 114{
132 return 0; 115 if (NFCT_FRAG6_CB(skb)->orig)
116 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
133} 117}
134static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) 118
119/* Destruction primitives. */
120
121static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
135{ 122{
123 inet_frag_put(&fq->q, &nf_frags);
136} 124}
137#endif
138 125
139static unsigned int nf_hashfn(struct inet_frag_queue *q) 126/* Kill fq entry. It is not destroyed immediately,
127 * because caller (and someone more) holds reference count.
128 */
129static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
140{ 130{
141 const struct frag_queue *nq; 131 inet_frag_kill(&fq->q, &nf_frags);
142
143 nq = container_of(q, struct frag_queue, q);
144 return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
145} 132}
146 133
147static void nf_skb_free(struct sk_buff *skb) 134static void nf_ct_frag6_evictor(void)
148{ 135{
149 if (NFCT_FRAG6_CB(skb)->orig) 136 local_bh_disable();
150 kfree_skb(NFCT_FRAG6_CB(skb)->orig); 137 inet_frag_evictor(&nf_init_frags, &nf_frags);
138 local_bh_enable();
151} 139}
152 140
153static void nf_ct_frag6_expire(unsigned long data) 141static void nf_ct_frag6_expire(unsigned long data)
154{ 142{
155 struct frag_queue *fq; 143 struct nf_ct_frag6_queue *fq;
156 struct net *net;
157 144
158 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); 145 fq = container_of((struct inet_frag_queue *)data,
159 net = container_of(fq->q.net, struct net, nf_frag.frags); 146 struct nf_ct_frag6_queue, q);
160 147
161 ip6_expire_frag_queue(net, fq, &nf_frags); 148 spin_lock(&fq->q.lock);
149
150 if (fq->q.last_in & INET_FRAG_COMPLETE)
151 goto out;
152
153 fq_kill(fq);
154
155out:
156 spin_unlock(&fq->q.lock);
157 fq_put(fq);
162} 158}
163 159
164/* Creation primitives. */ 160/* Creation primitives. */
165static inline struct frag_queue *fq_find(struct net *net, __be32 id, 161
166 u32 user, struct in6_addr *src, 162static __inline__ struct nf_ct_frag6_queue *
167 struct in6_addr *dst) 163fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
168{ 164{
169 struct inet_frag_queue *q; 165 struct inet_frag_queue *q;
170 struct ip6_create_arg arg; 166 struct ip6_create_arg arg;
@@ -178,23 +174,23 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
178 read_lock_bh(&nf_frags.lock); 174 read_lock_bh(&nf_frags.lock);
179 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); 175 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
180 176
181 q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); 177 q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
182 local_bh_enable(); 178 local_bh_enable();
183 if (q == NULL) 179 if (q == NULL)
184 goto oom; 180 goto oom;
185 181
186 return container_of(q, struct frag_queue, q); 182 return container_of(q, struct nf_ct_frag6_queue, q);
187 183
188oom: 184oom:
185 pr_debug("Can't alloc new queue\n");
189 return NULL; 186 return NULL;
190} 187}
191 188
192 189
193static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, 190static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
194 const struct frag_hdr *fhdr, int nhoff) 191 const struct frag_hdr *fhdr, int nhoff)
195{ 192{
196 struct sk_buff *prev, *next; 193 struct sk_buff *prev, *next;
197 unsigned int payload_len;
198 int offset, end; 194 int offset, end;
199 195
200 if (fq->q.last_in & INET_FRAG_COMPLETE) { 196 if (fq->q.last_in & INET_FRAG_COMPLETE) {
@@ -202,10 +198,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
202 goto err; 198 goto err;
203 } 199 }
204 200
205 payload_len = ntohs(ipv6_hdr(skb)->payload_len);
206
207 offset = ntohs(fhdr->frag_off) & ~0x7; 201 offset = ntohs(fhdr->frag_off) & ~0x7;
208 end = offset + (payload_len - 202 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
209 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); 203 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
210 204
211 if ((unsigned int)end > IPV6_MAXPLEN) { 205 if ((unsigned int)end > IPV6_MAXPLEN) {
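
[Illustrative aside, not part of the patch.] Both sides of the hunk above keep the same offset/end arithmetic on the IPv6 fragment header. A minimal userspace sketch of that arithmetic; the function name and the exthdr_len parameter are mine (the kernel derives that length from the skb), and the subsequent IPV6_MAXPLEN check is only noted in a comment:

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>          /* ntohs(), htons() */

    /* frag_off carries the 13-bit fragment offset (in 8-octet units) in its
     * upper bits, so masking the low three bits (reserved + M flag) of the
     * host-order value yields the byte offset directly. */
    static void frag_bounds(uint16_t frag_off_net, uint16_t payload_len_net,
                            unsigned int exthdr_len, /* ext hdrs incl. frag hdr */
                            unsigned int *offset, unsigned int *end)
    {
        *offset = ntohs(frag_off_net) & ~0x7u;
        *end    = *offset + (ntohs(payload_len_net) - exthdr_len);
        /* the caller then rejects *end > IPV6_MAXPLEN (65535) */
    }

    int main(void)
    {
        unsigned int off, end;
        /* middle fragment: 1232 data bytes at byte offset 1232, M bit set */
        frag_bounds(htons(1232 | 1), htons(8 + 1232), 8, &off, &end);
        printf("offset=%u end=%u\n", off, end);   /* offset=1232 end=2464 */
        return 0;
    }
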
@@ -311,15 +305,10 @@ found:
311 else 305 else
312 fq->q.fragments = skb; 306 fq->q.fragments = skb;
313 307
314 if (skb->dev) { 308 skb->dev = NULL;
315 fq->iif = skb->dev->ifindex;
316 skb->dev = NULL;
317 }
318 fq->q.stamp = skb->tstamp; 309 fq->q.stamp = skb->tstamp;
319 fq->q.meat += skb->len; 310 fq->q.meat += skb->len;
320 if (payload_len > fq->q.max_size) 311 atomic_add(skb->truesize, &nf_init_frags.mem);
321 fq->q.max_size = payload_len;
322 atomic_add(skb->truesize, &fq->q.net->mem);
323 312
324 /* The first fragment. 313 /* The first fragment.
325 * nhoffset is obtained from the first fragment, of course. 314 * nhoffset is obtained from the first fragment, of course.
@@ -329,12 +318,12 @@ found:
329 fq->q.last_in |= INET_FRAG_FIRST_IN; 318 fq->q.last_in |= INET_FRAG_FIRST_IN;
330 } 319 }
331 write_lock(&nf_frags.lock); 320 write_lock(&nf_frags.lock);
332 list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); 321 list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
333 write_unlock(&nf_frags.lock); 322 write_unlock(&nf_frags.lock);
334 return 0; 323 return 0;
335 324
336discard_fq: 325discard_fq:
337 inet_frag_kill(&fq->q, &nf_frags); 326 fq_kill(fq);
338err: 327err:
339 return -1; 328 return -1;
340} 329}
@@ -349,12 +338,12 @@ err:
349 * the last and the first frames arrived and all the bits are here. 338 * the last and the first frames arrived and all the bits are here.
350 */ 339 */
351static struct sk_buff * 340static struct sk_buff *
352nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) 341nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
353{ 342{
354 struct sk_buff *fp, *op, *head = fq->q.fragments; 343 struct sk_buff *fp, *op, *head = fq->q.fragments;
355 int payload_len; 344 int payload_len;
356 345
357 inet_frag_kill(&fq->q, &nf_frags); 346 fq_kill(fq);
358 347
359 WARN_ON(head == NULL); 348 WARN_ON(head == NULL);
360 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); 349 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -381,16 +370,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
381 struct sk_buff *clone; 370 struct sk_buff *clone;
382 int i, plen = 0; 371 int i, plen = 0;
383 372
384 clone = alloc_skb(0, GFP_ATOMIC); 373 if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
385 if (clone == NULL) 374 pr_debug("Can't alloc skb\n");
386 goto out_oom; 375 goto out_oom;
387 376 }
388 clone->next = head->next; 377 clone->next = head->next;
389 head->next = clone; 378 head->next = clone;
390 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; 379 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
391 skb_frag_list_init(head); 380 skb_frag_list_init(head);
392 for (i = 0; i < skb_shinfo(head)->nr_frags; i++) 381 for (i=0; i<skb_shinfo(head)->nr_frags; i++)
393 plen += skb_frag_size(&skb_shinfo(head)->frags[i]); 382 plen += skb_shinfo(head)->frags[i].size;
394 clone->len = clone->data_len = head->data_len - plen; 383 clone->len = clone->data_len = head->data_len - plen;
395 head->data_len -= clone->len; 384 head->data_len -= clone->len;
396 head->len -= clone->len; 385 head->len -= clone->len;
@@ -398,7 +387,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
398 clone->ip_summed = head->ip_summed; 387 clone->ip_summed = head->ip_summed;
399 388
400 NFCT_FRAG6_CB(clone)->orig = NULL; 389 NFCT_FRAG6_CB(clone)->orig = NULL;
401 atomic_add(clone->truesize, &fq->q.net->mem); 390 atomic_add(clone->truesize, &nf_init_frags.mem);
402 } 391 }
403 392
404 /* We have to remove fragment header from datagram and to relocate 393 /* We have to remove fragment header from datagram and to relocate
@@ -422,14 +411,12 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
422 head->csum = csum_add(head->csum, fp->csum); 411 head->csum = csum_add(head->csum, fp->csum);
423 head->truesize += fp->truesize; 412 head->truesize += fp->truesize;
424 } 413 }
425 atomic_sub(head->truesize, &fq->q.net->mem); 414 atomic_sub(head->truesize, &nf_init_frags.mem);
426 415
427 head->local_df = 1;
428 head->next = NULL; 416 head->next = NULL;
429 head->dev = dev; 417 head->dev = dev;
430 head->tstamp = fq->q.stamp; 418 head->tstamp = fq->q.stamp;
431 ipv6_hdr(head)->payload_len = htons(payload_len); 419 ipv6_hdr(head)->payload_len = htons(payload_len);
432 IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
433 420
434 /* Yes, and fold redundant checksum back. 8) */ 421 /* Yes, and fold redundant checksum back. 8) */
435 if (head->ip_summed == CHECKSUM_COMPLETE) 422 if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -458,11 +445,12 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
458 return head; 445 return head;
459 446
460out_oversize: 447out_oversize:
461 net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", 448 if (net_ratelimit())
462 payload_len); 449 printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
463 goto out_fail; 450 goto out_fail;
464out_oom: 451out_oom:
465 net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n"); 452 if (net_ratelimit())
453 printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
466out_fail: 454out_fail:
467 return NULL; 455 return NULL;
468} 456}
@@ -534,10 +522,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
534{ 522{
535 struct sk_buff *clone; 523 struct sk_buff *clone;
536 struct net_device *dev = skb->dev; 524 struct net_device *dev = skb->dev;
537 struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
538 : dev_net(skb->dev);
539 struct frag_hdr *fhdr; 525 struct frag_hdr *fhdr;
540 struct frag_queue *fq; 526 struct nf_ct_frag6_queue *fq;
541 struct ipv6hdr *hdr; 527 struct ipv6hdr *hdr;
542 int fhoff, nhoff; 528 int fhoff, nhoff;
543 u8 prevhdr; 529 u8 prevhdr;
@@ -569,11 +555,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
569 hdr = ipv6_hdr(clone); 555 hdr = ipv6_hdr(clone);
570 fhdr = (struct frag_hdr *)skb_transport_header(clone); 556 fhdr = (struct frag_hdr *)skb_transport_header(clone);
571 557
572 local_bh_disable(); 558 if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
573 inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); 559 nf_ct_frag6_evictor();
574 local_bh_enable();
575 560
576 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); 561 fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr);
577 if (fq == NULL) { 562 if (fq == NULL) {
578 pr_debug("Can't find and can't create new queue\n"); 563 pr_debug("Can't find and can't create new queue\n");
579 goto ret_orig; 564 goto ret_orig;
@@ -584,7 +569,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
584 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { 569 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
585 spin_unlock_bh(&fq->q.lock); 570 spin_unlock_bh(&fq->q.lock);
586 pr_debug("Can't insert skb to queue\n"); 571 pr_debug("Can't insert skb to queue\n");
587 inet_frag_put(&fq->q, &nf_frags); 572 fq_put(fq);
588 goto ret_orig; 573 goto ret_orig;
589 } 574 }
590 575
@@ -596,7 +581,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
596 } 581 }
597 spin_unlock_bh(&fq->q.lock); 582 spin_unlock_bh(&fq->q.lock);
598 583
599 inet_frag_put(&fq->q, &nf_frags); 584 fq_put(fq);
600 return ret_skb; 585 return ret_skb;
601 586
602ret_orig: 587ret_orig:
@@ -609,7 +594,6 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
609 int (*okfn)(struct sk_buff *)) 594 int (*okfn)(struct sk_buff *))
610{ 595{
611 struct sk_buff *s, *s2; 596 struct sk_buff *s, *s2;
612 unsigned int ret = 0;
613 597
614 for (s = NFCT_FRAG6_CB(skb)->orig; s;) { 598 for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
615 nf_conntrack_put_reasm(s->nfct_reasm); 599 nf_conntrack_put_reasm(s->nfct_reasm);
@@ -619,62 +603,49 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
619 s2 = s->next; 603 s2 = s->next;
620 s->next = NULL; 604 s->next = NULL;
621 605
622 if (ret != -ECANCELED) 606 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
623 ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, 607 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
624 in, out, okfn,
625 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
626 else
627 kfree_skb(s);
628
629 s = s2; 608 s = s2;
630 } 609 }
631 nf_conntrack_put_reasm(skb); 610 nf_conntrack_put_reasm(skb);
632} 611}
633 612
634static int nf_ct_net_init(struct net *net)
635{
636 net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
637 net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
638 net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
639 inet_frags_init_net(&net->nf_frag.frags);
640
641 return nf_ct_frag6_sysctl_register(net);
642}
643
644static void nf_ct_net_exit(struct net *net)
645{
646 nf_ct_frags6_sysctl_unregister(net);
647 inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
648}
649
650static struct pernet_operations nf_ct_net_ops = {
651 .init = nf_ct_net_init,
652 .exit = nf_ct_net_exit,
653};
654
655int nf_ct_frag6_init(void) 613int nf_ct_frag6_init(void)
656{ 614{
657 int ret = 0;
658
659 nf_frags.hashfn = nf_hashfn; 615 nf_frags.hashfn = nf_hashfn;
660 nf_frags.constructor = ip6_frag_init; 616 nf_frags.constructor = ip6_frag_init;
661 nf_frags.destructor = NULL; 617 nf_frags.destructor = NULL;
662 nf_frags.skb_free = nf_skb_free; 618 nf_frags.skb_free = nf_skb_free;
663 nf_frags.qsize = sizeof(struct frag_queue); 619 nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
664 nf_frags.match = ip6_frag_match; 620 nf_frags.match = ip6_frag_match;
665 nf_frags.frag_expire = nf_ct_frag6_expire; 621 nf_frags.frag_expire = nf_ct_frag6_expire;
666 nf_frags.secret_interval = 10 * 60 * HZ; 622 nf_frags.secret_interval = 10 * 60 * HZ;
623 nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
624 nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
625 nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
626 inet_frags_init_net(&nf_init_frags);
667 inet_frags_init(&nf_frags); 627 inet_frags_init(&nf_frags);
668 628
669 ret = register_pernet_subsys(&nf_ct_net_ops); 629#ifdef CONFIG_SYSCTL
670 if (ret) 630 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
631 nf_ct_frag6_sysctl_table);
632 if (!nf_ct_frag6_sysctl_header) {
671 inet_frags_fini(&nf_frags); 633 inet_frags_fini(&nf_frags);
634 return -ENOMEM;
635 }
636#endif
672 637
673 return ret; 638 return 0;
674} 639}
675 640
676void nf_ct_frag6_cleanup(void) 641void nf_ct_frag6_cleanup(void)
677{ 642{
678 unregister_pernet_subsys(&nf_ct_net_ops); 643#ifdef CONFIG_SYSCTL
644 unregister_sysctl_table(nf_ct_frag6_sysctl_header);
645 nf_ct_frag6_sysctl_header = NULL;
646#endif
679 inet_frags_fini(&nf_frags); 647 inet_frags_fini(&nf_frags);
648
649 nf_init_frags.low_thresh = 0;
650 nf_ct_frag6_evictor();
680} 651}
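
[Illustrative aside, not part of the patch.] On the right-hand (reverted) side of this file, eviction is driven by one global accounting counter, nf_init_frags.mem, compared against high_thresh in nf_ct_frag6_gather(). A self-contained sketch of that accounting pattern; the names and the threshold value are stand-ins, not the kernel's:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int frag_mem;                 /* stands in for nf_init_frags.mem */
    static const int high_thresh = 256 * 1024;  /* IPV6_FRAG_HIGH_THRESH-like value */

    static void charge(int truesize)   { atomic_fetch_add(&frag_mem, truesize); }
    static void uncharge(int truesize) { atomic_fetch_sub(&frag_mem, truesize); }
    static int  need_evict(void)       { return atomic_load(&frag_mem) > high_thresh; }

    int main(void)
    {
        charge(300 * 1024);
        printf("evict? %d\n", need_evict());   /* 1: the gather path would run the evictor */
        uncharge(200 * 1024);
        printf("evict? %d\n", need_evict());   /* 0 */
        return 0;
    }
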
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121fe8c..cdd6d045e42 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,7 +19,7 @@
19 19
20#include <linux/netfilter_ipv6.h> 20#include <linux/netfilter_ipv6.h>
21#include <linux/netfilter_bridge.h> 21#include <linux/netfilter_bridge.h>
22#if IS_ENABLED(CONFIG_NF_CONNTRACK) 22#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
23#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_conntrack_helper.h> 24#include <net/netfilter/nf_conntrack_helper.h>
25#include <net/netfilter/nf_conntrack_l4proto.h> 25#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -35,7 +35,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
35{ 35{
36 u16 zone = NF_CT_DEFAULT_ZONE; 36 u16 zone = NF_CT_DEFAULT_ZONE;
37 37
38#if IS_ENABLED(CONFIG_NF_CONNTRACK) 38#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
39 if (skb->nfct) 39 if (skb->nfct)
40 zone = nf_ct_zone((struct nf_conn *)skb->nfct); 40 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
41#endif 41#endif
@@ -60,7 +60,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
60{ 60{
61 struct sk_buff *reasm; 61 struct sk_buff *reasm;
62 62
63#if IS_ENABLED(CONFIG_NF_CONNTRACK) 63#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
64 /* Previously seen (loopback)? */ 64 /* Previously seen (loopback)? */
65 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) 65 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
66 return NF_ACCEPT; 66 return NF_ACCEPT;
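
[Illustrative aside, not part of the patch.] The only change in this file swaps IS_ENABLED(CONFIG_NF_CONNTRACK) for the older defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) spelling; the two are equivalent because IS_ENABLED() (in include/linux/kconfig.h) also checks the _MODULE variant. The sketch below only demonstrates the kind of token-pasting trick such a macro can use to turn "is this symbol defined to 1" into a 0/1 value; the demo names are mine and a GNU-style preprocessor is assumed for the empty variadic case:

    #include <stdio.h>

    #define CONFIG_DEMO_BUILTIN 1      /* pretend this came from autoconf.h   */
                                       /* CONFIG_DEMO_OFF deliberately undefined */

    #define PLACEHOLDER_1 0,
    #define IS_SET(cfg)          _IS_SET(cfg)
    #define _IS_SET(value)       __IS_SET(PLACEHOLDER_##value)
    #define __IS_SET(junk_or_0)  ___IS_SET(junk_or_0 1, 0)
    #define ___IS_SET(ignored, val, ...) val

    int main(void)
    {
        printf("builtin: %d\n", IS_SET(CONFIG_DEMO_BUILTIN));  /* 1 */
        printf("off:     %d\n", IS_SET(CONFIG_DEMO_OFF));      /* 0 */
        return 0;
    }
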
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
deleted file mode 100644
index abfe75a2e31..00000000000
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ /dev/null
@@ -1,288 +0,0 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of IPv6 NAT funded by Astaro.
9 */
10#include <linux/types.h>
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/ipv6.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv6.h>
16#include <net/secure_seq.h>
17#include <net/checksum.h>
18#include <net/ip6_checksum.h>
19#include <net/ip6_route.h>
20#include <net/ipv6.h>
21
22#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_nat_core.h>
25#include <net/netfilter/nf_nat_l3proto.h>
26#include <net/netfilter/nf_nat_l4proto.h>
27
28static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
29
30#ifdef CONFIG_XFRM
31static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
32 const struct nf_conn *ct,
33 enum ip_conntrack_dir dir,
34 unsigned long statusbit,
35 struct flowi *fl)
36{
37 const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
38 struct flowi6 *fl6 = &fl->u.ip6;
39
40 if (ct->status & statusbit) {
41 fl6->daddr = t->dst.u3.in6;
42 if (t->dst.protonum == IPPROTO_TCP ||
43 t->dst.protonum == IPPROTO_UDP ||
44 t->dst.protonum == IPPROTO_UDPLITE ||
45 t->dst.protonum == IPPROTO_DCCP ||
46 t->dst.protonum == IPPROTO_SCTP)
47 fl6->fl6_dport = t->dst.u.all;
48 }
49
50 statusbit ^= IPS_NAT_MASK;
51
52 if (ct->status & statusbit) {
53 fl6->saddr = t->src.u3.in6;
54 if (t->dst.protonum == IPPROTO_TCP ||
55 t->dst.protonum == IPPROTO_UDP ||
56 t->dst.protonum == IPPROTO_UDPLITE ||
57 t->dst.protonum == IPPROTO_DCCP ||
58 t->dst.protonum == IPPROTO_SCTP)
59 fl6->fl6_sport = t->src.u.all;
60 }
61}
62#endif
63
64static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
65 const struct nf_nat_range *range)
66{
67 return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
68 ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
69}
70
71static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
72 __be16 dport)
73{
74 return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
75}
76
77static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
78 unsigned int iphdroff,
79 const struct nf_nat_l4proto *l4proto,
80 const struct nf_conntrack_tuple *target,
81 enum nf_nat_manip_type maniptype)
82{
83 struct ipv6hdr *ipv6h;
84 __be16 frag_off;
85 int hdroff;
86 u8 nexthdr;
87
88 if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
89 return false;
90
91 ipv6h = (void *)skb->data + iphdroff;
92 nexthdr = ipv6h->nexthdr;
93 hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
94 &nexthdr, &frag_off);
95 if (hdroff < 0)
96 goto manip_addr;
97
98 if ((frag_off & htons(~0x7)) == 0 &&
99 !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
100 target, maniptype))
101 return false;
102manip_addr:
103 if (maniptype == NF_NAT_MANIP_SRC)
104 ipv6h->saddr = target->src.u3.in6;
105 else
106 ipv6h->daddr = target->dst.u3.in6;
107
108 return true;
109}
110
111static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
112 unsigned int iphdroff, __sum16 *check,
113 const struct nf_conntrack_tuple *t,
114 enum nf_nat_manip_type maniptype)
115{
116 const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
117 const struct in6_addr *oldip, *newip;
118
119 if (maniptype == NF_NAT_MANIP_SRC) {
120 oldip = &ipv6h->saddr;
121 newip = &t->src.u3.in6;
122 } else {
123 oldip = &ipv6h->daddr;
124 newip = &t->dst.u3.in6;
125 }
126 inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
127 newip->s6_addr32, 1);
128}
129
130static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
131 u8 proto, void *data, __sum16 *check,
132 int datalen, int oldlen)
133{
134 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
135 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
136
137 if (skb->ip_summed != CHECKSUM_PARTIAL) {
138 if (!(rt->rt6i_flags & RTF_LOCAL) &&
139 (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
140 skb->ip_summed = CHECKSUM_PARTIAL;
141 skb->csum_start = skb_headroom(skb) +
142 skb_network_offset(skb) +
143 (data - (void *)skb->data);
144 skb->csum_offset = (void *)check - data;
145 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
146 datalen, proto, 0);
147 } else {
148 *check = 0;
149 *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
150 datalen, proto,
151 csum_partial(data, datalen,
152 0));
153 if (proto == IPPROTO_UDP && !*check)
154 *check = CSUM_MANGLED_0;
155 }
156 } else
157 inet_proto_csum_replace2(check, skb,
158 htons(oldlen), htons(datalen), 1);
159}
160
161static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
162 struct nf_nat_range *range)
163{
164 if (tb[CTA_NAT_V6_MINIP]) {
165 nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
166 sizeof(struct in6_addr));
167 range->flags |= NF_NAT_RANGE_MAP_IPS;
168 }
169
170 if (tb[CTA_NAT_V6_MAXIP])
171 nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
172 sizeof(struct in6_addr));
173 else
174 range->max_addr = range->min_addr;
175
176 return 0;
177}
178
179static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
180 .l3proto = NFPROTO_IPV6,
181 .secure_port = nf_nat_ipv6_secure_port,
182 .in_range = nf_nat_ipv6_in_range,
183 .manip_pkt = nf_nat_ipv6_manip_pkt,
184 .csum_update = nf_nat_ipv6_csum_update,
185 .csum_recalc = nf_nat_ipv6_csum_recalc,
186 .nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
187#ifdef CONFIG_XFRM
188 .decode_session = nf_nat_ipv6_decode_session,
189#endif
190};
191
192int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
193 struct nf_conn *ct,
194 enum ip_conntrack_info ctinfo,
195 unsigned int hooknum,
196 unsigned int hdrlen)
197{
198 struct {
199 struct icmp6hdr icmp6;
200 struct ipv6hdr ip6;
201 } *inside;
202 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
203 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
204 const struct nf_nat_l4proto *l4proto;
205 struct nf_conntrack_tuple target;
206 unsigned long statusbit;
207
208 NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
209
210 if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
211 return 0;
212 if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
213 return 0;
214
215 inside = (void *)skb->data + hdrlen;
216 if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
217 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
218 return 0;
219 if (ct->status & IPS_NAT_MASK)
220 return 0;
221 }
222
223 if (manip == NF_NAT_MANIP_SRC)
224 statusbit = IPS_SRC_NAT;
225 else
226 statusbit = IPS_DST_NAT;
227
228 /* Invert if this is reply direction */
229 if (dir == IP_CT_DIR_REPLY)
230 statusbit ^= IPS_NAT_MASK;
231
232 if (!(ct->status & statusbit))
233 return 1;
234
235 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
236 if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
237 l4proto, &ct->tuplehash[!dir].tuple, !manip))
238 return 0;
239
240 if (skb->ip_summed != CHECKSUM_PARTIAL) {
241 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
242 inside = (void *)skb->data + hdrlen;
243 inside->icmp6.icmp6_cksum = 0;
244 inside->icmp6.icmp6_cksum =
245 csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
246 skb->len - hdrlen, IPPROTO_ICMPV6,
247 csum_partial(&inside->icmp6,
248 skb->len - hdrlen, 0));
249 }
250
251 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
252 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
253 if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
254 return 0;
255
256 return 1;
257}
258EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
259
260static int __init nf_nat_l3proto_ipv6_init(void)
261{
262 int err;
263
264 err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
265 if (err < 0)
266 goto err1;
267 err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
268 if (err < 0)
269 goto err2;
270 return err;
271
272err2:
273 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
274err1:
275 return err;
276}
277
278static void __exit nf_nat_l3proto_ipv6_exit(void)
279{
280 nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
281 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
282}
283
284MODULE_LICENSE("GPL");
285MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
286
287module_init(nf_nat_l3proto_ipv6_init);
288module_exit(nf_nat_l3proto_ipv6_exit);
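
[Illustrative aside, not part of the patch.] The removed nf_nat_ipv6_csum_update()/csum_recalc() lean on the kernel's incremental checksum helpers (inet_proto_csum_replace16/replace2). The arithmetic behind those helpers is the standard RFC 1624 update, HC' = ~(~HC + ~m + m'); here is a userspace sketch for a single 16-bit field, with names of my own:

    #include <stdint.h>
    #include <stdio.h>

    /* Incrementally update a ones'-complement checksum after replacing one
     * 16-bit field: HC' = ~(~HC + ~m + m')  (RFC 1624, eq. 3). */
    static uint16_t csum16_replace(uint16_t check, uint16_t old_val, uint16_t new_val)
    {
        uint32_t sum = (uint16_t)~check;
        sum += (uint16_t)~old_val;
        sum += new_val;
        while (sum >> 16)                       /* fold carries back in */
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    int main(void)
    {
        uint16_t c0 = 0x1234;
        uint16_t c1 = csum16_replace(c0, 0x0050, 0x1f90);   /* e.g. port 80 -> 8080 */
        uint16_t c2 = csum16_replace(c1, 0x1f90, 0x0050);   /* and back again */
        printf("%#06x -> %#06x -> %#06x\n", c0, c1, c2);    /* c2 == c0 */
        return 0;
    }
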
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
deleted file mode 100644
index 61aaf70f376..00000000000
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ /dev/null
@@ -1,90 +0,0 @@
1/*
2 * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
9 * NAT funded by Astaro.
10 */
11
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/icmpv6.h>
15
16#include <linux/netfilter.h>
17#include <net/netfilter/nf_nat.h>
18#include <net/netfilter/nf_nat_core.h>
19#include <net/netfilter/nf_nat_l3proto.h>
20#include <net/netfilter/nf_nat_l4proto.h>
21
22static bool
23icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
24 enum nf_nat_manip_type maniptype,
25 const union nf_conntrack_man_proto *min,
26 const union nf_conntrack_man_proto *max)
27{
28 return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
29 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
30}
31
32static void
33icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
34 struct nf_conntrack_tuple *tuple,
35 const struct nf_nat_range *range,
36 enum nf_nat_manip_type maniptype,
37 const struct nf_conn *ct)
38{
39 static u16 id;
40 unsigned int range_size;
41 unsigned int i;
42
43 range_size = ntohs(range->max_proto.icmp.id) -
44 ntohs(range->min_proto.icmp.id) + 1;
45
46 if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
47 range_size = 0xffff;
48
49 for (i = 0; ; ++id) {
50 tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
51 (id % range_size));
52 if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
53 return;
54 }
55}
56
57static bool
58icmpv6_manip_pkt(struct sk_buff *skb,
59 const struct nf_nat_l3proto *l3proto,
60 unsigned int iphdroff, unsigned int hdroff,
61 const struct nf_conntrack_tuple *tuple,
62 enum nf_nat_manip_type maniptype)
63{
64 struct icmp6hdr *hdr;
65
66 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
67 return false;
68
69 hdr = (struct icmp6hdr *)(skb->data + hdroff);
70 l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
71 tuple, maniptype);
72 if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
73 hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
74 inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
75 hdr->icmp6_identifier,
76 tuple->src.u.icmp.id, 0);
77 hdr->icmp6_identifier = tuple->src.u.icmp.id;
78 }
79 return true;
80}
81
82const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
83 .l4proto = IPPROTO_ICMPV6,
84 .manip_pkt = icmpv6_manip_pkt,
85 .in_range = icmpv6_in_range,
86 .unique_tuple = icmpv6_unique_tuple,
87#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
88 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
89#endif
90};
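
[Illustrative aside, not part of the patch.] icmpv6_unique_tuple() above walks candidate Echo identifiers as min + (id % range_size) until nf_nat_used_tuple() reports a free one. The selection arithmetic on its own, as a tiny sketch in host byte order with no collision check (the kernel works on ntohs()ed values):

    #include <stdint.h>
    #include <stdio.h>

    /* Round-robin pick of an ICMPv6 Echo identifier inside [min_id, max_id]. */
    static uint16_t pick_icmp_id(uint16_t min_id, uint16_t max_id, uint16_t *counter)
    {
        unsigned int range_size = (unsigned int)(max_id - min_id) + 1;
        return (uint16_t)(min_id + ((*counter)++ % range_size));
    }

    int main(void)
    {
        uint16_t counter = 0;
        for (int i = 0; i < 5; i++)
            printf("%u\n", pick_icmp_id(1000, 1002, &counter)); /* 1000 1001 1002 1000 1001 */
        return 0;
    }
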
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
deleted file mode 100644
index c2e73e647e4..00000000000
--- a/net/ipv6/output_core.c
+++ /dev/null
@@ -1,76 +0,0 @@
1/*
2 * IPv6 library code, needed by static components when full IPv6 support is
3 * not configured or static. These functions are needed by GSO/GRO implementation.
4 */
5#include <linux/export.h>
6#include <net/ipv6.h>
7#include <net/ip6_fib.h>
8
9void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
10{
11 static atomic_t ipv6_fragmentation_id;
12 int old, new;
13
14#if IS_ENABLED(CONFIG_IPV6)
15 if (rt && !(rt->dst.flags & DST_NOPEER)) {
16 struct inet_peer *peer;
17 struct net *net;
18
19 net = dev_net(rt->dst.dev);
20 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
21 if (peer) {
22 fhdr->identification = htonl(inet_getid(peer, 0));
23 inet_putpeer(peer);
24 return;
25 }
26 }
27#endif
28 do {
29 old = atomic_read(&ipv6_fragmentation_id);
30 new = old + 1;
31 if (!new)
32 new = 1;
33 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
34 fhdr->identification = htonl(new);
35}
36EXPORT_SYMBOL(ipv6_select_ident);
37
38int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
39{
40 u16 offset = sizeof(struct ipv6hdr);
41 struct ipv6_opt_hdr *exthdr =
42 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
43 unsigned int packet_len = skb->tail - skb->network_header;
44 int found_rhdr = 0;
45 *nexthdr = &ipv6_hdr(skb)->nexthdr;
46
47 while (offset + 1 <= packet_len) {
48
49 switch (**nexthdr) {
50
51 case NEXTHDR_HOP:
52 break;
53 case NEXTHDR_ROUTING:
54 found_rhdr = 1;
55 break;
56 case NEXTHDR_DEST:
57#if IS_ENABLED(CONFIG_IPV6_MIP6)
58 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
59 break;
60#endif
61 if (found_rhdr)
62 return offset;
63 break;
64 default :
65 return offset;
66 }
67
68 offset += ipv6_optlen(exthdr);
69 *nexthdr = &exthdr->nexthdr;
70 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
71 offset);
72 }
73
74 return offset;
75}
76EXPORT_SYMBOL(ip6_find_1stfragopt);
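
[Illustrative aside, not part of the patch.] The removed ipv6_select_ident() falls back to a global counter when no inet_peer is available, bumping it with a compare-and-swap loop and never handing out 0. A C11 userspace sketch of just that fallback (the kernel additionally prefers a per-peer id and stores the result with htonl()):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic uint32_t frag_id;

    static uint32_t next_frag_id(void)
    {
        uint32_t old, next;
        do {
            old  = atomic_load(&frag_id);
            next = old + 1;
            if (!next)          /* skip 0, mirroring the kernel's fallback */
                next = 1;
        } while (!atomic_compare_exchange_weak(&frag_id, &old, next));
        return next;
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++)
            printf("%u\n", next_frag_id());   /* 1 2 3 */
        return 0;
    }
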
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 745a3204295..18ff5df7ec0 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -21,7 +21,6 @@
21#include <linux/proc_fs.h> 21#include <linux/proc_fs.h>
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/stddef.h> 23#include <linux/stddef.h>
24#include <linux/export.h>
25#include <net/net_namespace.h> 24#include <net/net_namespace.h>
26#include <net/ip.h> 25#include <net/ip.h>
27#include <net/sock.h> 26#include <net/sock.h>
@@ -142,7 +141,11 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
142 SNMP_MIB_SENTINEL 141 SNMP_MIB_SENTINEL
143}; 142};
144 143
145static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) 144/* can be called either with percpu mib (pcpumib != NULL),
145 * or shared one (smib != NULL)
146 */
147static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **pcpumib,
148 atomic_long_t *smib)
146{ 149{
147 char name[32]; 150 char name[32];
148 int i; 151 int i;
@@ -159,14 +162,14 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
159 snprintf(name, sizeof(name), "Icmp6%s%s", 162 snprintf(name, sizeof(name), "Icmp6%s%s",
160 i & 0x100 ? "Out" : "In", p); 163 i & 0x100 ? "Out" : "In", p);
161 seq_printf(seq, "%-32s\t%lu\n", name, 164 seq_printf(seq, "%-32s\t%lu\n", name,
162 atomic_long_read(smib + i)); 165 pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i));
163 } 166 }
164 167
165 /* print by number (nonzero only) - ICMPMsgStat format */ 168 /* print by number (nonzero only) - ICMPMsgStat format */
166 for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { 169 for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
167 unsigned long val; 170 unsigned long val;
168 171
169 val = atomic_long_read(smib + i); 172 val = pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i);
170 if (!val) 173 if (!val)
171 continue; 174 continue;
172 snprintf(name, sizeof(name), "Icmp6%sType%u", 175 snprintf(name, sizeof(name), "Icmp6%sType%u",
@@ -211,7 +214,8 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
211 snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); 214 snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
212 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, 215 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
213 NULL, snmp6_icmp6_list); 216 NULL, snmp6_icmp6_list);
214 snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); 217 snmp6_seq_show_icmpv6msg(seq,
218 (void __percpu **)net->mib.icmpv6msg_statistics, NULL);
215 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, 219 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6,
216 NULL, snmp6_udp6_list); 220 NULL, snmp6_udp6_list);
217 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, 221 snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6,
@@ -237,11 +241,11 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
237 struct inet6_dev *idev = (struct inet6_dev *)seq->private; 241 struct inet6_dev *idev = (struct inet6_dev *)seq->private;
238 242
239 seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); 243 seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
240 snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6, 244 snmp6_seq_show_item(seq, (void __percpu **)idev->stats.ipv6, NULL,
241 snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); 245 snmp6_ipstats_list);
242 snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, 246 snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
243 snmp6_icmp6_list); 247 snmp6_icmp6_list);
244 snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs); 248 snmp6_seq_show_icmpv6msg(seq, NULL, idev->stats.icmpv6msgdev->mibs);
245 return 0; 249 return 0;
246} 250}
247 251
@@ -307,10 +311,10 @@ static int __net_init ipv6_proc_init_net(struct net *net)
307 goto proc_dev_snmp6_fail; 311 goto proc_dev_snmp6_fail;
308 return 0; 312 return 0;
309 313
310proc_dev_snmp6_fail:
311 proc_net_remove(net, "snmp6");
312proc_snmp6_fail: 314proc_snmp6_fail:
313 proc_net_remove(net, "sockstat6"); 315 proc_net_remove(net, "sockstat6");
316proc_dev_snmp6_fail:
317 proc_net_remove(net, "dev_snmp6");
314 return -ENOMEM; 318 return -ENOMEM;
315} 319}
316 320
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 22d1bd4670d..9a7978fdc02 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,13 +25,13 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28#if IS_ENABLED(CONFIG_IPV6)
29const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; 28const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
30EXPORT_SYMBOL(inet6_protos);
31 29
32int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
33{ 31{
34 return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], 32 int hash = protocol & (MAX_INET_PROTOS - 1);
33
34 return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
35 NULL, prot) ? 0 : -1; 35 NULL, prot) ? 0 : -1;
36} 36}
37EXPORT_SYMBOL(inet6_add_protocol); 37EXPORT_SYMBOL(inet6_add_protocol);
@@ -42,9 +42,9 @@ EXPORT_SYMBOL(inet6_add_protocol);
42 42
43int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) 43int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)
44{ 44{
45 int ret; 45 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
46 46
47 ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], 47 ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
48 prot, NULL) == prot) ? 0 : -1; 48 prot, NULL) == prot) ? 0 : -1;
49 49
50 synchronize_net(); 50 synchronize_net();
@@ -52,26 +52,3 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
52 return ret; 52 return ret;
53} 53}
54EXPORT_SYMBOL(inet6_del_protocol); 54EXPORT_SYMBOL(inet6_del_protocol);
55#endif
56
57const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
58
59int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
60{
61 return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
62 NULL, prot) ? 0 : -1;
63}
64EXPORT_SYMBOL(inet6_add_offload);
65
66int inet6_del_offload(const struct net_offload *prot, unsigned char protocol)
67{
68 int ret;
69
70 ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol],
71 prot, NULL) == prot) ? 0 : -1;
72
73 synchronize_net();
74
75 return ret;
76}
77EXPORT_SYMBOL(inet6_del_offload);
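
[Illustrative aside, not part of the patch.] inet6_add_protocol()/inet6_del_protocol() above are lock-free single-slot registrations: cmpxchg() installs the handler only if the slot is NULL, and clears it only if it still holds that handler (the kernel then waits for readers with synchronize_net()). The same idiom in portable C11; the struct and array below are stand-ins, not the kernel's types:

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    struct handler { const char *name; };

    static _Atomic(const struct handler *) protos[256];

    static int add_protocol(const struct handler *h, unsigned char proto)
    {
        const struct handler *expected = NULL;
        return atomic_compare_exchange_strong(&protos[proto], &expected, h) ? 0 : -1;
    }

    static int del_protocol(const struct handler *h, unsigned char proto)
    {
        const struct handler *expected = h;
        return atomic_compare_exchange_strong(&protos[proto], &expected, NULL) ? 0 : -1;
    }

    int main(void)
    {
        static const struct handler tcp = { "tcp6" };
        printf("%d %d %d\n", add_protocol(&tcp, 6), add_protocol(&tcp, 6),
               del_protocol(&tcp, 6));        /* 0 -1 0 */
        return 0;
    }
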
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 6cd29b1e8b9..343852e5c70 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,7 +50,7 @@
50#include <net/udp.h> 50#include <net/udp.h>
51#include <net/inet_common.h> 51#include <net/inet_common.h>
52#include <net/tcp_states.h> 52#include <net/tcp_states.h>
53#if IS_ENABLED(CONFIG_IPV6_MIP6) 53#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
54#include <net/mip6.h> 54#include <net/mip6.h>
55#endif 55#endif
56#include <linux/mroute6.h> 56#include <linux/mroute6.h>
@@ -61,7 +61,6 @@
61 61
62#include <linux/proc_fs.h> 62#include <linux/proc_fs.h>
63#include <linux/seq_file.h> 63#include <linux/seq_file.h>
64#include <linux/export.h>
65 64
66static struct raw_hashinfo raw_v6_hashinfo = { 65static struct raw_hashinfo raw_v6_hashinfo = {
67 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), 66 .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
@@ -72,7 +71,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
72 const struct in6_addr *rmt_addr, int dif) 71 const struct in6_addr *rmt_addr, int dif)
73{ 72{
74 struct hlist_node *node; 73 struct hlist_node *node;
75 bool is_multicast = ipv6_addr_is_multicast(loc_addr); 74 int is_multicast = ipv6_addr_is_multicast(loc_addr);
76 75
77 sk_for_each_from(sk, node) 76 sk_for_each_from(sk, node)
78 if (inet_sk(sk)->inet_num == num) { 77 if (inet_sk(sk)->inet_num == num) {
@@ -107,23 +106,24 @@ found:
107 * 0 - deliver 106 * 0 - deliver
108 * 1 - block 107 * 1 - block
109 */ 108 */
110static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) 109static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
111{ 110{
112 struct icmp6hdr *_hdr; 111 struct icmp6hdr *icmph;
113 const struct icmp6hdr *hdr; 112 struct raw6_sock *rp = raw6_sk(sk);
113
114 if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
115 __u32 *data = &rp->filter.data[0];
116 int bit_nr;
114 117
115 hdr = skb_header_pointer(skb, skb_transport_offset(skb), 118 icmph = (struct icmp6hdr *) skb->data;
116 sizeof(_hdr), &_hdr); 119 bit_nr = icmph->icmp6_type;
117 if (hdr) {
118 const __u32 *data = &raw6_sk(sk)->filter.data[0];
119 unsigned int type = hdr->icmp6_type;
120 120
121 return (data[type >> 5] & (1U << (type & 31))) != 0; 121 return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
122 } 122 }
123 return 1; 123 return 0;
124} 124}
125 125
126#if IS_ENABLED(CONFIG_IPV6_MIP6) 126#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
127typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); 127typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
128 128
129static mh_filter_t __rcu *mh_filter __read_mostly; 129static mh_filter_t __rcu *mh_filter __read_mostly;
@@ -137,7 +137,7 @@ EXPORT_SYMBOL(rawv6_mh_filter_register);
137 137
138int rawv6_mh_filter_unregister(mh_filter_t filter) 138int rawv6_mh_filter_unregister(mh_filter_t filter)
139{ 139{
140 RCU_INIT_POINTER(mh_filter, NULL); 140 rcu_assign_pointer(mh_filter, NULL);
141 synchronize_rcu(); 141 synchronize_rcu();
142 return 0; 142 return 0;
143} 143}
@@ -152,19 +152,19 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
152 * 152 *
153 * Caller owns SKB so we must make clones. 153 * Caller owns SKB so we must make clones.
154 */ 154 */
155static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) 155static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
156{ 156{
157 const struct in6_addr *saddr; 157 const struct in6_addr *saddr;
158 const struct in6_addr *daddr; 158 const struct in6_addr *daddr;
159 struct sock *sk; 159 struct sock *sk;
160 bool delivered = false; 160 int delivered = 0;
161 __u8 hash; 161 __u8 hash;
162 struct net *net; 162 struct net *net;
163 163
164 saddr = &ipv6_hdr(skb)->saddr; 164 saddr = &ipv6_hdr(skb)->saddr;
165 daddr = saddr + 1; 165 daddr = saddr + 1;
166 166
167 hash = nexthdr & (RAW_HTABLE_SIZE - 1); 167 hash = nexthdr & (MAX_INET_PROTOS - 1);
168 168
169 read_lock(&raw_v6_hashinfo.lock); 169 read_lock(&raw_v6_hashinfo.lock);
170 sk = sk_head(&raw_v6_hashinfo.ht[hash]); 170 sk = sk_head(&raw_v6_hashinfo.ht[hash]);
@@ -178,13 +178,13 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
178 while (sk) { 178 while (sk) {
179 int filtered; 179 int filtered;
180 180
181 delivered = true; 181 delivered = 1;
182 switch (nexthdr) { 182 switch (nexthdr) {
183 case IPPROTO_ICMPV6: 183 case IPPROTO_ICMPV6:
184 filtered = icmpv6_filter(sk, skb); 184 filtered = icmpv6_filter(sk, skb);
185 break; 185 break;
186 186
187#if IS_ENABLED(CONFIG_IPV6_MIP6) 187#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
188 case IPPROTO_MH: 188 case IPPROTO_MH:
189 { 189 {
190 /* XXX: To validate MH only once for each packet, 190 /* XXX: To validate MH only once for each packet,
@@ -224,11 +224,11 @@ out:
224 return delivered; 224 return delivered;
225} 225}
226 226
227bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) 227int raw6_local_deliver(struct sk_buff *skb, int nexthdr)
228{ 228{
229 struct sock *raw_sk; 229 struct sock *raw_sk;
230 230
231 raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); 231 raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]);
232 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) 232 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
233 raw_sk = NULL; 233 raw_sk = NULL;
234 234
@@ -298,9 +298,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
298 } 298 }
299 299
300 inet->inet_rcv_saddr = inet->inet_saddr = v4addr; 300 inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
301 np->rcv_saddr = addr->sin6_addr; 301 ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
302 if (!(addr_type & IPV6_ADDR_MULTICAST)) 302 if (!(addr_type & IPV6_ADDR_MULTICAST))
303 np->saddr = addr->sin6_addr; 303 ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
304 err = 0; 304 err = 0;
305out_unlock: 305out_unlock:
306 rcu_read_unlock(); 306 rcu_read_unlock();
@@ -327,12 +327,9 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
327 return; 327 return;
328 328
329 harderr = icmpv6_err_convert(type, code, &err); 329 harderr = icmpv6_err_convert(type, code, &err);
330 if (type == ICMPV6_PKT_TOOBIG) { 330 if (type == ICMPV6_PKT_TOOBIG)
331 ip6_sk_update_pmtu(skb, sk, info);
332 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); 331 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
333 } 332
334 if (type == NDISC_REDIRECT)
335 ip6_sk_redirect(skb, sk);
336 if (np->recverr) { 333 if (np->recverr) {
337 u8 *payload = skb->data; 334 u8 *payload = skb->data;
338 if (!inet->hdrincl) 335 if (!inet->hdrincl)
@@ -375,9 +372,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
375 read_unlock(&raw_v6_hashinfo.lock); 372 read_unlock(&raw_v6_hashinfo.lock);
376} 373}
377 374
378static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) 375static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
379{ 376{
380 if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && 377 if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
381 skb_checksum_complete(skb)) { 378 skb_checksum_complete(skb)) {
382 atomic_inc(&sk->sk_drops); 379 atomic_inc(&sk->sk_drops);
383 kfree_skb(skb); 380 kfree_skb(skb);
@@ -385,8 +382,7 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
385 } 382 }
386 383
387 /* Charge it to the socket. */ 384 /* Charge it to the socket. */
388 skb_dst_drop(skb); 385 if (ip_queue_rcv_skb(sk, skb) < 0) {
389 if (sock_queue_rcv_skb(sk, skb) < 0) {
390 kfree_skb(skb); 386 kfree_skb(skb);
391 return NET_RX_DROP; 387 return NET_RX_DROP;
392 } 388 }
@@ -497,7 +493,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
497 if (sin6) { 493 if (sin6) {
498 sin6->sin6_family = AF_INET6; 494 sin6->sin6_family = AF_INET6;
499 sin6->sin6_port = 0; 495 sin6->sin6_port = 0;
500 sin6->sin6_addr = ipv6_hdr(skb)->saddr; 496 ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
501 sin6->sin6_flowinfo = 0; 497 sin6->sin6_flowinfo = 0;
502 sin6->sin6_scope_id = 0; 498 sin6->sin6_scope_id = 0;
503 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 499 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -546,7 +542,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
546 goto out; 542 goto out;
547 543
548 offset = rp->offset; 544 offset = rp->offset;
549 total_len = inet_sk(sk)->cork.base.length; 545 total_len = inet_sk(sk)->cork.base.length - (skb_network_header(skb) -
546 skb->data);
550 if (offset >= total_len - 1) { 547 if (offset >= total_len - 1) {
551 err = -EINVAL; 548 err = -EINVAL;
552 ip6_flush_pending_frames(sk); 549 ip6_flush_pending_frames(sk);
@@ -613,8 +610,6 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
613 struct sk_buff *skb; 610 struct sk_buff *skb;
614 int err; 611 int err;
615 struct rt6_info *rt = (struct rt6_info *)*dstp; 612 struct rt6_info *rt = (struct rt6_info *)*dstp;
616 int hlen = LL_RESERVED_SPACE(rt->dst.dev);
617 int tlen = rt->dst.dev->needed_tailroom;
618 613
619 if (length > rt->dst.dev->mtu) { 614 if (length > rt->dst.dev->mtu) {
620 ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); 615 ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
@@ -624,11 +619,11 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
624 goto out; 619 goto out;
625 620
626 skb = sock_alloc_send_skb(sk, 621 skb = sock_alloc_send_skb(sk,
627 length + hlen + tlen + 15, 622 length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
628 flags & MSG_DONTWAIT, &err); 623 flags & MSG_DONTWAIT, &err);
629 if (skb == NULL) 624 if (skb == NULL)
630 goto error; 625 goto error;
631 skb_reserve(skb, hlen); 626 skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
632 627
633 skb->priority = sk->sk_priority; 628 skb->priority = sk->sk_priority;
634 skb->mark = sk->sk_mark; 629 skb->mark = sk->sk_mark;
@@ -848,18 +843,16 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
848 goto out; 843 goto out;
849 844
850 if (!ipv6_addr_any(daddr)) 845 if (!ipv6_addr_any(daddr))
851 fl6.daddr = *daddr; 846 ipv6_addr_copy(&fl6.daddr, daddr);
852 else 847 else
853 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 848 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
854 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) 849 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
855 fl6.saddr = np->saddr; 850 ipv6_addr_copy(&fl6.saddr, &np->saddr);
856 851
857 final_p = fl6_update_dst(&fl6, opt, &final); 852 final_p = fl6_update_dst(&fl6, opt, &final);
858 853
859 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) 854 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
860 fl6.flowi6_oif = np->mcast_oif; 855 fl6.flowi6_oif = np->mcast_oif;
861 else if (!fl6.flowi6_oif)
862 fl6.flowi6_oif = np->ucast_oif;
863 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 856 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
864 857
865 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); 858 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
@@ -1250,8 +1243,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1250 sk_wmem_alloc_get(sp), 1243 sk_wmem_alloc_get(sp),
1251 sk_rmem_alloc_get(sp), 1244 sk_rmem_alloc_get(sp),
1252 0, 0L, 0, 1245 0, 0L, 0,
1253 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 1246 sock_i_uid(sp), 0,
1254 0,
1255 sock_i_ino(sp), 1247 sock_i_ino(sp),
1256 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); 1248 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
1257} 1249}
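
[Illustrative aside, not part of the patch.] Both sides of the icmpv6_filter() hunk above reduce to the same bit test over a 256-bit per-socket filter, data[type >> 5] & (1 << (type & 31)), where a set bit means "block" under the function's 0 = deliver / 1 = block convention. A standalone sketch with made-up names:

    #include <stdint.h>
    #include <stdio.h>

    /* 256-bit ICMPv6 type filter: one bit per possible type value. */
    struct icmp6_filter_sketch {
        uint32_t data[8];
    };

    static void block_type(struct icmp6_filter_sketch *f, uint8_t type)
    {
        f->data[type >> 5] |= 1u << (type & 31);
    }

    static int type_blocked(const struct icmp6_filter_sketch *f, uint8_t type)
    {
        return (f->data[type >> 5] & (1u << (type & 31))) != 0;
    }

    int main(void)
    {
        struct icmp6_filter_sketch f = { {0} };
        block_type(&f, 128);                       /* echo request */
        printf("%d %d\n", type_blocked(&f, 128), type_blocked(&f, 129)); /* 1 0 */
        return 0;
    }
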
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e5253ec9e0f..7b954e2539d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -42,7 +42,6 @@
42#include <linux/jhash.h> 42#include <linux/jhash.h>
43#include <linux/skbuff.h> 43#include <linux/skbuff.h>
44#include <linux/slab.h> 44#include <linux/slab.h>
45#include <linux/export.h>
46 45
47#include <net/sock.h> 46#include <net/sock.h>
48#include <net/snmp.h> 47#include <net/snmp.h>
@@ -65,8 +64,36 @@ struct ip6frag_skb_cb
65#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) 64#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
66 65
67 66
67/*
68 * Equivalent of ipv4 struct ipq
69 */
70
71struct frag_queue
72{
73 struct inet_frag_queue q;
74
75 __be32 id; /* fragment id */
76 u32 user;
77 struct in6_addr saddr;
78 struct in6_addr daddr;
79
80 int iif;
81 unsigned int csum;
82 __u16 nhoffset;
83};
84
68static struct inet_frags ip6_frags; 85static struct inet_frags ip6_frags;
69 86
87int ip6_frag_nqueues(struct net *net)
88{
89 return net->ipv6.frags.nqueues;
90}
91
92int ip6_frag_mem(struct net *net)
93{
94 return atomic_read(&net->ipv6.frags.mem);
95}
96
70static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, 97static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
71 struct net_device *dev); 98 struct net_device *dev);
72 99
@@ -106,16 +133,15 @@ static unsigned int ip6_hashfn(struct inet_frag_queue *q)
106 return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); 133 return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd);
107} 134}
108 135
109bool ip6_frag_match(struct inet_frag_queue *q, void *a) 136int ip6_frag_match(struct inet_frag_queue *q, void *a)
110{ 137{
111 struct frag_queue *fq; 138 struct frag_queue *fq;
112 struct ip6_create_arg *arg = a; 139 struct ip6_create_arg *arg = a;
113 140
114 fq = container_of(q, struct frag_queue, q); 141 fq = container_of(q, struct frag_queue, q);
115 return fq->id == arg->id && 142 return (fq->id == arg->id && fq->user == arg->user &&
116 fq->user == arg->user && 143 ipv6_addr_equal(&fq->saddr, arg->src) &&
117 ipv6_addr_equal(&fq->saddr, arg->src) && 144 ipv6_addr_equal(&fq->daddr, arg->dst));
118 ipv6_addr_equal(&fq->daddr, arg->dst);
119} 145}
120EXPORT_SYMBOL(ip6_frag_match); 146EXPORT_SYMBOL(ip6_frag_match);
121 147
@@ -126,23 +152,51 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
126 152
127 fq->id = arg->id; 153 fq->id = arg->id;
128 fq->user = arg->user; 154 fq->user = arg->user;
129 fq->saddr = *arg->src; 155 ipv6_addr_copy(&fq->saddr, arg->src);
130 fq->daddr = *arg->dst; 156 ipv6_addr_copy(&fq->daddr, arg->dst);
131} 157}
132EXPORT_SYMBOL(ip6_frag_init); 158EXPORT_SYMBOL(ip6_frag_init);
133 159
134void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, 160/* Destruction primitives. */
135 struct inet_frags *frags) 161
162static __inline__ void fq_put(struct frag_queue *fq)
136{ 163{
164 inet_frag_put(&fq->q, &ip6_frags);
165}
166
167/* Kill fq entry. It is not destroyed immediately,
168 * because caller (and someone more) holds reference count.
169 */
170static __inline__ void fq_kill(struct frag_queue *fq)
171{
172 inet_frag_kill(&fq->q, &ip6_frags);
173}
174
175static void ip6_evictor(struct net *net, struct inet6_dev *idev)
176{
177 int evicted;
178
179 evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
180 if (evicted)
181 IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
182}
183
184static void ip6_frag_expire(unsigned long data)
185{
186 struct frag_queue *fq;
137 struct net_device *dev = NULL; 187 struct net_device *dev = NULL;
188 struct net *net;
189
190 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
138 191
139 spin_lock(&fq->q.lock); 192 spin_lock(&fq->q.lock);
140 193
141 if (fq->q.last_in & INET_FRAG_COMPLETE) 194 if (fq->q.last_in & INET_FRAG_COMPLETE)
142 goto out; 195 goto out;
143 196
144 inet_frag_kill(&fq->q, frags); 197 fq_kill(fq);
145 198
199 net = container_of(fq->q.net, struct net, ipv6.frags);
146 rcu_read_lock(); 200 rcu_read_lock();
147 dev = dev_get_by_index_rcu(net, fq->iif); 201 dev = dev_get_by_index_rcu(net, fq->iif);
148 if (!dev) 202 if (!dev)
@@ -166,19 +220,7 @@ out_rcu_unlock:
166 rcu_read_unlock(); 220 rcu_read_unlock();
167out: 221out:
168 spin_unlock(&fq->q.lock); 222 spin_unlock(&fq->q.lock);
169 inet_frag_put(&fq->q, frags); 223 fq_put(fq);
170}
171EXPORT_SYMBOL(ip6_expire_frag_queue);
172
173static void ip6_frag_expire(unsigned long data)
174{
175 struct frag_queue *fq;
176 struct net *net;
177
178 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
179 net = container_of(fq->q.net, struct net, ipv6.frags);
180
181 ip6_expire_frag_queue(net, fq, &ip6_frags);
182} 224}
183 225
184static __inline__ struct frag_queue * 226static __inline__ struct frag_queue *
@@ -293,11 +335,12 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
293 } 335 }
294 336
295found: 337found:
296 /* RFC5722, Section 4, amended by Errata ID : 3089 338 /* RFC5722, Section 4:
297 * When reassembling an IPv6 datagram, if 339 * When reassembling an IPv6 datagram, if
298 * one or more its constituent fragments is determined to be an 340 * one or more its constituent fragments is determined to be an
299 * overlapping fragment, the entire datagram (and any constituent 341 * overlapping fragment, the entire datagram (and any constituent
300 * fragments) MUST be silently discarded. 342 * fragments, including those not yet received) MUST be silently
343 * discarded.
301 */ 344 */
302 345
303 /* Check for overlap with preceding fragment. */ 346 /* Check for overlap with preceding fragment. */
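
[Illustrative aside, not part of the patch.] The comment reworded in the hunk above states RFC 5722's rule: one overlapping fragment poisons the entire datagram. The kernel enforces it by comparing the new fragment's [offset, end) against its list neighbours; below is a generic interval-overlap sketch of the same test, where the list walk and names are mine rather than the kernel's:

    #include <stdio.h>

    struct frag { unsigned int offset, end; struct frag *next; };

    /* Nonzero if [offset, end) overlaps any already-queued fragment. */
    static int overlaps(const struct frag *list, unsigned int offset, unsigned int end)
    {
        for (const struct frag *f = list; f; f = f->next)
            if (offset < f->end && end > f->offset)
                return 1;
        return 0;
    }

    int main(void)
    {
        struct frag b = { 1232, 2464, NULL };
        struct frag a = { 0, 1232, &b };
        printf("%d %d\n", overlaps(&a, 2464, 2992),   /* 0: abuts, no overlap        */
                          overlaps(&a, 1224, 2456));  /* 1: RFC 5722 says drop all   */
        return 0;
    }
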
@@ -347,7 +390,7 @@ found:
347 return -1; 390 return -1;
348 391
349discard_fq: 392discard_fq:
350 inet_frag_kill(&fq->q, &ip6_frags); 393 fq_kill(fq);
351err: 394err:
352 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 395 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
353 IPSTATS_MIB_REASMFAILS); 396 IPSTATS_MIB_REASMFAILS);
@@ -371,9 +414,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
371 struct sk_buff *fp, *head = fq->q.fragments; 414 struct sk_buff *fp, *head = fq->q.fragments;
372 int payload_len; 415 int payload_len;
373 unsigned int nhoff; 416 unsigned int nhoff;
374 int sum_truesize;
375 417
376 inet_frag_kill(&fq->q, &ip6_frags); 418 fq_kill(fq);
377 419
378 /* Make the one we just received the head. */ 420 /* Make the one we just received the head. */
379 if (prev) { 421 if (prev) {
@@ -391,7 +433,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
391 skb_morph(head, fq->q.fragments); 433 skb_morph(head, fq->q.fragments);
392 head->next = fq->q.fragments->next; 434 head->next = fq->q.fragments->next;
393 435
394 consume_skb(fq->q.fragments); 436 kfree_skb(fq->q.fragments);
395 fq->q.fragments = head; 437 fq->q.fragments = head;
396 } 438 }
397 439
@@ -422,8 +464,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
422 head->next = clone; 464 head->next = clone;
423 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; 465 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
424 skb_frag_list_init(head); 466 skb_frag_list_init(head);
425 for (i = 0; i < skb_shinfo(head)->nr_frags; i++) 467 for (i=0; i<skb_shinfo(head)->nr_frags; i++)
426 plen += skb_frag_size(&skb_shinfo(head)->frags[i]); 468 plen += skb_shinfo(head)->frags[i].size;
427 clone->len = clone->data_len = head->data_len - plen; 469 clone->len = clone->data_len = head->data_len - plen;
428 head->data_len -= clone->len; 470 head->data_len -= clone->len;
429 head->len -= clone->len; 471 head->len -= clone->len;
@@ -441,33 +483,20 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
441 head->mac_header += sizeof(struct frag_hdr); 483 head->mac_header += sizeof(struct frag_hdr);
442 head->network_header += sizeof(struct frag_hdr); 484 head->network_header += sizeof(struct frag_hdr);
443 485
486 skb_shinfo(head)->frag_list = head->next;
444 skb_reset_transport_header(head); 487 skb_reset_transport_header(head);
445 skb_push(head, head->data - skb_network_header(head)); 488 skb_push(head, head->data - skb_network_header(head));
446 489
447 sum_truesize = head->truesize; 490 for (fp=head->next; fp; fp = fp->next) {
448 for (fp = head->next; fp;) { 491 head->data_len += fp->len;
449 bool headstolen; 492 head->len += fp->len;
450 int delta;
451 struct sk_buff *next = fp->next;
452
453 sum_truesize += fp->truesize;
454 if (head->ip_summed != fp->ip_summed) 493 if (head->ip_summed != fp->ip_summed)
455 head->ip_summed = CHECKSUM_NONE; 494 head->ip_summed = CHECKSUM_NONE;
456 else if (head->ip_summed == CHECKSUM_COMPLETE) 495 else if (head->ip_summed == CHECKSUM_COMPLETE)
457 head->csum = csum_add(head->csum, fp->csum); 496 head->csum = csum_add(head->csum, fp->csum);
458 497 head->truesize += fp->truesize;
459 if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
460 kfree_skb_partial(fp, headstolen);
461 } else {
462 if (!skb_shinfo(head)->frag_list)
463 skb_shinfo(head)->frag_list = fp;
464 head->data_len += fp->len;
465 head->len += fp->len;
466 head->truesize += fp->truesize;
467 }
468 fp = next;
469 } 498 }
470 atomic_sub(sum_truesize, &fq->q.net->mem); 499 atomic_sub(head->truesize, &fq->q.net->mem);
471 500
472 head->next = NULL; 501 head->next = NULL;
473 head->dev = dev; 502 head->dev = dev;
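
The loop above stitches the remaining fragments onto the head skb, growing its length fields and folding each fragment's checksum in with csum_add(). A userspace sketch of that bookkeeping, with a plain end-around-carry add standing in for the kernel helper and a hypothetical frag struct:

#include <stdint.h>
#include <stdio.h>

/* Fold b into a with end-around carry, like the kernel's csum_add(). */
static uint32_t csum_add(uint32_t a, uint32_t b)
{
	uint64_t sum = (uint64_t)a + b;

	return (uint32_t)(sum + (sum >> 32));
}

struct frag { uint32_t len, csum; struct frag *next; };

int main(void)
{
	struct frag f2 = { 500,  0x1234, NULL };
	struct frag f1 = { 1000, 0xbeef, &f2 };
	struct frag head = { 1232, 0xcafe, &f1 };
	uint32_t total_len = head.len, csum = head.csum;
	struct frag *fp;

	for (fp = head.next; fp; fp = fp->next) {
		total_len += fp->len;		/* head->len / head->data_len grow */
		csum = csum_add(csum, fp->csum);/* fold per-fragment checksum */
	}
	printf("reassembled len=%u csum=%#x\n", total_len, csum);
	return 0;
}
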
@@ -489,10 +518,12 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
489 return 1; 518 return 1;
490 519
491out_oversize: 520out_oversize:
492 net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len); 521 if (net_ratelimit())
522 printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len);
493 goto out_fail; 523 goto out_fail;
494out_oom: 524out_oom:
495 net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); 525 if (net_ratelimit())
526 printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
496out_fail: 527out_fail:
497 rcu_read_lock(); 528 rcu_read_lock();
498 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); 529 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
@@ -506,7 +537,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
506 struct frag_queue *fq; 537 struct frag_queue *fq;
507 const struct ipv6hdr *hdr = ipv6_hdr(skb); 538 const struct ipv6hdr *hdr = ipv6_hdr(skb);
508 struct net *net = dev_net(skb_dst(skb)->dev); 539 struct net *net = dev_net(skb_dst(skb)->dev);
509 int evicted;
510 540
511 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); 541 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
512 542
@@ -531,10 +561,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
531 return 1; 561 return 1;
532 } 562 }
533 563
534 evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); 564 if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
535 if (evicted) 565 ip6_evictor(net, ip6_dst_idev(skb_dst(skb)));
536 IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
537 IPSTATS_MIB_REASMFAILS, evicted);
538 566
539 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); 567 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
540 if (fq != NULL) { 568 if (fq != NULL) {
@@ -545,7 +573,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
545 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); 573 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
546 574
547 spin_unlock(&fq->q.lock); 575 spin_unlock(&fq->q.lock);
548 inet_frag_put(&fq->q, &ip6_frags); 576 fq_put(fq);
549 return ret; 577 return ret;
550 } 578 }
551 579
@@ -616,13 +644,9 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
616 table[0].data = &net->ipv6.frags.high_thresh; 644 table[0].data = &net->ipv6.frags.high_thresh;
617 table[1].data = &net->ipv6.frags.low_thresh; 645 table[1].data = &net->ipv6.frags.low_thresh;
618 table[2].data = &net->ipv6.frags.timeout; 646 table[2].data = &net->ipv6.frags.timeout;
619
620 /* Don't export sysctls to unprivileged users */
621 if (net->user_ns != &init_user_ns)
622 table[0].procname = NULL;
623 } 647 }
624 648
625 hdr = register_net_sysctl(net, "net/ipv6", table); 649 hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
626 if (hdr == NULL) 650 if (hdr == NULL)
627 goto err_reg; 651 goto err_reg;
628 652
@@ -650,7 +674,7 @@ static struct ctl_table_header *ip6_ctl_header;
650 674
651static int ip6_frags_sysctl_register(void) 675static int ip6_frags_sysctl_register(void)
652{ 676{
653 ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6", 677 ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path,
654 ip6_frags_ctl_table); 678 ip6_frags_ctl_table);
655 return ip6_ctl_header == NULL ? -ENOMEM : 0; 679 return ip6_ctl_header == NULL ? -ENOMEM : 0;
656} 680}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e229a3bc345..f02fe523bd3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -24,11 +24,8 @@
24 * Fixed routing subtrees. 24 * Fixed routing subtrees.
25 */ 25 */
26 26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <linux/capability.h> 27#include <linux/capability.h>
30#include <linux/errno.h> 28#include <linux/errno.h>
31#include <linux/export.h>
32#include <linux/types.h> 29#include <linux/types.h>
33#include <linux/times.h> 30#include <linux/times.h>
34#include <linux/socket.h> 31#include <linux/socket.h>
@@ -57,7 +54,6 @@
57#include <net/xfrm.h> 54#include <net/xfrm.h>
58#include <net/netevent.h> 55#include <net/netevent.h>
59#include <net/netlink.h> 56#include <net/netlink.h>
60#include <net/nexthop.h>
61 57
62#include <asm/uaccess.h> 58#include <asm/uaccess.h>
63 59
@@ -65,11 +61,22 @@
65#include <linux/sysctl.h> 61#include <linux/sysctl.h>
66#endif 62#endif
67 63
68static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
69 const struct in6_addr *dest); 76 const struct in6_addr *dest);
70static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71static unsigned int ip6_default_advmss(const struct dst_entry *dst); 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
72static unsigned int ip6_mtu(const struct dst_entry *dst); 79static unsigned int ip6_default_mtu(const struct dst_entry *dst);
73static struct dst_entry *ip6_negative_advice(struct dst_entry *); 80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74static void ip6_dst_destroy(struct dst_entry *); 81static void ip6_dst_destroy(struct dst_entry *);
75static void ip6_dst_ifdown(struct dst_entry *, 82static void ip6_dst_ifdown(struct dst_entry *,
@@ -79,16 +86,13 @@ static int ip6_dst_gc(struct dst_ops *ops);
79static int ip6_pkt_discard(struct sk_buff *skb); 86static int ip6_pkt_discard(struct sk_buff *skb);
80static int ip6_pkt_discard_out(struct sk_buff *skb); 87static int ip6_pkt_discard_out(struct sk_buff *skb);
81static void ip6_link_failure(struct sk_buff *skb); 88static void ip6_link_failure(struct sk_buff *skb);
82static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
83 struct sk_buff *skb, u32 mtu);
84static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 struct sk_buff *skb);
86 90
87#ifdef CONFIG_IPV6_ROUTE_INFO 91#ifdef CONFIG_IPV6_ROUTE_INFO
88static struct rt6_info *rt6_add_route_info(struct net *net, 92static struct rt6_info *rt6_add_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen, 93 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex, 94 const struct in6_addr *gwaddr, int ifindex,
91 unsigned int pref); 95 unsigned pref);
92static struct rt6_info *rt6_get_route_info(struct net *net, 96static struct rt6_info *rt6_get_route_info(struct net *net,
93 const struct in6_addr *prefix, int prefixlen, 97 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex); 98 const struct in6_addr *gwaddr, int ifindex);
@@ -103,7 +107,10 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103 if (!(rt->dst.flags & DST_HOST)) 107 if (!(rt->dst.flags & DST_HOST))
104 return NULL; 108 return NULL;
105 109
106 peer = rt6_get_peer_create(rt); 110 if (!rt->rt6i_peer)
111 rt6_bind_peer(rt, 1);
112
113 peer = rt->rt6i_peer;
107 if (peer) { 114 if (peer) {
108 u32 *old_p = __DST_METRICS_PTR(old); 115 u32 *old_p = __DST_METRICS_PTR(old);
109 unsigned long prev, new; 116 unsigned long prev, new;
@@ -124,44 +131,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
124 return p; 131 return p;
125} 132}
126 133
127static inline const void *choose_neigh_daddr(struct rt6_info *rt, 134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
128 struct sk_buff *skb,
129 const void *daddr)
130{ 135{
131 struct in6_addr *p = &rt->rt6i_gateway; 136 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
132
133 if (!ipv6_addr_any(p))
134 return (const void *) p;
135 else if (skb)
136 return &ipv6_hdr(skb)->daddr;
137 return daddr;
138}
139
140static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 struct sk_buff *skb,
142 const void *daddr)
143{
144 struct rt6_info *rt = (struct rt6_info *) dst;
145 struct neighbour *n;
146
147 daddr = choose_neigh_daddr(rt, skb, daddr);
148 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149 if (n)
150 return n;
151 return neigh_create(&nd_tbl, daddr, dst->dev);
152}
153
154static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155{
156 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157 if (!n) {
158 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159 if (IS_ERR(n))
160 return PTR_ERR(n);
161 }
162 rt->n = n;
163
164 return 0;
165} 137}
166 138
167static struct dst_ops ip6_dst_ops_template = { 139static struct dst_ops ip6_dst_ops_template = {
@@ -171,32 +143,23 @@ static struct dst_ops ip6_dst_ops_template = {
171 .gc_thresh = 1024, 143 .gc_thresh = 1024,
172 .check = ip6_dst_check, 144 .check = ip6_dst_check,
173 .default_advmss = ip6_default_advmss, 145 .default_advmss = ip6_default_advmss,
174 .mtu = ip6_mtu, 146 .default_mtu = ip6_default_mtu,
175 .cow_metrics = ipv6_cow_metrics, 147 .cow_metrics = ipv6_cow_metrics,
176 .destroy = ip6_dst_destroy, 148 .destroy = ip6_dst_destroy,
177 .ifdown = ip6_dst_ifdown, 149 .ifdown = ip6_dst_ifdown,
178 .negative_advice = ip6_negative_advice, 150 .negative_advice = ip6_negative_advice,
179 .link_failure = ip6_link_failure, 151 .link_failure = ip6_link_failure,
180 .update_pmtu = ip6_rt_update_pmtu, 152 .update_pmtu = ip6_rt_update_pmtu,
181 .redirect = rt6_do_redirect,
182 .local_out = __ip6_local_out, 153 .local_out = __ip6_local_out,
183 .neigh_lookup = ip6_neigh_lookup, 154 .neigh_lookup = ip6_neigh_lookup,
184}; 155};
185 156
186static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 157static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
187{
188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190 return mtu ? : dst->dev->mtu;
191}
192
193static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194 struct sk_buff *skb, u32 mtu)
195{ 158{
159 return 0;
196} 160}
197 161
198static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
199 struct sk_buff *skb)
200{ 163{
201} 164}
202 165
@@ -211,23 +174,22 @@ static struct dst_ops ip6_dst_blackhole_ops = {
211 .protocol = cpu_to_be16(ETH_P_IPV6), 174 .protocol = cpu_to_be16(ETH_P_IPV6),
212 .destroy = ip6_dst_destroy, 175 .destroy = ip6_dst_destroy,
213 .check = ip6_dst_check, 176 .check = ip6_dst_check,
214 .mtu = ip6_blackhole_mtu, 177 .default_mtu = ip6_blackhole_default_mtu,
215 .default_advmss = ip6_default_advmss, 178 .default_advmss = ip6_default_advmss,
216 .update_pmtu = ip6_rt_blackhole_update_pmtu, 179 .update_pmtu = ip6_rt_blackhole_update_pmtu,
217 .redirect = ip6_rt_blackhole_redirect,
218 .cow_metrics = ip6_rt_blackhole_cow_metrics, 180 .cow_metrics = ip6_rt_blackhole_cow_metrics,
219 .neigh_lookup = ip6_neigh_lookup, 181 .neigh_lookup = ip6_neigh_lookup,
220}; 182};
221 183
222static const u32 ip6_template_metrics[RTAX_MAX] = { 184static const u32 ip6_template_metrics[RTAX_MAX] = {
223 [RTAX_HOPLIMIT - 1] = 0, 185 [RTAX_HOPLIMIT - 1] = 255,
224}; 186};
225 187
226static const struct rt6_info ip6_null_entry_template = { 188static struct rt6_info ip6_null_entry_template = {
227 .dst = { 189 .dst = {
228 .__refcnt = ATOMIC_INIT(1), 190 .__refcnt = ATOMIC_INIT(1),
229 .__use = 1, 191 .__use = 1,
230 .obsolete = DST_OBSOLETE_FORCE_CHK, 192 .obsolete = -1,
231 .error = -ENETUNREACH, 193 .error = -ENETUNREACH,
232 .input = ip6_pkt_discard, 194 .input = ip6_pkt_discard,
233 .output = ip6_pkt_discard_out, 195 .output = ip6_pkt_discard_out,
@@ -243,11 +205,11 @@ static const struct rt6_info ip6_null_entry_template = {
243static int ip6_pkt_prohibit(struct sk_buff *skb); 205static int ip6_pkt_prohibit(struct sk_buff *skb);
244static int ip6_pkt_prohibit_out(struct sk_buff *skb); 206static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245 207
246static const struct rt6_info ip6_prohibit_entry_template = { 208static struct rt6_info ip6_prohibit_entry_template = {
247 .dst = { 209 .dst = {
248 .__refcnt = ATOMIC_INIT(1), 210 .__refcnt = ATOMIC_INIT(1),
249 .__use = 1, 211 .__use = 1,
250 .obsolete = DST_OBSOLETE_FORCE_CHK, 212 .obsolete = -1,
251 .error = -EACCES, 213 .error = -EACCES,
252 .input = ip6_pkt_prohibit, 214 .input = ip6_pkt_prohibit,
253 .output = ip6_pkt_prohibit_out, 215 .output = ip6_pkt_prohibit_out,
@@ -258,11 +220,11 @@ static const struct rt6_info ip6_prohibit_entry_template = {
258 .rt6i_ref = ATOMIC_INIT(1), 220 .rt6i_ref = ATOMIC_INIT(1),
259}; 221};
260 222
261static const struct rt6_info ip6_blk_hole_entry_template = { 223static struct rt6_info ip6_blk_hole_entry_template = {
262 .dst = { 224 .dst = {
263 .__refcnt = ATOMIC_INIT(1), 225 .__refcnt = ATOMIC_INIT(1),
264 .__use = 1, 226 .__use = 1,
265 .obsolete = DST_OBSOLETE_FORCE_CHK, 227 .obsolete = -1,
266 .error = -EINVAL, 228 .error = -EINVAL,
267 .input = dst_discard, 229 .input = dst_discard,
268 .output = dst_discard, 230 .output = dst_discard,
@@ -276,23 +238,16 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
276#endif 238#endif
277 239
278/* allocate dst with ip6_dst_ops */ 240/* allocate dst with ip6_dst_ops */
279static inline struct rt6_info *ip6_dst_alloc(struct net *net, 241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
280 struct net_device *dev, 242 struct net_device *dev,
281 int flags, 243 int flags)
282 struct fib6_table *table)
283{ 244{
284 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 245 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
285 0, DST_OBSOLETE_FORCE_CHK, flags);
286 246
287 if (rt) { 247 if (rt != NULL)
288 struct dst_entry *dst = &rt->dst; 248 memset(&rt->rt6i_table, 0,
249 sizeof(*rt) - sizeof(struct dst_entry));
289 250
290 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292 rt->rt6i_genid = rt_genid(net);
293 INIT_LIST_HEAD(&rt->rt6i_siblings);
294 rt->rt6i_nsiblings = 0;
295 }
296 return rt; 251 return rt;
297} 252}
298 253
@@ -300,41 +255,37 @@ static void ip6_dst_destroy(struct dst_entry *dst)
300{ 255{
301 struct rt6_info *rt = (struct rt6_info *)dst; 256 struct rt6_info *rt = (struct rt6_info *)dst;
302 struct inet6_dev *idev = rt->rt6i_idev; 257 struct inet6_dev *idev = rt->rt6i_idev;
303 258 struct inet_peer *peer = rt->rt6i_peer;
304 if (rt->n)
305 neigh_release(rt->n);
306 259
307 if (!(rt->dst.flags & DST_HOST)) 260 if (!(rt->dst.flags & DST_HOST))
308 dst_destroy_metrics_generic(dst); 261 dst_destroy_metrics_generic(dst);
309 262
310 if (idev) { 263 if (idev != NULL) {
311 rt->rt6i_idev = NULL; 264 rt->rt6i_idev = NULL;
312 in6_dev_put(idev); 265 in6_dev_put(idev);
313 } 266 }
314 267 if (peer) {
315 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) 268 rt->rt6i_peer = NULL;
316 dst_release(dst->from);
317
318 if (rt6_has_peer(rt)) {
319 struct inet_peer *peer = rt6_peer_ptr(rt);
320 inet_putpeer(peer); 269 inet_putpeer(peer);
321 } 270 }
322} 271}
323 272
273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
274
275static u32 rt6_peer_genid(void)
276{
277 return atomic_read(&__rt6_peer_genid);
278}
279
324void rt6_bind_peer(struct rt6_info *rt, int create) 280void rt6_bind_peer(struct rt6_info *rt, int create)
325{ 281{
326 struct inet_peer_base *base;
327 struct inet_peer *peer; 282 struct inet_peer *peer;
328 283
329 base = inetpeer_base_ptr(rt->_rt6i_peer); 284 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
330 if (!base) 285 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
331 return; 286 inet_putpeer(peer);
332 287 else
333 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); 288 rt->rt6i_peer_genid = rt6_peer_genid();
334 if (peer) {
335 if (!rt6_set_peer(rt, peer))
336 inet_putpeer(peer);
337 }
338} 289}
339 290
340static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -345,103 +296,28 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
345 struct net_device *loopback_dev = 296 struct net_device *loopback_dev =
346 dev_net(dev)->loopback_dev; 297 dev_net(dev)->loopback_dev;
347 298
348 if (dev != loopback_dev) { 299 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
349 if (idev && idev->dev == dev) { 300 struct inet6_dev *loopback_idev =
350 struct inet6_dev *loopback_idev = 301 in6_dev_get(loopback_dev);
351 in6_dev_get(loopback_dev); 302 if (loopback_idev != NULL) {
352 if (loopback_idev) { 303 rt->rt6i_idev = loopback_idev;
353 rt->rt6i_idev = loopback_idev; 304 in6_dev_put(idev);
354 in6_dev_put(idev);
355 }
356 }
357 if (rt->n && rt->n->dev == dev) {
358 rt->n->dev = loopback_dev;
359 dev_hold(loopback_dev);
360 dev_put(dev);
361 } 305 }
362 } 306 }
363} 307}
364 308
365static bool rt6_check_expired(const struct rt6_info *rt) 309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
366{ 310{
367 if (rt->rt6i_flags & RTF_EXPIRES) { 311 return (rt->rt6i_flags & RTF_EXPIRES) &&
368 if (time_after(jiffies, rt->dst.expires)) 312 time_after(jiffies, rt->rt6i_expires);
369 return true;
370 } else if (rt->dst.from) {
371 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372 }
373 return false;
374} 313}
375 314
376static bool rt6_need_strict(const struct in6_addr *daddr) 315static inline int rt6_need_strict(const struct in6_addr *daddr)
377{ 316{
378 return ipv6_addr_type(daddr) & 317 return ipv6_addr_type(daddr) &
379 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 318 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380} 319}
381 320
382/* Multipath route selection:
383 * Hash based function using packet header and flowlabel.
384 * Adapted from fib_info_hashfn()
385 */
386static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387 const struct flowi6 *fl6)
388{
389 unsigned int val = fl6->flowi6_proto;
390
391 val ^= (__force u32)fl6->daddr.s6_addr32[0];
392 val ^= (__force u32)fl6->daddr.s6_addr32[1];
393 val ^= (__force u32)fl6->daddr.s6_addr32[2];
394 val ^= (__force u32)fl6->daddr.s6_addr32[3];
395
396 val ^= (__force u32)fl6->saddr.s6_addr32[0];
397 val ^= (__force u32)fl6->saddr.s6_addr32[1];
398 val ^= (__force u32)fl6->saddr.s6_addr32[2];
399 val ^= (__force u32)fl6->saddr.s6_addr32[3];
400
401 /* Works only if this is not encapsulated */
402 switch (fl6->flowi6_proto) {
403 case IPPROTO_UDP:
404 case IPPROTO_TCP:
405 case IPPROTO_SCTP:
406 val ^= (__force u16)fl6->fl6_sport;
407 val ^= (__force u16)fl6->fl6_dport;
408 break;
409
410 case IPPROTO_ICMPV6:
411 val ^= (__force u16)fl6->fl6_icmp_type;
412 val ^= (__force u16)fl6->fl6_icmp_code;
413 break;
414 }
415 /* RFC 6438 recommends using the flow label */
416 val ^= (__force u32)fl6->flowlabel;
417
418 /* Perhaps we need to tune this function? */
419 val = val ^ (val >> 7) ^ (val >> 12);
420 return val % candidate_count;
421}
422
423static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424 struct flowi6 *fl6)
425{
426 struct rt6_info *sibling, *next_sibling;
427 int route_choosen;
428
429 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430 /* Don't change the route if route_choosen == 0
431 * (the sibling list does not include ourselves)
432 */
433 if (route_choosen)
434 list_for_each_entry_safe(sibling, next_sibling,
435 &match->rt6i_siblings, rt6i_siblings) {
436 route_choosen--;
437 if (route_choosen == 0) {
438 match = sibling;
439 break;
440 }
441 }
442 return match;
443}
444
445/* 321/*
446 * Route lookup. Any table->tb6_lock is implied. 322 * Route lookup. Any table->tb6_lock is implied.
447 */ 323 */
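
The rt6_multipath_select()/rt6_info_hash_nhsfn() pair removed above spreads flows across equal-cost sibling routes by hashing the packet's addresses, ports and flow label. A userspace sketch of that hash, with a simplified argument list in place of struct flowi6:

#include <stdint.h>
#include <stdio.h>

static unsigned int flow_hash_nhs(unsigned int candidates,
				  const uint32_t daddr[4], const uint32_t saddr[4],
				  uint16_t sport, uint16_t dport,
				  uint32_t flowlabel, uint8_t proto)
{
	unsigned int val = proto;
	int i;

	for (i = 0; i < 4; i++)
		val ^= daddr[i] ^ saddr[i];	/* both addresses */
	val ^= sport ^ dport;			/* transport ports, if any */
	val ^= flowlabel;			/* RFC 6438 flow label */

	val = val ^ (val >> 7) ^ (val >> 12);	/* final mixing */
	return val % candidates;		/* 0 == keep the matched route */
}

int main(void)
{
	uint32_t d[4] = { 0x20010db8, 0, 0, 1 };
	uint32_t s[4] = { 0x20010db8, 0, 0, 2 };

	/* The same flow always maps to the same of three sibling routes. */
	printf("chosen sibling: %u\n",
	       flow_hash_nhs(3, d, s, 40000, 443, 0x12345, 6));
	return 0;
}
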
@@ -459,13 +335,13 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
459 goto out; 335 goto out;
460 336
461 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 337 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462 struct net_device *dev = sprt->dst.dev; 338 struct net_device *dev = sprt->rt6i_dev;
463 339
464 if (oif) { 340 if (oif) {
465 if (dev->ifindex == oif) 341 if (dev->ifindex == oif)
466 return sprt; 342 return sprt;
467 if (dev->flags & IFF_LOOPBACK) { 343 if (dev->flags & IFF_LOOPBACK) {
468 if (!sprt->rt6i_idev || 344 if (sprt->rt6i_idev == NULL ||
469 sprt->rt6i_idev->dev->ifindex != oif) { 345 sprt->rt6i_idev->dev->ifindex != oif) {
470 if (flags & RT6_LOOKUP_F_IFACE && oif) 346 if (flags & RT6_LOOKUP_F_IFACE && oif)
471 continue; 347 continue;
@@ -505,9 +381,10 @@ static void rt6_probe(struct rt6_info *rt)
505 * Router Reachability Probe MUST be rate-limited 381 * Router Reachability Probe MUST be rate-limited
506 * to no more than one per minute. 382 * to no more than one per minute.
507 */ 383 */
508 neigh = rt ? rt->n : NULL; 384 rcu_read_lock();
385 neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
509 if (!neigh || (neigh->nud_state & NUD_VALID)) 386 if (!neigh || (neigh->nud_state & NUD_VALID))
510 return; 387 goto out;
511 read_lock_bh(&neigh->lock); 388 read_lock_bh(&neigh->lock);
512 if (!(neigh->nud_state & NUD_VALID) && 389 if (!(neigh->nud_state & NUD_VALID) &&
513 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 390 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
@@ -519,10 +396,12 @@ static void rt6_probe(struct rt6_info *rt)
519 396
520 target = (struct in6_addr *)&neigh->primary_key; 397 target = (struct in6_addr *)&neigh->primary_key;
521 addrconf_addr_solict_mult(target, &mcaddr); 398 addrconf_addr_solict_mult(target, &mcaddr);
522 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); 399 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
523 } else { 400 } else {
524 read_unlock_bh(&neigh->lock); 401 read_unlock_bh(&neigh->lock);
525 } 402 }
403out:
404 rcu_read_unlock();
526} 405}
527#else 406#else
528static inline void rt6_probe(struct rt6_info *rt) 407static inline void rt6_probe(struct rt6_info *rt)
@@ -535,7 +414,7 @@ static inline void rt6_probe(struct rt6_info *rt)
535 */ 414 */
536static inline int rt6_check_dev(struct rt6_info *rt, int oif) 415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537{ 416{
538 struct net_device *dev = rt->dst.dev; 417 struct net_device *dev = rt->rt6i_dev;
539 if (!oif || dev->ifindex == oif) 418 if (!oif || dev->ifindex == oif)
540 return 2; 419 return 2;
541 if ((dev->flags & IFF_LOOPBACK) && 420 if ((dev->flags & IFF_LOOPBACK) &&
@@ -544,32 +423,37 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
544 return 0; 423 return 0;
545} 424}
546 425
547static inline bool rt6_check_neigh(struct rt6_info *rt) 426static inline int rt6_check_neigh(struct rt6_info *rt)
548{ 427{
549 struct neighbour *neigh; 428 struct neighbour *neigh;
550 bool ret = false; 429 int m;
551 430
552 neigh = rt->n; 431 rcu_read_lock();
432 neigh = dst_get_neighbour(&rt->dst);
553 if (rt->rt6i_flags & RTF_NONEXTHOP || 433 if (rt->rt6i_flags & RTF_NONEXTHOP ||
554 !(rt->rt6i_flags & RTF_GATEWAY)) 434 !(rt->rt6i_flags & RTF_GATEWAY))
555 ret = true; 435 m = 1;
556 else if (neigh) { 436 else if (neigh) {
557 read_lock_bh(&neigh->lock); 437 read_lock_bh(&neigh->lock);
558 if (neigh->nud_state & NUD_VALID) 438 if (neigh->nud_state & NUD_VALID)
559 ret = true; 439 m = 2;
560#ifdef CONFIG_IPV6_ROUTER_PREF 440#ifdef CONFIG_IPV6_ROUTER_PREF
561 else if (!(neigh->nud_state & NUD_FAILED)) 441 else if (neigh->nud_state & NUD_FAILED)
562 ret = true; 442 m = 0;
563#endif 443#endif
444 else
445 m = 1;
564 read_unlock_bh(&neigh->lock); 446 read_unlock_bh(&neigh->lock);
565 } 447 } else
566 return ret; 448 m = 0;
449 rcu_read_unlock();
450 return m;
567} 451}
568 452
569static int rt6_score_route(struct rt6_info *rt, int oif, 453static int rt6_score_route(struct rt6_info *rt, int oif,
570 int strict) 454 int strict)
571{ 455{
572 int m; 456 int m, n;
573 457
574 m = rt6_check_dev(rt, oif); 458 m = rt6_check_dev(rt, oif);
575 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 459 if (!m && (strict & RT6_LOOKUP_F_IFACE))
@@ -577,7 +461,8 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
577#ifdef CONFIG_IPV6_ROUTER_PREF 461#ifdef CONFIG_IPV6_ROUTER_PREF
578 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 462 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
579#endif 463#endif
580 if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) 464 n = rt6_check_neigh(rt);
465 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
581 return -1; 466 return -1;
582 return m; 467 return m;
583} 468}
@@ -630,6 +515,9 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
630 struct rt6_info *match, *rt0; 515 struct rt6_info *match, *rt0;
631 struct net *net; 516 struct net *net;
632 517
518 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
519 __func__, fn->leaf, oif);
520
633 rt0 = fn->rr_ptr; 521 rt0 = fn->rr_ptr;
634 if (!rt0) 522 if (!rt0)
635 fn->rr_ptr = rt0 = fn->leaf; 523 fn->rr_ptr = rt0 = fn->leaf;
@@ -648,7 +536,10 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
648 fn->rr_ptr = next; 536 fn->rr_ptr = next;
649 } 537 }
650 538
651 net = dev_net(rt0->dst.dev); 539 RT6_TRACE("%s() => %p\n",
540 __func__, match);
541
542 net = dev_net(rt0->rt6i_dev);
652 return match ? match : net->ipv6.ip6_null_entry; 543 return match ? match : net->ipv6.ip6_null_entry;
653} 544}
654 545
@@ -714,12 +605,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
714 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 605 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
715 606
716 if (rt) { 607 if (rt) {
717 if (!addrconf_finite_timeout(lifetime)) 608 if (!addrconf_finite_timeout(lifetime)) {
718 rt6_clean_expires(rt); 609 rt->rt6i_flags &= ~RTF_EXPIRES;
719 else 610 } else {
720 rt6_set_expires(rt, jiffies + HZ * lifetime); 611 rt->rt6i_expires = jiffies + HZ * lifetime;
721 612 rt->rt6i_flags |= RTF_EXPIRES;
722 ip6_rt_put(rt); 613 }
614 dst_release(&rt->dst);
723 } 615 }
724 return 0; 616 return 0;
725} 617}
@@ -741,7 +633,7 @@ do { \
741 goto restart; \ 633 goto restart; \
742 } \ 634 } \
743 } \ 635 } \
744} while (0) 636} while(0)
745 637
746static struct rt6_info *ip6_pol_route_lookup(struct net *net, 638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
747 struct fib6_table *table, 639 struct fib6_table *table,
@@ -755,8 +647,6 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
755restart: 647restart:
756 rt = fn->leaf; 648 rt = fn->leaf;
757 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 649 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
758 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
759 rt = rt6_multipath_select(rt, fl6);
760 BACKTRACK(net, &fl6->saddr); 650 BACKTRACK(net, &fl6->saddr);
761out: 651out:
762 dst_use(&rt->dst, jiffies); 652 dst_use(&rt->dst, jiffies);
@@ -765,13 +655,6 @@ out:
765 655
766} 656}
767 657
768struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
769 int flags)
770{
771 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
772}
773EXPORT_SYMBOL_GPL(ip6_route_lookup);
774
775struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
776 const struct in6_addr *saddr, int oif, int strict) 659 const struct in6_addr *saddr, int oif, int strict)
777{ 660{
@@ -820,12 +703,12 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
820int ip6_ins_rt(struct rt6_info *rt) 703int ip6_ins_rt(struct rt6_info *rt)
821{ 704{
822 struct nl_info info = { 705 struct nl_info info = {
823 .nl_net = dev_net(rt->dst.dev), 706 .nl_net = dev_net(rt->rt6i_dev),
824 }; 707 };
825 return __ip6_ins_rt(rt, &info); 708 return __ip6_ins_rt(rt, &info);
826} 709}
827 710
828static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, 711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
829 const struct in6_addr *daddr, 712 const struct in6_addr *daddr,
830 const struct in6_addr *saddr) 713 const struct in6_addr *saddr)
831{ 714{
@@ -838,27 +721,29 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
838 rt = ip6_rt_copy(ort, daddr); 721 rt = ip6_rt_copy(ort, daddr);
839 722
840 if (rt) { 723 if (rt) {
724 struct neighbour *neigh;
841 int attempts = !in_softirq(); 725 int attempts = !in_softirq();
842 726
843 if (!(rt->rt6i_flags & RTF_GATEWAY)) { 727 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
844 if (ort->rt6i_dst.plen != 128 && 728 if (ort->rt6i_dst.plen != 128 &&
845 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 729 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
846 rt->rt6i_flags |= RTF_ANYCAST; 730 rt->rt6i_flags |= RTF_ANYCAST;
847 rt->rt6i_gateway = *daddr; 731 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
848 } 732 }
849 733
850 rt->rt6i_flags |= RTF_CACHE; 734 rt->rt6i_flags |= RTF_CACHE;
851 735
852#ifdef CONFIG_IPV6_SUBTREES 736#ifdef CONFIG_IPV6_SUBTREES
853 if (rt->rt6i_src.plen && saddr) { 737 if (rt->rt6i_src.plen && saddr) {
854 rt->rt6i_src.addr = *saddr; 738 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
855 rt->rt6i_src.plen = 128; 739 rt->rt6i_src.plen = 128;
856 } 740 }
857#endif 741#endif
858 742
859 retry: 743 retry:
860 if (rt6_bind_neighbour(rt, rt->dst.dev)) { 744 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
861 struct net *net = dev_net(rt->dst.dev); 745 if (IS_ERR(neigh)) {
746 struct net *net = dev_net(rt->rt6i_dev);
862 int saved_rt_min_interval = 747 int saved_rt_min_interval =
863 net->ipv6.sysctl.ip6_rt_gc_min_interval; 748 net->ipv6.sysctl.ip6_rt_gc_min_interval;
864 int saved_rt_elasticity = 749 int saved_rt_elasticity =
@@ -877,10 +762,14 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
877 goto retry; 762 goto retry;
878 } 763 }
879 764
880 net_warn_ratelimited("Neighbour table overflow\n"); 765 if (net_ratelimit())
766 printk(KERN_WARNING
767 "ipv6: Neighbour table overflow.\n");
881 dst_free(&rt->dst); 768 dst_free(&rt->dst);
882 return NULL; 769 return NULL;
883 } 770 }
771 dst_set_neighbour(&rt->dst, neigh);
772
884 } 773 }
885 774
886 return rt; 775 return rt;
@@ -893,7 +782,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
893 782
894 if (rt) { 783 if (rt) {
895 rt->rt6i_flags |= RTF_CACHE; 784 rt->rt6i_flags |= RTF_CACHE;
896 rt->n = neigh_clone(ort->n); 785 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
897 } 786 }
898 return rt; 787 return rt;
899} 788}
@@ -918,8 +807,7 @@ restart_2:
918 807
919restart: 808restart:
920 rt = rt6_select(fn, oif, strict | reachable); 809 rt = rt6_select(fn, oif, strict | reachable);
921 if (rt->rt6i_nsiblings && oif == 0) 810
922 rt = rt6_multipath_select(rt, fl6);
923 BACKTRACK(net, &fl6->saddr); 811 BACKTRACK(net, &fl6->saddr);
924 if (rt == net->ipv6.ip6_null_entry || 812 if (rt == net->ipv6.ip6_null_entry ||
925 rt->rt6i_flags & RTF_CACHE) 813 rt->rt6i_flags & RTF_CACHE)
@@ -928,14 +816,14 @@ restart:
928 dst_hold(&rt->dst); 816 dst_hold(&rt->dst);
929 read_unlock_bh(&table->tb6_lock); 817 read_unlock_bh(&table->tb6_lock);
930 818
931 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) 819 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
932 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 820 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
933 else if (!(rt->dst.flags & DST_HOST)) 821 else if (!(rt->dst.flags & DST_HOST))
934 nrt = rt6_alloc_clone(rt, &fl6->daddr); 822 nrt = rt6_alloc_clone(rt, &fl6->daddr);
935 else 823 else
936 goto out2; 824 goto out2;
937 825
938 ip6_rt_put(rt); 826 dst_release(&rt->dst);
939 rt = nrt ? : net->ipv6.ip6_null_entry; 827 rt = nrt ? : net->ipv6.ip6_null_entry;
940 828
941 dst_hold(&rt->dst); 829 dst_hold(&rt->dst);
@@ -952,7 +840,7 @@ restart:
952 * Race condition! In the gap, when table->tb6_lock was 840 * Race condition! In the gap, when table->tb6_lock was
953 * released someone could insert this route. Relookup. 841 * released someone could insert this route. Relookup.
954 */ 842 */
955 ip6_rt_put(rt); 843 dst_release(&rt->dst);
956 goto relookup; 844 goto relookup;
957 845
958out: 846out:
@@ -975,16 +863,6 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *
975 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); 863 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
976} 864}
977 865
978static struct dst_entry *ip6_route_input_lookup(struct net *net,
979 struct net_device *dev,
980 struct flowi6 *fl6, int flags)
981{
982 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
983 flags |= RT6_LOOKUP_F_IFACE;
984
985 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
986}
987
988void ip6_route_input(struct sk_buff *skb) 866void ip6_route_input(struct sk_buff *skb)
989{ 867{
990 const struct ipv6hdr *iph = ipv6_hdr(skb); 868 const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -994,12 +872,15 @@ void ip6_route_input(struct sk_buff *skb)
994 .flowi6_iif = skb->dev->ifindex, 872 .flowi6_iif = skb->dev->ifindex,
995 .daddr = iph->daddr, 873 .daddr = iph->daddr,
996 .saddr = iph->saddr, 874 .saddr = iph->saddr,
997 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, 875 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
998 .flowi6_mark = skb->mark, 876 .flowi6_mark = skb->mark,
999 .flowi6_proto = iph->nexthdr, 877 .flowi6_proto = iph->nexthdr,
1000 }; 878 };
1001 879
1002 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); 880 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
881 flags |= RT6_LOOKUP_F_IFACE;
882
883 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
1003} 884}
1004 885
1005static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
@@ -1013,8 +894,6 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1013{ 894{
1014 int flags = 0; 895 int flags = 0;
1015 896
1016 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1017
1018 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 897 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1019 flags |= RT6_LOOKUP_F_IFACE; 898 flags |= RT6_LOOKUP_F_IFACE;
1020 899
@@ -1033,12 +912,11 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1033 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 912 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1034 struct dst_entry *new = NULL; 913 struct dst_entry *new = NULL;
1035 914
1036 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); 915 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
1037 if (rt) { 916 if (rt) {
1038 new = &rt->dst; 917 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
1039 918
1040 memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); 919 new = &rt->dst;
1041 rt6_init_peer(rt, net->ipv6.peers);
1042 920
1043 new->__use = 1; 921 new->__use = 1;
1044 new->input = dst_discard; 922 new->input = dst_discard;
@@ -1051,10 +929,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
1051 rt->rt6i_idev = ort->rt6i_idev; 929 rt->rt6i_idev = ort->rt6i_idev;
1052 if (rt->rt6i_idev) 930 if (rt->rt6i_idev)
1053 in6_dev_hold(rt->rt6i_idev); 931 in6_dev_hold(rt->rt6i_idev);
932 rt->rt6i_expires = 0;
1054 933
1055 rt->rt6i_gateway = ort->rt6i_gateway; 934 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1056 rt->rt6i_flags = ort->rt6i_flags; 935 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1057 rt6_clean_expires(rt);
1058 rt->rt6i_metric = 0; 936 rt->rt6i_metric = 0;
1059 937
1060 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 938 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -1079,16 +957,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1079 957
1080 rt = (struct rt6_info *) dst; 958 rt = (struct rt6_info *) dst;
1081 959
1082 /* All IPV6 dsts are created with ->obsolete set to the value 960 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1083 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 961 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1084 * into this function always. 962 if (!rt->rt6i_peer)
1085 */ 963 rt6_bind_peer(rt, 0);
1086 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) 964 rt->rt6i_peer_genid = rt6_peer_genid();
1087 return NULL; 965 }
1088
1089 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1090 return dst; 966 return dst;
1091 967 }
1092 return NULL; 968 return NULL;
1093} 969}
1094 970
@@ -1118,22 +994,19 @@ static void ip6_link_failure(struct sk_buff *skb)
1118 994
1119 rt = (struct rt6_info *) skb_dst(skb); 995 rt = (struct rt6_info *) skb_dst(skb);
1120 if (rt) { 996 if (rt) {
1121 if (rt->rt6i_flags & RTF_CACHE) 997 if (rt->rt6i_flags&RTF_CACHE) {
1122 rt6_update_expires(rt, 0); 998 dst_set_expires(&rt->dst, 0);
1123 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 999 rt->rt6i_flags |= RTF_EXPIRES;
1000 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1124 rt->rt6i_node->fn_sernum = -1; 1001 rt->rt6i_node->fn_sernum = -1;
1125 } 1002 }
1126} 1003}
1127 1004
1128static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1129 struct sk_buff *skb, u32 mtu)
1130{ 1006{
1131 struct rt6_info *rt6 = (struct rt6_info*)dst; 1007 struct rt6_info *rt6 = (struct rt6_info*)dst;
1132 1008
1133 dst_confirm(dst);
1134 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1009 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1135 struct net *net = dev_net(dst->dev);
1136
1137 rt6->rt6i_flags |= RTF_MODIFIED; 1010 rt6->rt6i_flags |= RTF_MODIFIED;
1138 if (mtu < IPV6_MIN_MTU) { 1011 if (mtu < IPV6_MIN_MTU) {
1139 u32 features = dst_metric(dst, RTAX_FEATURES); 1012 u32 features = dst_metric(dst, RTAX_FEATURES);
@@ -1142,66 +1015,9 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1142 dst_metric_set(dst, RTAX_FEATURES, features); 1015 dst_metric_set(dst, RTAX_FEATURES, features);
1143 } 1016 }
1144 dst_metric_set(dst, RTAX_MTU, mtu); 1017 dst_metric_set(dst, RTAX_MTU, mtu);
1145 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1146 } 1018 }
1147} 1019}
1148 1020
1149void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1150 int oif, u32 mark)
1151{
1152 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1153 struct dst_entry *dst;
1154 struct flowi6 fl6;
1155
1156 memset(&fl6, 0, sizeof(fl6));
1157 fl6.flowi6_oif = oif;
1158 fl6.flowi6_mark = mark;
1159 fl6.flowi6_flags = 0;
1160 fl6.daddr = iph->daddr;
1161 fl6.saddr = iph->saddr;
1162 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1163
1164 dst = ip6_route_output(net, NULL, &fl6);
1165 if (!dst->error)
1166 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1167 dst_release(dst);
1168}
1169EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1170
1171void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1172{
1173 ip6_update_pmtu(skb, sock_net(sk), mtu,
1174 sk->sk_bound_dev_if, sk->sk_mark);
1175}
1176EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1177
1178void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1179{
1180 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1181 struct dst_entry *dst;
1182 struct flowi6 fl6;
1183
1184 memset(&fl6, 0, sizeof(fl6));
1185 fl6.flowi6_oif = oif;
1186 fl6.flowi6_mark = mark;
1187 fl6.flowi6_flags = 0;
1188 fl6.daddr = iph->daddr;
1189 fl6.saddr = iph->saddr;
1190 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1191
1192 dst = ip6_route_output(net, NULL, &fl6);
1193 if (!dst->error)
1194 rt6_do_redirect(dst, NULL, skb);
1195 dst_release(dst);
1196}
1197EXPORT_SYMBOL_GPL(ip6_redirect);
1198
1199void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1200{
1201 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1202}
1203EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1204
1205static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1206{ 1022{
1207 struct net_device *dev = dst->dev; 1023 struct net_device *dev = dst->dev;
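
The ip6_rt_update_pmtu() hunks above only ever shrink a cached route's MTU, clamp it at the IPv6 minimum of 1280 bytes and, below that, require a fragment header on every packet. A userspace sketch of that rule, with a plain flag standing in for RTAX_FEATURE_ALLFRAG and a hypothetical route struct:

#include <stdbool.h>
#include <stdio.h>

#define IPV6_MIN_MTU 1280

struct rt {
	unsigned int mtu;
	bool modified;
	bool allfrag;	/* stands in for RTAX_FEATURE_ALLFRAG */
};

static void update_pmtu(struct rt *rt, unsigned int mtu)
{
	if (mtu >= rt->mtu)
		return;				/* only ever shrink the path MTU */

	rt->modified = true;
	if (mtu < IPV6_MIN_MTU) {
		/* Peers below 1280 must see a fragment header on every packet. */
		rt->allfrag = true;
		mtu = IPV6_MIN_MTU;
	}
	rt->mtu = mtu;
}

int main(void)
{
	struct rt rt = { .mtu = 1500 };

	update_pmtu(&rt, 1000);
	printf("mtu=%u allfrag=%d modified=%d\n", rt.mtu, rt.allfrag, rt.modified);
	return 0;
}
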
@@ -1224,15 +1040,10 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1224 return mtu; 1040 return mtu;
1225} 1041}
1226 1042
1227static unsigned int ip6_mtu(const struct dst_entry *dst) 1043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1228{ 1044{
1045 unsigned int mtu = IPV6_MIN_MTU;
1229 struct inet6_dev *idev; 1046 struct inet6_dev *idev;
1230 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1231
1232 if (mtu)
1233 return mtu;
1234
1235 mtu = IPV6_MIN_MTU;
1236 1047
1237 rcu_read_lock(); 1048 rcu_read_lock();
1238 idev = __in6_dev_get(dst->dev); 1049 idev = __in6_dev_get(dst->dev);
@@ -1248,42 +1059,37 @@ static DEFINE_SPINLOCK(icmp6_dst_lock);
1248 1059
1249struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 1060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1250 struct neighbour *neigh, 1061 struct neighbour *neigh,
1251 struct flowi6 *fl6) 1062 const struct in6_addr *addr)
1252{ 1063{
1253 struct dst_entry *dst;
1254 struct rt6_info *rt; 1064 struct rt6_info *rt;
1255 struct inet6_dev *idev = in6_dev_get(dev); 1065 struct inet6_dev *idev = in6_dev_get(dev);
1256 struct net *net = dev_net(dev); 1066 struct net *net = dev_net(dev);
1257 1067
1258 if (unlikely(!idev)) 1068 if (unlikely(idev == NULL))
1259 return ERR_PTR(-ENODEV); 1069 return NULL;
1260 1070
1261 rt = ip6_dst_alloc(net, dev, 0, NULL); 1071 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1262 if (unlikely(!rt)) { 1072 if (unlikely(rt == NULL)) {
1263 in6_dev_put(idev); 1073 in6_dev_put(idev);
1264 dst = ERR_PTR(-ENOMEM);
1265 goto out; 1074 goto out;
1266 } 1075 }
1267 1076
1268 if (neigh) 1077 if (neigh)
1269 neigh_hold(neigh); 1078 neigh_hold(neigh);
1270 else { 1079 else {
1271 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); 1080 neigh = ndisc_get_neigh(dev, addr);
1272 if (IS_ERR(neigh)) { 1081 if (IS_ERR(neigh))
1273 in6_dev_put(idev); 1082 neigh = NULL;
1274 dst_free(&rt->dst);
1275 return ERR_CAST(neigh);
1276 }
1277 } 1083 }
1278 1084
1279 rt->dst.flags |= DST_HOST; 1085 rt->dst.flags |= DST_HOST;
1280 rt->dst.output = ip6_output; 1086 rt->dst.output = ip6_output;
1281 rt->n = neigh; 1087 dst_set_neighbour(&rt->dst, neigh);
1282 atomic_set(&rt->dst.__refcnt, 1); 1088 atomic_set(&rt->dst.__refcnt, 1);
1283 rt->rt6i_dst.addr = fl6->daddr; 1089 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1284 rt->rt6i_dst.plen = 128; 1090 rt->rt6i_dst.plen = 128;
1285 rt->rt6i_idev = idev; 1091 rt->rt6i_idev = idev;
1286 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 1092 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1287 1093
1288 spin_lock_bh(&icmp6_dst_lock); 1094 spin_lock_bh(&icmp6_dst_lock);
1289 rt->dst.next = icmp6_dst_gc_list; 1095 rt->dst.next = icmp6_dst_gc_list;
@@ -1292,10 +1098,8 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1292 1098
1293 fib6_force_start_gc(net); 1099 fib6_force_start_gc(net);
1294 1100
1295 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1296
1297out: 1101out:
1298 return dst; 1102 return &rt->dst;
1299} 1103}
1300 1104
1301int icmp6_dst_gc(void) 1105int icmp6_dst_gc(void)
@@ -1367,6 +1171,12 @@ out:
1367 return entries > rt_max_size; 1171 return entries > rt_max_size;
1368} 1172}
1369 1173
1174/* Clean the host part of a prefix. Not necessary for the radix tree,
1175 but it results in cleaner routing tables.
1176
1177 Remove it only once everything else works!
1178 */
1179
1370int ip6_dst_hoplimit(struct dst_entry *dst) 1180int ip6_dst_hoplimit(struct dst_entry *dst)
1371{ 1181{
1372 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); 1182 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
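
The comment reinstated above refers to cleaning the host part of a prefix, i.e. zeroing every address bit beyond the prefix length. A userspace sketch of that masking step (hypothetical helper, not a kernel function):

#include <stdio.h>

/* Zero every bit of addr beyond plen, keeping only the network part. */
static void clean_host_part(unsigned char addr[16], int plen)
{
	int i, keep;

	for (i = 0; i < 16; i++) {
		keep = plen - i * 8;	/* prefix bits covering this byte */
		if (keep >= 8)
			continue;	/* byte is entirely inside the prefix */
		if (keep <= 0)
			addr[i] = 0;	/* byte is entirely host part */
		else
			addr[i] &= (unsigned char)(0xff << (8 - keep));
	}
}

int main(void)
{
	unsigned char a[16] = { 0x20, 0x01, 0x0d, 0xb8, 0x12, 0x34, 0x56, 0x78,
				0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, 0x11 };
	int i;

	clean_host_part(a, 48);		/* keep 2001:db8:1234::/48 */
	for (i = 0; i < 16; i++)
		printf("%02x%s", a[i], (i & 1) ? " " : "");
	printf("\n");
	return 0;
}
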
@@ -1419,33 +1229,23 @@ int ip6_route_add(struct fib6_config *cfg)
1419 if (cfg->fc_metric == 0) 1229 if (cfg->fc_metric == 0)
1420 cfg->fc_metric = IP6_RT_PRIO_USER; 1230 cfg->fc_metric = IP6_RT_PRIO_USER;
1421 1231
1422 err = -ENOBUFS; 1232 table = fib6_new_table(net, cfg->fc_table);
1423 if (cfg->fc_nlinfo.nlh && 1233 if (table == NULL) {
1424 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 1234 err = -ENOBUFS;
1425 table = fib6_get_table(net, cfg->fc_table);
1426 if (!table) {
1427 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1428 table = fib6_new_table(net, cfg->fc_table);
1429 }
1430 } else {
1431 table = fib6_new_table(net, cfg->fc_table);
1432 }
1433
1434 if (!table)
1435 goto out; 1235 goto out;
1236 }
1436 1237
1437 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); 1238 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1438 1239
1439 if (!rt) { 1240 if (rt == NULL) {
1440 err = -ENOMEM; 1241 err = -ENOMEM;
1441 goto out; 1242 goto out;
1442 } 1243 }
1443 1244
1444 if (cfg->fc_flags & RTF_EXPIRES) 1245 rt->dst.obsolete = -1;
1445 rt6_set_expires(rt, jiffies + 1246 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1446 clock_t_to_jiffies(cfg->fc_expires)); 1247 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1447 else 1248 0;
1448 rt6_clean_expires(rt);
1449 1249
1450 if (cfg->fc_protocol == RTPROT_UNSPEC) 1250 if (cfg->fc_protocol == RTPROT_UNSPEC)
1451 cfg->fc_protocol = RTPROT_BOOT; 1251 cfg->fc_protocol = RTPROT_BOOT;
@@ -1486,9 +1286,8 @@ int ip6_route_add(struct fib6_config *cfg)
1486 they would result in kernel looping; promote them to reject routes 1286 they would result in kernel looping; promote them to reject routes
1487 */ 1287 */
1488 if ((cfg->fc_flags & RTF_REJECT) || 1288 if ((cfg->fc_flags & RTF_REJECT) ||
1489 (dev && (dev->flags & IFF_LOOPBACK) && 1289 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1490 !(addr_type & IPV6_ADDR_LOOPBACK) && 1290 && !(cfg->fc_flags&RTF_LOCAL))) {
1491 !(cfg->fc_flags & RTF_LOCAL))) {
1492 /* hold loopback dev/idev if we haven't done so. */ 1291 /* hold loopback dev/idev if we haven't done so. */
1493 if (dev != net->loopback_dev) { 1292 if (dev != net->loopback_dev) {
1494 if (dev) { 1293 if (dev) {
@@ -1505,21 +1304,8 @@ int ip6_route_add(struct fib6_config *cfg)
1505 } 1304 }
1506 rt->dst.output = ip6_pkt_discard_out; 1305 rt->dst.output = ip6_pkt_discard_out;
1507 rt->dst.input = ip6_pkt_discard; 1306 rt->dst.input = ip6_pkt_discard;
1307 rt->dst.error = -ENETUNREACH;
1508 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1308 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1509 switch (cfg->fc_type) {
1510 case RTN_BLACKHOLE:
1511 rt->dst.error = -EINVAL;
1512 break;
1513 case RTN_PROHIBIT:
1514 rt->dst.error = -EACCES;
1515 break;
1516 case RTN_THROW:
1517 rt->dst.error = -EAGAIN;
1518 break;
1519 default:
1520 rt->dst.error = -ENETUNREACH;
1521 break;
1522 }
1523 goto install_route; 1309 goto install_route;
1524 } 1310 }
1525 1311
@@ -1528,7 +1314,7 @@ int ip6_route_add(struct fib6_config *cfg)
1528 int gwa_type; 1314 int gwa_type;
1529 1315
1530 gw_addr = &cfg->fc_gateway; 1316 gw_addr = &cfg->fc_gateway;
1531 rt->rt6i_gateway = *gw_addr; 1317 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1532 gwa_type = ipv6_addr_type(gw_addr); 1318 gwa_type = ipv6_addr_type(gw_addr);
1533 1319
1534 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1320 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1542,39 +1328,39 @@ int ip6_route_add(struct fib6_config *cfg)
1542 some exceptions. --ANK 1328 some exceptions. --ANK
1543 */ 1329 */
1544 err = -EINVAL; 1330 err = -EINVAL;
1545 if (!(gwa_type & IPV6_ADDR_UNICAST)) 1331 if (!(gwa_type&IPV6_ADDR_UNICAST))
1546 goto out; 1332 goto out;
1547 1333
1548 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1334 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1549 1335
1550 err = -EHOSTUNREACH; 1336 err = -EHOSTUNREACH;
1551 if (!grt) 1337 if (grt == NULL)
1552 goto out; 1338 goto out;
1553 if (dev) { 1339 if (dev) {
1554 if (dev != grt->dst.dev) { 1340 if (dev != grt->rt6i_dev) {
1555 ip6_rt_put(grt); 1341 dst_release(&grt->dst);
1556 goto out; 1342 goto out;
1557 } 1343 }
1558 } else { 1344 } else {
1559 dev = grt->dst.dev; 1345 dev = grt->rt6i_dev;
1560 idev = grt->rt6i_idev; 1346 idev = grt->rt6i_idev;
1561 dev_hold(dev); 1347 dev_hold(dev);
1562 in6_dev_hold(grt->rt6i_idev); 1348 in6_dev_hold(grt->rt6i_idev);
1563 } 1349 }
1564 if (!(grt->rt6i_flags & RTF_GATEWAY)) 1350 if (!(grt->rt6i_flags&RTF_GATEWAY))
1565 err = 0; 1351 err = 0;
1566 ip6_rt_put(grt); 1352 dst_release(&grt->dst);
1567 1353
1568 if (err) 1354 if (err)
1569 goto out; 1355 goto out;
1570 } 1356 }
1571 err = -EINVAL; 1357 err = -EINVAL;
1572 if (!dev || (dev->flags & IFF_LOOPBACK)) 1358 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1573 goto out; 1359 goto out;
1574 } 1360 }
1575 1361
1576 err = -ENODEV; 1362 err = -ENODEV;
1577 if (!dev) 1363 if (dev == NULL)
1578 goto out; 1364 goto out;
1579 1365
1580 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 1366 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
@@ -1582,15 +1368,18 @@ int ip6_route_add(struct fib6_config *cfg)
1582 err = -EINVAL; 1368 err = -EINVAL;
1583 goto out; 1369 goto out;
1584 } 1370 }
1585 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; 1371 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1586 rt->rt6i_prefsrc.plen = 128; 1372 rt->rt6i_prefsrc.plen = 128;
1587 } else 1373 } else
1588 rt->rt6i_prefsrc.plen = 0; 1374 rt->rt6i_prefsrc.plen = 0;
1589 1375
1590 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1376 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1591 err = rt6_bind_neighbour(rt, dev); 1377 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1592 if (err) 1378 if (IS_ERR(n)) {
1379 err = PTR_ERR(n);
1593 goto out; 1380 goto out;
1381 }
1382 dst_set_neighbour(&rt->dst, n);
1594 } 1383 }
1595 1384
1596 rt->rt6i_flags = cfg->fc_flags; 1385 rt->rt6i_flags = cfg->fc_flags;
@@ -1636,27 +1425,26 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1636{ 1425{
1637 int err; 1426 int err;
1638 struct fib6_table *table; 1427 struct fib6_table *table;
1639 struct net *net = dev_net(rt->dst.dev); 1428 struct net *net = dev_net(rt->rt6i_dev);
1640 1429
1641 if (rt == net->ipv6.ip6_null_entry) { 1430 if (rt == net->ipv6.ip6_null_entry)
1642 err = -ENOENT; 1431 return -ENOENT;
1643 goto out;
1644 }
1645 1432
1646 table = rt->rt6i_table; 1433 table = rt->rt6i_table;
1647 write_lock_bh(&table->tb6_lock); 1434 write_lock_bh(&table->tb6_lock);
1435
1648 err = fib6_del(rt, info); 1436 err = fib6_del(rt, info);
1437 dst_release(&rt->dst);
1438
1649 write_unlock_bh(&table->tb6_lock); 1439 write_unlock_bh(&table->tb6_lock);
1650 1440
1651out:
1652 ip6_rt_put(rt);
1653 return err; 1441 return err;
1654} 1442}
1655 1443
1656int ip6_del_rt(struct rt6_info *rt) 1444int ip6_del_rt(struct rt6_info *rt)
1657{ 1445{
1658 struct nl_info info = { 1446 struct nl_info info = {
1659 .nl_net = dev_net(rt->dst.dev), 1447 .nl_net = dev_net(rt->rt6i_dev),
1660 }; 1448 };
1661 return __ip6_del_rt(rt, &info); 1449 return __ip6_del_rt(rt, &info);
1662} 1450}
@@ -1669,7 +1457,7 @@ static int ip6_route_del(struct fib6_config *cfg)
1669 int err = -ESRCH; 1457 int err = -ESRCH;
1670 1458
1671 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1459 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1672 if (!table) 1460 if (table == NULL)
1673 return err; 1461 return err;
1674 1462
1675 read_lock_bh(&table->tb6_lock); 1463 read_lock_bh(&table->tb6_lock);
@@ -1681,8 +1469,8 @@ static int ip6_route_del(struct fib6_config *cfg)
1681 if (fn) { 1469 if (fn) {
1682 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1470 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1683 if (cfg->fc_ifindex && 1471 if (cfg->fc_ifindex &&
1684 (!rt->dst.dev || 1472 (rt->rt6i_dev == NULL ||
1685 rt->dst.dev->ifindex != cfg->fc_ifindex)) 1473 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1686 continue; 1474 continue;
1687 if (cfg->fc_flags & RTF_GATEWAY && 1475 if (cfg->fc_flags & RTF_GATEWAY &&
1688 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1476 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
@@ -1700,93 +1488,108 @@ static int ip6_route_del(struct fib6_config *cfg)
1700 return err; 1488 return err;
1701} 1489}
1702 1490
1703static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 1491/*
1492 * Handle redirects
1493 */
1494struct ip6rd_flowi {
1495 struct flowi6 fl6;
1496 struct in6_addr gateway;
1497};
1498
1499static struct rt6_info *__ip6_route_redirect(struct net *net,
1500 struct fib6_table *table,
1501 struct flowi6 *fl6,
1502 int flags)
1704{ 1503{
1705 struct net *net = dev_net(skb->dev); 1504 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1706 struct netevent_redirect netevent; 1505 struct rt6_info *rt;
1707 struct rt6_info *rt, *nrt = NULL; 1506 struct fib6_node *fn;
1708 const struct in6_addr *target;
1709 struct ndisc_options ndopts;
1710 const struct in6_addr *dest;
1711 struct neighbour *old_neigh;
1712 struct inet6_dev *in6_dev;
1713 struct neighbour *neigh;
1714 struct icmp6hdr *icmph;
1715 int optlen, on_link;
1716 u8 *lladdr;
1717 1507
1718 optlen = skb->tail - skb->transport_header; 1508 /*
1719 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); 1509 * Get the "current" route for this destination and
1510 * check if the redirect has come from the appropriate router.
1511 *
1512 * RFC 2461 specifies that redirects should only be
1513 * accepted if they come from the nexthop to the target.
1514 * Due to the way the routes are chosen, this notion
1515 * is a bit fuzzy and one might need to check all possible
1516 * routes.
1517 */
1720 1518
1721 if (optlen < 0) { 1519 read_lock_bh(&table->tb6_lock);
1722 net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 1520 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1723 return; 1521restart:
1522 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1523 /*
1524 * Current route is on-link; redirect is always invalid.
1525 *
1526 * Seems, previous statement is not true. It could
1527 * be node, which looks for us as on-link (f.e. proxy ndisc)
1528 * But then router serving it might decide, that we should
1529 * know truth 8)8) --ANK (980726).
1530 */
1531 if (rt6_check_expired(rt))
1532 continue;
1533 if (!(rt->rt6i_flags & RTF_GATEWAY))
1534 continue;
1535 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1536 continue;
1537 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1538 continue;
1539 break;
1724 } 1540 }
1725 1541
1726 icmph = icmp6_hdr(skb); 1542 if (!rt)
1727 target = (const struct in6_addr *) (icmph + 1); 1543 rt = net->ipv6.ip6_null_entry;
1728 dest = target + 1; 1544 BACKTRACK(net, &fl6->saddr);
1729 1545out:
1730 if (ipv6_addr_is_multicast(dest)) { 1546 dst_hold(&rt->dst);
1731 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1732 return;
1733 }
1734 1547
1735 on_link = 0; 1548 read_unlock_bh(&table->tb6_lock);
1736 if (ipv6_addr_equal(dest, target)) {
1737 on_link = 1;
1738 } else if (ipv6_addr_type(target) !=
1739 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1740 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1741 return;
1742 }
1743 1549
1744 in6_dev = __in6_dev_get(skb->dev); 1550 return rt;
1745 if (!in6_dev) 1551};
1746 return;
1747 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1748 return;
1749 1552
1750 /* RFC2461 8.1: 1553static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1751 * The IP source address of the Redirect MUST be the same as the current 1554 const struct in6_addr *src,
1752 * first-hop router for the specified ICMP Destination Address. 1555 const struct in6_addr *gateway,
1753 */ 1556 struct net_device *dev)
1557{
1558 int flags = RT6_LOOKUP_F_HAS_SADDR;
1559 struct net *net = dev_net(dev);
1560 struct ip6rd_flowi rdfl = {
1561 .fl6 = {
1562 .flowi6_oif = dev->ifindex,
1563 .daddr = *dest,
1564 .saddr = *src,
1565 },
1566 };
1754 1567
1755 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { 1568 ipv6_addr_copy(&rdfl.gateway, gateway);
1756 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1757 return;
1758 }
1759 1569
1760 lladdr = NULL; 1570 if (rt6_need_strict(dest))
1761 if (ndopts.nd_opts_tgt_lladdr) { 1571 flags |= RT6_LOOKUP_F_IFACE;
1762 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1763 skb->dev);
1764 if (!lladdr) {
1765 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1766 return;
1767 }
1768 }
1769 1572
1770 rt = (struct rt6_info *) dst; 1573 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1771 if (rt == net->ipv6.ip6_null_entry) { 1574 flags, __ip6_route_redirect);
1772 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 1575}
1773 return;
1774 }
1775 1576
1776 /* Redirect received -> path was valid. 1577void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1777 * Look, redirects are sent only in response to data packets, 1578 const struct in6_addr *saddr,
1778 * so that this nexthop apparently is reachable. --ANK 1579 struct neighbour *neigh, u8 *lladdr, int on_link)
1779 */ 1580{
1780 dst_confirm(&rt->dst); 1581 struct rt6_info *rt, *nrt = NULL;
1582 struct netevent_redirect netevent;
1583 struct net *net = dev_net(neigh->dev);
1781 1584
1782 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); 1585 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1783 if (!neigh)
1784 return;
1785 1586
1786 /* Duplicate redirect: silently ignore. */ 1587 if (rt == net->ipv6.ip6_null_entry) {
1787 old_neigh = rt->n; 1588 if (net_ratelimit())
1788 if (neigh == old_neigh) 1589 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1590 "for redirect target\n");
1789 goto out; 1591 goto out;
1592 }
1790 1593
1791 /* 1594 /*
1792 * We have finally decided to accept it. 1595 * We have finally decided to accept it.
@@ -1799,53 +1602,172 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
1799 NEIGH_UPDATE_F_ISROUTER)) 1602 NEIGH_UPDATE_F_ISROUTER))
1800 ); 1603 );
1801 1604
1605 /*
1606 * Redirect received -> path was valid.
1607 * Look, redirects are sent only in response to data packets,
1608 * so that this nexthop apparently is reachable. --ANK
1609 */
1610 dst_confirm(&rt->dst);
1611
1612 /* Duplicate redirect: silently ignore. */
1613 if (neigh == dst_get_neighbour_raw(&rt->dst))
1614 goto out;
1615
1802 nrt = ip6_rt_copy(rt, dest); 1616 nrt = ip6_rt_copy(rt, dest);
1803 if (!nrt) 1617 if (nrt == NULL)
1804 goto out; 1618 goto out;
1805 1619
1806 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1620 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1807 if (on_link) 1621 if (on_link)
1808 nrt->rt6i_flags &= ~RTF_GATEWAY; 1622 nrt->rt6i_flags &= ~RTF_GATEWAY;
1809 1623
1810 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 1624 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1811 nrt->n = neigh_clone(neigh); 1625 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1812 1626
1813 if (ip6_ins_rt(nrt)) 1627 if (ip6_ins_rt(nrt))
1814 goto out; 1628 goto out;
1815 1629
1816 netevent.old = &rt->dst; 1630 netevent.old = &rt->dst;
1817 netevent.old_neigh = old_neigh;
1818 netevent.new = &nrt->dst; 1631 netevent.new = &nrt->dst;
1819 netevent.new_neigh = neigh;
1820 netevent.daddr = dest;
1821 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1632 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1822 1633
1823 if (rt->rt6i_flags & RTF_CACHE) { 1634 if (rt->rt6i_flags&RTF_CACHE) {
1824 rt = (struct rt6_info *) dst_clone(&rt->dst); 1635 ip6_del_rt(rt);
1636 return;
1637 }
1638
1639out:
1640 dst_release(&rt->dst);
1641}
1642
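
Both sides of the redirect code above enforce the rule from RFC 2461, section 8.1: an ICMPv6 redirect is only trusted when it is sent by the router currently used as the first hop for the destination, on the interface the packet arrived on. The following stand-alone C sketch restates that acceptance test; struct demo_route and redirect_acceptable() are hypothetical stand-ins for the rt6_info fields the kernel consults, not kernel API.

#include <stdbool.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Hypothetical mirror of the kernel check: a cached route may back a
 * redirect only if it has not expired, it is a gateway route on the
 * receiving interface, and its gateway equals the IPv6 source address
 * of the redirect message. */
struct demo_route {
	struct in6_addr gateway;	/* rt6i_gateway */
	int		ifindex;	/* rt6i_dev->ifindex */
	bool		is_gateway;	/* RTF_GATEWAY set */
	bool		expired;	/* rt6_check_expired() said so */
};

static bool redirect_acceptable(const struct demo_route *rt,
				const struct in6_addr *redirect_src,
				int rx_ifindex)
{
	if (rt->expired || !rt->is_gateway || rt->ifindex != rx_ifindex)
		return false;
	return memcmp(&rt->gateway, redirect_src, sizeof(*redirect_src)) == 0;
}

int main(void)
{
	struct demo_route rt = { .ifindex = 2, .is_gateway = true };
	struct in6_addr src;

	inet_pton(AF_INET6, "fe80::1", &rt.gateway);
	inet_pton(AF_INET6, "fe80::1", &src);
	return redirect_acceptable(&rt, &src, 2) ? 0 : 1;	/* 0 = accepted */
}
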
1643/*
1644 * Handle ICMP "packet too big" messages
1645 * i.e. Path MTU discovery
1646 */
1647
1648static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1649 struct net *net, u32 pmtu, int ifindex)
1650{
1651 struct rt6_info *rt, *nrt;
1652 int allfrag = 0;
1653again:
1654 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1655 if (rt == NULL)
1656 return;
1657
1658 if (rt6_check_expired(rt)) {
1825 ip6_del_rt(rt); 1659 ip6_del_rt(rt);
1660 goto again;
1826 } 1661 }
1827 1662
1663 if (pmtu >= dst_mtu(&rt->dst))
1664 goto out;
1665
1666 if (pmtu < IPV6_MIN_MTU) {
1667 /*
1668 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1669 * MTU (1280) and a fragment header should always be included
1670 * after a node receiving Too Big message reporting PMTU is
1671 * less than the IPv6 Minimum Link MTU.
1672 */
1673 pmtu = IPV6_MIN_MTU;
1674 allfrag = 1;
1675 }
1676
1677 /* New mtu received -> path was valid.
1678 They are sent only in response to data packets,
1679 so that this nexthop apparently is reachable. --ANK
1680 */
1681 dst_confirm(&rt->dst);
1682
 1683 /* Host route. If it is static, it would be better
 1684 not to override it but to add a new one, so that
 1685 when the cache entry expires the old pmtu
 1686 is restored automatically.
1687 */
1688 if (rt->rt6i_flags & RTF_CACHE) {
1689 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1690 if (allfrag) {
1691 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1692 features |= RTAX_FEATURE_ALLFRAG;
1693 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1694 }
1695 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1696 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1697 goto out;
1698 }
1699
1700 /* Network route.
1701 Two cases are possible:
1702 1. It is connected route. Action: COW
1703 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1704 */
1705 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1706 nrt = rt6_alloc_cow(rt, daddr, saddr);
1707 else
1708 nrt = rt6_alloc_clone(rt, daddr);
1709
1710 if (nrt) {
1711 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1712 if (allfrag) {
1713 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1714 features |= RTAX_FEATURE_ALLFRAG;
1715 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1716 }
1717
 1718 /* According to RFC 1981, detecting a PMTU increase shouldn't
 1719 * happen within 5 minutes; the recommended timer is 10 minutes.
1720 * Here this route expiration time is set to ip6_rt_mtu_expires
1721 * which is 10 mins. After 10 mins the decreased pmtu is expired
1722 * and detecting PMTU increase will be automatically happened.
1723 */
1724 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1725 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1726
1727 ip6_ins_rt(nrt);
1728 }
1828out: 1729out:
1829 neigh_release(neigh); 1730 dst_release(&rt->dst);
1731}
1732
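
rt6_do_pmtu_disc() above never records a path MTU below the IPv6 minimum link MTU: a Packet Too Big reporting less than 1280 bytes is clamped to 1280 and the route is also marked so that a fragment header is always emitted (RTAX_FEATURE_ALLFRAG), per RFC 2460 and RFC 1981. A small stand-alone sketch of just that clamping decision; the names are illustrative, not kernel API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_IPV6_MIN_MTU 1280u	/* same value as the kernel's IPV6_MIN_MTU */

struct demo_pmtu {
	uint32_t mtu;		/* path MTU to record for the route */
	bool	 allfrag;	/* always add a fragment header */
};

/* Apply a Packet Too Big report to the MTU currently cached for a
 * destination: ignore increases, clamp reports below 1280 to 1280 and
 * remember that every packet must then carry a fragment header. */
static struct demo_pmtu apply_pkt_too_big(uint32_t current_mtu, uint32_t reported)
{
	struct demo_pmtu out = { .mtu = current_mtu, .allfrag = false };

	if (reported >= current_mtu)
		return out;			/* not a decrease */

	if (reported < DEMO_IPV6_MIN_MTU) {
		out.mtu = DEMO_IPV6_MIN_MTU;	/* clamp ... */
		out.allfrag = true;		/* ... and force frag headers */
	} else {
		out.mtu = reported;
	}
	return out;
}

int main(void)
{
	struct demo_pmtu p = apply_pkt_too_big(1500, 1000);

	printf("mtu=%u allfrag=%d\n", (unsigned)p.mtu, (int)p.allfrag);	/* mtu=1280 allfrag=1 */
	return 0;
}
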
1733void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1734 struct net_device *dev, u32 pmtu)
1735{
1736 struct net *net = dev_net(dev);
1737
1738 /*
1739 * RFC 1981 states that a node "MUST reduce the size of the packets it
1740 * is sending along the path" that caused the Packet Too Big message.
1741 * Since it's not possible in the general case to determine which
1742 * interface was used to send the original packet, we update the MTU
1743 * on the interface that will be used to send future packets. We also
1744 * update the MTU on the interface that received the Packet Too Big in
1745 * case the original packet was forced out that interface with
1746 * SO_BINDTODEVICE or similar. This is the next best thing to the
1747 * correct behaviour, which would be to update the MTU on all
1748 * interfaces.
1749 */
1750 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1751 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1830} 1752}
1831 1753
1832/* 1754/*
1833 * Misc support functions 1755 * Misc support functions
1834 */ 1756 */
1835 1757
1836static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 1758static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1837 const struct in6_addr *dest) 1759 const struct in6_addr *dest)
1838{ 1760{
1839 struct net *net = dev_net(ort->dst.dev); 1761 struct net *net = dev_net(ort->rt6i_dev);
1840 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, 1762 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1841 ort->rt6i_table); 1763 ort->dst.dev, 0);
1842 1764
1843 if (rt) { 1765 if (rt) {
1844 rt->dst.input = ort->dst.input; 1766 rt->dst.input = ort->dst.input;
1845 rt->dst.output = ort->dst.output; 1767 rt->dst.output = ort->dst.output;
1846 rt->dst.flags |= DST_HOST; 1768 rt->dst.flags |= DST_HOST;
1847 1769
1848 rt->rt6i_dst.addr = *dest; 1770 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1849 rt->rt6i_dst.plen = 128; 1771 rt->rt6i_dst.plen = 128;
1850 dst_copy_metrics(&rt->dst, &ort->dst); 1772 dst_copy_metrics(&rt->dst, &ort->dst);
1851 rt->dst.error = ort->dst.error; 1773 rt->dst.error = ort->dst.error;
@@ -1853,14 +1775,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1853 if (rt->rt6i_idev) 1775 if (rt->rt6i_idev)
1854 in6_dev_hold(rt->rt6i_idev); 1776 in6_dev_hold(rt->rt6i_idev);
1855 rt->dst.lastuse = jiffies; 1777 rt->dst.lastuse = jiffies;
1778 rt->rt6i_expires = 0;
1856 1779
1857 rt->rt6i_gateway = ort->rt6i_gateway; 1780 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1858 rt->rt6i_flags = ort->rt6i_flags; 1781 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1859 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1860 (RTF_DEFAULT | RTF_ADDRCONF))
1861 rt6_set_from(rt, ort);
1862 else
1863 rt6_clean_expires(rt);
1864 rt->rt6i_metric = 0; 1782 rt->rt6i_metric = 0;
1865 1783
1866#ifdef CONFIG_IPV6_SUBTREES 1784#ifdef CONFIG_IPV6_SUBTREES
@@ -1882,16 +1800,16 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
1882 struct fib6_table *table; 1800 struct fib6_table *table;
1883 1801
1884 table = fib6_get_table(net, RT6_TABLE_INFO); 1802 table = fib6_get_table(net, RT6_TABLE_INFO);
1885 if (!table) 1803 if (table == NULL)
1886 return NULL; 1804 return NULL;
1887 1805
1888 read_lock_bh(&table->tb6_lock); 1806 write_lock_bh(&table->tb6_lock);
1889 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1807 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1890 if (!fn) 1808 if (!fn)
1891 goto out; 1809 goto out;
1892 1810
1893 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1811 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1894 if (rt->dst.dev->ifindex != ifindex) 1812 if (rt->rt6i_dev->ifindex != ifindex)
1895 continue; 1813 continue;
1896 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1814 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1897 continue; 1815 continue;
@@ -1901,14 +1819,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
1901 break; 1819 break;
1902 } 1820 }
1903out: 1821out:
1904 read_unlock_bh(&table->tb6_lock); 1822 write_unlock_bh(&table->tb6_lock);
1905 return rt; 1823 return rt;
1906} 1824}
1907 1825
1908static struct rt6_info *rt6_add_route_info(struct net *net, 1826static struct rt6_info *rt6_add_route_info(struct net *net,
1909 const struct in6_addr *prefix, int prefixlen, 1827 const struct in6_addr *prefix, int prefixlen,
1910 const struct in6_addr *gwaddr, int ifindex, 1828 const struct in6_addr *gwaddr, int ifindex,
1911 unsigned int pref) 1829 unsigned pref)
1912{ 1830{
1913 struct fib6_config cfg = { 1831 struct fib6_config cfg = {
1914 .fc_table = RT6_TABLE_INFO, 1832 .fc_table = RT6_TABLE_INFO,
@@ -1917,13 +1835,13 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
1917 .fc_dst_len = prefixlen, 1835 .fc_dst_len = prefixlen,
1918 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1836 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1919 RTF_UP | RTF_PREF(pref), 1837 RTF_UP | RTF_PREF(pref),
1920 .fc_nlinfo.portid = 0, 1838 .fc_nlinfo.pid = 0,
1921 .fc_nlinfo.nlh = NULL, 1839 .fc_nlinfo.nlh = NULL,
1922 .fc_nlinfo.nl_net = net, 1840 .fc_nlinfo.nl_net = net,
1923 }; 1841 };
1924 1842
1925 cfg.fc_dst = *prefix; 1843 ipv6_addr_copy(&cfg.fc_dst, prefix);
1926 cfg.fc_gateway = *gwaddr; 1844 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1927 1845
1928 /* We should treat it as a default route if prefix length is 0. */ 1846 /* We should treat it as a default route if prefix length is 0. */
1929 if (!prefixlen) 1847 if (!prefixlen)
@@ -1941,19 +1859,19 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
1941 struct fib6_table *table; 1859 struct fib6_table *table;
1942 1860
1943 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1861 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1944 if (!table) 1862 if (table == NULL)
1945 return NULL; 1863 return NULL;
1946 1864
1947 read_lock_bh(&table->tb6_lock); 1865 write_lock_bh(&table->tb6_lock);
1948 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1866 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1949 if (dev == rt->dst.dev && 1867 if (dev == rt->rt6i_dev &&
1950 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1868 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1951 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1869 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1952 break; 1870 break;
1953 } 1871 }
1954 if (rt) 1872 if (rt)
1955 dst_hold(&rt->dst); 1873 dst_hold(&rt->dst);
1956 read_unlock_bh(&table->tb6_lock); 1874 write_unlock_bh(&table->tb6_lock);
1957 return rt; 1875 return rt;
1958} 1876}
1959 1877
@@ -1967,12 +1885,12 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1967 .fc_ifindex = dev->ifindex, 1885 .fc_ifindex = dev->ifindex,
1968 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1886 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1969 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1887 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1970 .fc_nlinfo.portid = 0, 1888 .fc_nlinfo.pid = 0,
1971 .fc_nlinfo.nlh = NULL, 1889 .fc_nlinfo.nlh = NULL,
1972 .fc_nlinfo.nl_net = dev_net(dev), 1890 .fc_nlinfo.nl_net = dev_net(dev),
1973 }; 1891 };
1974 1892
1975 cfg.fc_gateway = *gwaddr; 1893 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1976 1894
1977 ip6_route_add(&cfg); 1895 ip6_route_add(&cfg);
1978 1896
@@ -1986,7 +1904,7 @@ void rt6_purge_dflt_routers(struct net *net)
1986 1904
1987 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1905 /* NOTE: Keep consistent with rt6_get_dflt_router */
1988 table = fib6_get_table(net, RT6_TABLE_DFLT); 1906 table = fib6_get_table(net, RT6_TABLE_DFLT);
1989 if (!table) 1907 if (table == NULL)
1990 return; 1908 return;
1991 1909
1992restart: 1910restart:
@@ -2018,9 +1936,9 @@ static void rtmsg_to_fib6_config(struct net *net,
2018 1936
2019 cfg->fc_nlinfo.nl_net = net; 1937 cfg->fc_nlinfo.nl_net = net;
2020 1938
2021 cfg->fc_dst = rtmsg->rtmsg_dst; 1939 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
2022 cfg->fc_src = rtmsg->rtmsg_src; 1940 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
2023 cfg->fc_gateway = rtmsg->rtmsg_gateway; 1941 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
2024} 1942}
2025 1943
2026int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1944int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -2032,7 +1950,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2032 switch(cmd) { 1950 switch(cmd) {
2033 case SIOCADDRT: /* Add a route */ 1951 case SIOCADDRT: /* Add a route */
2034 case SIOCDELRT: /* Delete a route */ 1952 case SIOCDELRT: /* Delete a route */
2035 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1953 if (!capable(CAP_NET_ADMIN))
2036 return -EPERM; 1954 return -EPERM;
2037 err = copy_from_user(&rtmsg, arg, 1955 err = copy_from_user(&rtmsg, arg,
2038 sizeof(struct in6_rtmsg)); 1956 sizeof(struct in6_rtmsg));
@@ -2119,14 +2037,17 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2119 2037
2120struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2038struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2121 const struct in6_addr *addr, 2039 const struct in6_addr *addr,
2122 bool anycast) 2040 int anycast)
2123{ 2041{
2124 struct net *net = dev_net(idev->dev); 2042 struct net *net = dev_net(idev->dev);
2125 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); 2043 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2126 int err; 2044 net->loopback_dev, 0);
2045 struct neighbour *neigh;
2127 2046
2128 if (!rt) { 2047 if (rt == NULL) {
2129 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); 2048 if (net_ratelimit())
2049 pr_warning("IPv6: Maximum number of routes reached,"
2050 " consider increasing route/max_size.\n");
2130 return ERR_PTR(-ENOMEM); 2051 return ERR_PTR(-ENOMEM);
2131 } 2052 }
2132 2053
@@ -2136,19 +2057,22 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2136 rt->dst.input = ip6_input; 2057 rt->dst.input = ip6_input;
2137 rt->dst.output = ip6_output; 2058 rt->dst.output = ip6_output;
2138 rt->rt6i_idev = idev; 2059 rt->rt6i_idev = idev;
2060 rt->dst.obsolete = -1;
2139 2061
2140 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2062 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2141 if (anycast) 2063 if (anycast)
2142 rt->rt6i_flags |= RTF_ANYCAST; 2064 rt->rt6i_flags |= RTF_ANYCAST;
2143 else 2065 else
2144 rt->rt6i_flags |= RTF_LOCAL; 2066 rt->rt6i_flags |= RTF_LOCAL;
2145 err = rt6_bind_neighbour(rt, rt->dst.dev); 2067 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2146 if (err) { 2068 if (IS_ERR(neigh)) {
2147 dst_free(&rt->dst); 2069 dst_free(&rt->dst);
2148 return ERR_PTR(err); 2070
2071 return ERR_CAST(neigh);
2149 } 2072 }
2073 dst_set_neighbour(&rt->dst, neigh);
2150 2074
2151 rt->rt6i_dst.addr = *addr; 2075 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2152 rt->rt6i_dst.plen = 128; 2076 rt->rt6i_dst.plen = 128;
2153 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2077 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2154 2078
@@ -2166,7 +2090,7 @@ int ip6_route_get_saddr(struct net *net,
2166 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); 2090 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2167 int err = 0; 2091 int err = 0;
2168 if (rt->rt6i_prefsrc.plen) 2092 if (rt->rt6i_prefsrc.plen)
2169 *saddr = rt->rt6i_prefsrc.addr; 2093 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2170 else 2094 else
2171 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2095 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2172 daddr, prefs, saddr); 2096 daddr, prefs, saddr);
@@ -2186,7 +2110,7 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2186 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 2110 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2187 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 2111 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2188 2112
2189 if (((void *)rt->dst.dev == dev || !dev) && 2113 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2190 rt != net->ipv6.ip6_null_entry && 2114 rt != net->ipv6.ip6_null_entry &&
2191 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 2115 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2192 /* remove prefsrc entry */ 2116 /* remove prefsrc entry */
@@ -2216,10 +2140,11 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
2216 const struct arg_dev_net *adn = arg; 2140 const struct arg_dev_net *adn = arg;
2217 const struct net_device *dev = adn->dev; 2141 const struct net_device *dev = adn->dev;
2218 2142
2219 if ((rt->dst.dev == dev || !dev) && 2143 if ((rt->rt6i_dev == dev || dev == NULL) &&
2220 rt != adn->net->ipv6.ip6_null_entry) 2144 rt != adn->net->ipv6.ip6_null_entry) {
2145 RT6_TRACE("deleted by ifdown %p\n", rt);
2221 return -1; 2146 return -1;
2222 2147 }
2223 return 0; 2148 return 0;
2224} 2149}
2225 2150
@@ -2234,9 +2159,10 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
2234 icmp6_clean_all(fib6_ifdown, &adn); 2159 icmp6_clean_all(fib6_ifdown, &adn);
2235} 2160}
2236 2161
2237struct rt6_mtu_change_arg { 2162struct rt6_mtu_change_arg
2163{
2238 struct net_device *dev; 2164 struct net_device *dev;
2239 unsigned int mtu; 2165 unsigned mtu;
2240}; 2166};
2241 2167
2242static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2168static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
@@ -2251,7 +2177,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2251 */ 2177 */
2252 2178
2253 idev = __in6_dev_get(arg->dev); 2179 idev = __in6_dev_get(arg->dev);
2254 if (!idev) 2180 if (idev == NULL)
2255 return 0; 2181 return 0;
2256 2182
2257 /* For administrative MTU increase, there is no way to discover 2183 /* For administrative MTU increase, there is no way to discover
@@ -2268,7 +2194,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2268 also have the lowest MTU, TOO BIG MESSAGE will lead to 2194 also have the lowest MTU, TOO BIG MESSAGE will lead to
2269 PMTU discovery. 2195 PMTU discovery.
2270 */ 2196 */
2271 if (rt->dst.dev == arg->dev && 2197 if (rt->rt6i_dev == arg->dev &&
2272 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2198 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2273 (dst_mtu(&rt->dst) >= arg->mtu || 2199 (dst_mtu(&rt->dst) >= arg->mtu ||
2274 (dst_mtu(&rt->dst) < arg->mtu && 2200 (dst_mtu(&rt->dst) < arg->mtu &&
@@ -2278,7 +2204,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2278 return 0; 2204 return 0;
2279} 2205}
2280 2206
2281void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2207void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2282{ 2208{
2283 struct rt6_mtu_change_arg arg = { 2209 struct rt6_mtu_change_arg arg = {
2284 .dev = dev, 2210 .dev = dev,
@@ -2294,7 +2220,6 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2294 [RTA_IIF] = { .type = NLA_U32 }, 2220 [RTA_IIF] = { .type = NLA_U32 },
2295 [RTA_PRIORITY] = { .type = NLA_U32 }, 2221 [RTA_PRIORITY] = { .type = NLA_U32 },
2296 [RTA_METRICS] = { .type = NLA_NESTED }, 2222 [RTA_METRICS] = { .type = NLA_NESTED },
2297 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2298}; 2223};
2299 2224
2300static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2225static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2317,18 +2242,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2317 cfg->fc_src_len = rtm->rtm_src_len; 2242 cfg->fc_src_len = rtm->rtm_src_len;
2318 cfg->fc_flags = RTF_UP; 2243 cfg->fc_flags = RTF_UP;
2319 cfg->fc_protocol = rtm->rtm_protocol; 2244 cfg->fc_protocol = rtm->rtm_protocol;
2320 cfg->fc_type = rtm->rtm_type;
2321 2245
2322 if (rtm->rtm_type == RTN_UNREACHABLE || 2246 if (rtm->rtm_type == RTN_UNREACHABLE)
2323 rtm->rtm_type == RTN_BLACKHOLE ||
2324 rtm->rtm_type == RTN_PROHIBIT ||
2325 rtm->rtm_type == RTN_THROW)
2326 cfg->fc_flags |= RTF_REJECT; 2247 cfg->fc_flags |= RTF_REJECT;
2327 2248
2328 if (rtm->rtm_type == RTN_LOCAL) 2249 if (rtm->rtm_type == RTN_LOCAL)
2329 cfg->fc_flags |= RTF_LOCAL; 2250 cfg->fc_flags |= RTF_LOCAL;
2330 2251
2331 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 2252 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2332 cfg->fc_nlinfo.nlh = nlh; 2253 cfg->fc_nlinfo.nlh = nlh;
2333 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2254 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2334 2255
@@ -2372,71 +2293,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2372 if (tb[RTA_TABLE]) 2293 if (tb[RTA_TABLE])
2373 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2294 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2374 2295
2375 if (tb[RTA_MULTIPATH]) {
2376 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2377 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2378 }
2379
2380 err = 0; 2296 err = 0;
2381errout: 2297errout:
2382 return err; 2298 return err;
2383} 2299}
2384 2300
2385static int ip6_route_multipath(struct fib6_config *cfg, int add)
2386{
2387 struct fib6_config r_cfg;
2388 struct rtnexthop *rtnh;
2389 int remaining;
2390 int attrlen;
2391 int err = 0, last_err = 0;
2392
2393beginning:
2394 rtnh = (struct rtnexthop *)cfg->fc_mp;
2395 remaining = cfg->fc_mp_len;
2396
2397 /* Parse a Multipath Entry */
2398 while (rtnh_ok(rtnh, remaining)) {
2399 memcpy(&r_cfg, cfg, sizeof(*cfg));
2400 if (rtnh->rtnh_ifindex)
2401 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2402
2403 attrlen = rtnh_attrlen(rtnh);
2404 if (attrlen > 0) {
2405 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2406
2407 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2408 if (nla) {
2409 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2410 r_cfg.fc_flags |= RTF_GATEWAY;
2411 }
2412 }
2413 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2414 if (err) {
2415 last_err = err;
2416 /* If we are trying to remove a route, do not stop the
2417 * loop when ip6_route_del() fails (because next hop is
2418 * already gone), we should try to remove all next hops.
2419 */
2420 if (add) {
2421 /* If add fails, we should try to delete all
2422 * next hops that have been already added.
2423 */
2424 add = 0;
2425 goto beginning;
2426 }
2427 }
2428 /* Because each route is added like a single route we remove
2429 * this flag after the first nexthop (if there is a collision,
2430 * we have already fail to add the first nexthop:
2431 * fib6_add_rt2node() has reject it).
2432 */
2433 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2434 rtnh = rtnh_next(rtnh, &remaining);
2435 }
2436
2437 return last_err;
2438}
2439
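
The ip6_route_multipath() helper removed above walks an RTA_MULTIPATH attribute as a packed array of struct rtnexthop entries, each optionally followed by nested attributes such as a per-hop RTA_GATEWAY. The user-space RTNH_*/RTA_* macros from <linux/rtnetlink.h> iterate the same layout; the sketch below merely prints each nexthop and assumes the caller already has the attribute payload and its length.

#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/rtnetlink.h>

/* Walk the payload of an RTA_MULTIPATH attribute: a packed array of
 * struct rtnexthop entries, each optionally followed by nested
 * attributes such as RTA_GATEWAY.  `mp` and `len` are assumed to be
 * the attribute payload and its length, however the caller got them. */
void dump_multipath(void *mp, int len)
{
	struct rtnexthop *rtnh = mp;

	while (RTNH_OK(rtnh, len)) {
		int attrlen = rtnh->rtnh_len - RTNH_LENGTH(0);

		printf("nexthop: ifindex=%d", rtnh->rtnh_ifindex);

		if (attrlen > 0) {
			struct rtattr *rta = RTNH_DATA(rtnh);

			for (; RTA_OK(rta, attrlen); rta = RTA_NEXT(rta, attrlen)) {
				char buf[INET6_ADDRSTRLEN];

				if (rta->rta_type == RTA_GATEWAY &&
				    RTA_PAYLOAD(rta) == sizeof(struct in6_addr))
					printf(" via %s",
					       inet_ntop(AF_INET6, RTA_DATA(rta),
							 buf, sizeof(buf)));
			}
		}
		printf("\n");

		len -= RTNH_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
	}
}
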
2440static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2301static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2441{ 2302{
2442 struct fib6_config cfg; 2303 struct fib6_config cfg;
@@ -2446,10 +2307,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
2446 if (err < 0) 2307 if (err < 0)
2447 return err; 2308 return err;
2448 2309
2449 if (cfg.fc_mp) 2310 return ip6_route_del(&cfg);
2450 return ip6_route_multipath(&cfg, 0);
2451 else
2452 return ip6_route_del(&cfg);
2453} 2311}
2454 2312
2455static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2313static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -2461,10 +2319,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
2461 if (err < 0) 2319 if (err < 0)
2462 return err; 2320 return err;
2463 2321
2464 if (cfg.fc_mp) 2322 return ip6_route_add(&cfg);
2465 return ip6_route_multipath(&cfg, 1);
2466 else
2467 return ip6_route_add(&cfg);
2468} 2323}
2469 2324
2470static inline size_t rt6_nlmsg_size(void) 2325static inline size_t rt6_nlmsg_size(void)
@@ -2485,7 +2340,7 @@ static inline size_t rt6_nlmsg_size(void)
2485static int rt6_fill_node(struct net *net, 2340static int rt6_fill_node(struct net *net,
2486 struct sk_buff *skb, struct rt6_info *rt, 2341 struct sk_buff *skb, struct rt6_info *rt,
2487 struct in6_addr *dst, struct in6_addr *src, 2342 struct in6_addr *dst, struct in6_addr *src,
2488 int iif, int type, u32 portid, u32 seq, 2343 int iif, int type, u32 pid, u32 seq,
2489 int prefix, int nowait, unsigned int flags) 2344 int prefix, int nowait, unsigned int flags)
2490{ 2345{
2491 struct rtmsg *rtm; 2346 struct rtmsg *rtm;
@@ -2501,8 +2356,8 @@ static int rt6_fill_node(struct net *net,
2501 } 2356 }
2502 } 2357 }
2503 2358
2504 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 2359 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2505 if (!nlh) 2360 if (nlh == NULL)
2506 return -EMSGSIZE; 2361 return -EMSGSIZE;
2507 2362
2508 rtm = nlmsg_data(nlh); 2363 rtm = nlmsg_data(nlh);
@@ -2515,60 +2370,39 @@ static int rt6_fill_node(struct net *net,
2515 else 2370 else
2516 table = RT6_TABLE_UNSPEC; 2371 table = RT6_TABLE_UNSPEC;
2517 rtm->rtm_table = table; 2372 rtm->rtm_table = table;
2518 if (nla_put_u32(skb, RTA_TABLE, table)) 2373 NLA_PUT_U32(skb, RTA_TABLE, table);
2519 goto nla_put_failure; 2374 if (rt->rt6i_flags&RTF_REJECT)
2520 if (rt->rt6i_flags & RTF_REJECT) { 2375 rtm->rtm_type = RTN_UNREACHABLE;
2521 switch (rt->dst.error) { 2376 else if (rt->rt6i_flags&RTF_LOCAL)
2522 case -EINVAL:
2523 rtm->rtm_type = RTN_BLACKHOLE;
2524 break;
2525 case -EACCES:
2526 rtm->rtm_type = RTN_PROHIBIT;
2527 break;
2528 case -EAGAIN:
2529 rtm->rtm_type = RTN_THROW;
2530 break;
2531 default:
2532 rtm->rtm_type = RTN_UNREACHABLE;
2533 break;
2534 }
2535 }
2536 else if (rt->rt6i_flags & RTF_LOCAL)
2537 rtm->rtm_type = RTN_LOCAL; 2377 rtm->rtm_type = RTN_LOCAL;
2538 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 2378 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2539 rtm->rtm_type = RTN_LOCAL; 2379 rtm->rtm_type = RTN_LOCAL;
2540 else 2380 else
2541 rtm->rtm_type = RTN_UNICAST; 2381 rtm->rtm_type = RTN_UNICAST;
2542 rtm->rtm_flags = 0; 2382 rtm->rtm_flags = 0;
2543 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2383 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2544 rtm->rtm_protocol = rt->rt6i_protocol; 2384 rtm->rtm_protocol = rt->rt6i_protocol;
2545 if (rt->rt6i_flags & RTF_DYNAMIC) 2385 if (rt->rt6i_flags&RTF_DYNAMIC)
2546 rtm->rtm_protocol = RTPROT_REDIRECT; 2386 rtm->rtm_protocol = RTPROT_REDIRECT;
2547 else if (rt->rt6i_flags & RTF_ADDRCONF) { 2387 else if (rt->rt6i_flags & RTF_ADDRCONF)
2548 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) 2388 rtm->rtm_protocol = RTPROT_KERNEL;
2549 rtm->rtm_protocol = RTPROT_RA; 2389 else if (rt->rt6i_flags&RTF_DEFAULT)
2550 else 2390 rtm->rtm_protocol = RTPROT_RA;
2551 rtm->rtm_protocol = RTPROT_KERNEL;
2552 }
2553 2391
2554 if (rt->rt6i_flags & RTF_CACHE) 2392 if (rt->rt6i_flags&RTF_CACHE)
2555 rtm->rtm_flags |= RTM_F_CLONED; 2393 rtm->rtm_flags |= RTM_F_CLONED;
2556 2394
2557 if (dst) { 2395 if (dst) {
2558 if (nla_put(skb, RTA_DST, 16, dst)) 2396 NLA_PUT(skb, RTA_DST, 16, dst);
2559 goto nla_put_failure;
2560 rtm->rtm_dst_len = 128; 2397 rtm->rtm_dst_len = 128;
2561 } else if (rtm->rtm_dst_len) 2398 } else if (rtm->rtm_dst_len)
2562 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) 2399 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2563 goto nla_put_failure;
2564#ifdef CONFIG_IPV6_SUBTREES 2400#ifdef CONFIG_IPV6_SUBTREES
2565 if (src) { 2401 if (src) {
2566 if (nla_put(skb, RTA_SRC, 16, src)) 2402 NLA_PUT(skb, RTA_SRC, 16, src);
2567 goto nla_put_failure;
2568 rtm->rtm_src_len = 128; 2403 rtm->rtm_src_len = 128;
2569 } else if (rtm->rtm_src_len && 2404 } else if (rtm->rtm_src_len)
2570 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) 2405 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2571 goto nla_put_failure;
2572#endif 2406#endif
2573 if (iif) { 2407 if (iif) {
2574#ifdef CONFIG_IPV6_MROUTE 2408#ifdef CONFIG_IPV6_MROUTE
@@ -2586,40 +2420,42 @@ static int rt6_fill_node(struct net *net,
2586 } 2420 }
2587 } else 2421 } else
2588#endif 2422#endif
2589 if (nla_put_u32(skb, RTA_IIF, iif)) 2423 NLA_PUT_U32(skb, RTA_IIF, iif);
2590 goto nla_put_failure;
2591 } else if (dst) { 2424 } else if (dst) {
2592 struct in6_addr saddr_buf; 2425 struct in6_addr saddr_buf;
2593 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 2426 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2594 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2427 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2595 goto nla_put_failure;
2596 } 2428 }
2597 2429
2598 if (rt->rt6i_prefsrc.plen) { 2430 if (rt->rt6i_prefsrc.plen) {
2599 struct in6_addr saddr_buf; 2431 struct in6_addr saddr_buf;
2600 saddr_buf = rt->rt6i_prefsrc.addr; 2432 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2601 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2433 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2602 goto nla_put_failure;
2603 } 2434 }
2604 2435
2605 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2436 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2606 goto nla_put_failure; 2437 goto nla_put_failure;
2607 2438
2608 n = rt->n; 2439 rcu_read_lock();
2609 if (n) { 2440 n = dst_get_neighbour(&rt->dst);
2610 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) 2441 if (n)
2611 goto nla_put_failure; 2442 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2612 } 2443 rcu_read_unlock();
2613 2444
2614 if (rt->dst.dev && 2445 if (rt->dst.dev)
2615 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2446 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2616 goto nla_put_failure; 2447
2617 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 2448 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2618 goto nla_put_failure;
2619 2449
2620 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; 2450 if (!(rt->rt6i_flags & RTF_EXPIRES))
2451 expires = 0;
2452 else if (rt->rt6i_expires - jiffies < INT_MAX)
2453 expires = rt->rt6i_expires - jiffies;
2454 else
2455 expires = INT_MAX;
2621 2456
2622 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 2457 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2458 expires, rt->dst.error) < 0)
2623 goto nla_put_failure; 2459 goto nla_put_failure;
2624 2460
2625 return nlmsg_end(skb, nlh); 2461 return nlmsg_end(skb, nlh);
@@ -2642,7 +2478,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2642 2478
2643 return rt6_fill_node(arg->net, 2479 return rt6_fill_node(arg->net,
2644 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2480 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2645 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 2481 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2646 prefix, 0, NLM_F_MULTI); 2482 prefix, 0, NLM_F_MULTI);
2647} 2483}
2648 2484
@@ -2654,7 +2490,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2654 struct sk_buff *skb; 2490 struct sk_buff *skb;
2655 struct rtmsg *rtm; 2491 struct rtmsg *rtm;
2656 struct flowi6 fl6; 2492 struct flowi6 fl6;
2657 int err, iif = 0, oif = 0; 2493 int err, iif = 0;
2658 2494
2659 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2495 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2660 if (err < 0) 2496 if (err < 0)
@@ -2667,48 +2503,33 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2667 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2503 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2668 goto errout; 2504 goto errout;
2669 2505
2670 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 2506 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2671 } 2507 }
2672 2508
2673 if (tb[RTA_DST]) { 2509 if (tb[RTA_DST]) {
2674 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2510 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2675 goto errout; 2511 goto errout;
2676 2512
2677 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 2513 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2678 } 2514 }
2679 2515
2680 if (tb[RTA_IIF]) 2516 if (tb[RTA_IIF])
2681 iif = nla_get_u32(tb[RTA_IIF]); 2517 iif = nla_get_u32(tb[RTA_IIF]);
2682 2518
2683 if (tb[RTA_OIF]) 2519 if (tb[RTA_OIF])
2684 oif = nla_get_u32(tb[RTA_OIF]); 2520 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2685 2521
2686 if (iif) { 2522 if (iif) {
2687 struct net_device *dev; 2523 struct net_device *dev;
2688 int flags = 0;
2689
2690 dev = __dev_get_by_index(net, iif); 2524 dev = __dev_get_by_index(net, iif);
2691 if (!dev) { 2525 if (!dev) {
2692 err = -ENODEV; 2526 err = -ENODEV;
2693 goto errout; 2527 goto errout;
2694 } 2528 }
2695
2696 fl6.flowi6_iif = iif;
2697
2698 if (!ipv6_addr_any(&fl6.saddr))
2699 flags |= RT6_LOOKUP_F_HAS_SADDR;
2700
2701 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2702 flags);
2703 } else {
2704 fl6.flowi6_oif = oif;
2705
2706 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2707 } 2529 }
2708 2530
2709 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2531 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2710 if (!skb) { 2532 if (skb == NULL) {
2711 ip6_rt_put(rt);
2712 err = -ENOBUFS; 2533 err = -ENOBUFS;
2713 goto errout; 2534 goto errout;
2714 } 2535 }
@@ -2719,17 +2540,18 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2719 skb_reset_mac_header(skb); 2540 skb_reset_mac_header(skb);
2720 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2541 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2721 2542
2543 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2722 skb_dst_set(skb, &rt->dst); 2544 skb_dst_set(skb, &rt->dst);
2723 2545
2724 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2546 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2725 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 2547 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2726 nlh->nlmsg_seq, 0, 0, 0); 2548 nlh->nlmsg_seq, 0, 0, 0);
2727 if (err < 0) { 2549 if (err < 0) {
2728 kfree_skb(skb); 2550 kfree_skb(skb);
2729 goto errout; 2551 goto errout;
2730 } 2552 }
2731 2553
2732 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 2554 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2733errout: 2555errout:
2734 return err; 2556 return err;
2735} 2557}
@@ -2742,21 +2564,21 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2742 int err; 2564 int err;
2743 2565
2744 err = -ENOBUFS; 2566 err = -ENOBUFS;
2745 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 2567 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2746 2568
2747 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2569 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2748 if (!skb) 2570 if (skb == NULL)
2749 goto errout; 2571 goto errout;
2750 2572
2751 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2573 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2752 event, info->portid, seq, 0, 0, 0); 2574 event, info->pid, seq, 0, 0, 0);
2753 if (err < 0) { 2575 if (err < 0) {
2754 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2576 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2755 WARN_ON(err == -EMSGSIZE); 2577 WARN_ON(err == -EMSGSIZE);
2756 kfree_skb(skb); 2578 kfree_skb(skb);
2757 goto errout; 2579 goto errout;
2758 } 2580 }
2759 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 2581 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2760 info->nlh, gfp_any()); 2582 info->nlh, gfp_any());
2761 return; 2583 return;
2762errout: 2584errout:
@@ -2811,23 +2633,25 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2811#else 2633#else
2812 seq_puts(m, "00000000000000000000000000000000 00 "); 2634 seq_puts(m, "00000000000000000000000000000000 00 ");
2813#endif 2635#endif
2814 n = rt->n; 2636 rcu_read_lock();
2637 n = dst_get_neighbour(&rt->dst);
2815 if (n) { 2638 if (n) {
2816 seq_printf(m, "%pi6", n->primary_key); 2639 seq_printf(m, "%pi6", n->primary_key);
2817 } else { 2640 } else {
2818 seq_puts(m, "00000000000000000000000000000000"); 2641 seq_puts(m, "00000000000000000000000000000000");
2819 } 2642 }
2643 rcu_read_unlock();
2820 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2644 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2821 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2645 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2822 rt->dst.__use, rt->rt6i_flags, 2646 rt->dst.__use, rt->rt6i_flags,
2823 rt->dst.dev ? rt->dst.dev->name : ""); 2647 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2824 return 0; 2648 return 0;
2825} 2649}
2826 2650
2827static int ipv6_route_show(struct seq_file *m, void *v) 2651static int ipv6_route_show(struct seq_file *m, void *v)
2828{ 2652{
2829 struct net *net = (struct net *)m->private; 2653 struct net *net = (struct net *)m->private;
2830 fib6_clean_all_ro(net, rt6_info_route, 0, m); 2654 fib6_clean_all(net, rt6_info_route, 0, m);
2831 return 0; 2655 return 0;
2832} 2656}
2833 2657
@@ -2985,10 +2809,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2985 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2809 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2986 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2810 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2987 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2811 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2988
2989 /* Don't export sysctls to unprivileged users */
2990 if (net->user_ns != &init_user_ns)
2991 table[0].procname = NULL;
2992 } 2812 }
2993 2813
2994 return table; 2814 return table;
@@ -3049,6 +2869,10 @@ static int __net_init ip6_route_net_init(struct net *net)
3049 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2869 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3050 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2870 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3051 2871
2872#ifdef CONFIG_PROC_FS
2873 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2874 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2875#endif
3052 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2876 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3053 2877
3054 ret = 0; 2878 ret = 0;
@@ -3069,6 +2893,10 @@ out_ip6_dst_ops:
3069 2893
3070static void __net_exit ip6_route_net_exit(struct net *net) 2894static void __net_exit ip6_route_net_exit(struct net *net)
3071{ 2895{
2896#ifdef CONFIG_PROC_FS
2897 proc_net_remove(net, "ipv6_route");
2898 proc_net_remove(net, "rt6_stats");
2899#endif
3072 kfree(net->ipv6.ip6_null_entry); 2900 kfree(net->ipv6.ip6_null_entry);
3073#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2901#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3074 kfree(net->ipv6.ip6_prohibit_entry); 2902 kfree(net->ipv6.ip6_prohibit_entry);
@@ -3077,58 +2905,11 @@ static void __net_exit ip6_route_net_exit(struct net *net)
3077 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2905 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3078} 2906}
3079 2907
3080static int __net_init ip6_route_net_init_late(struct net *net)
3081{
3082#ifdef CONFIG_PROC_FS
3083 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3084 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3085#endif
3086 return 0;
3087}
3088
3089static void __net_exit ip6_route_net_exit_late(struct net *net)
3090{
3091#ifdef CONFIG_PROC_FS
3092 proc_net_remove(net, "ipv6_route");
3093 proc_net_remove(net, "rt6_stats");
3094#endif
3095}
3096
3097static struct pernet_operations ip6_route_net_ops = { 2908static struct pernet_operations ip6_route_net_ops = {
3098 .init = ip6_route_net_init, 2909 .init = ip6_route_net_init,
3099 .exit = ip6_route_net_exit, 2910 .exit = ip6_route_net_exit,
3100}; 2911};
3101 2912
3102static int __net_init ipv6_inetpeer_init(struct net *net)
3103{
3104 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3105
3106 if (!bp)
3107 return -ENOMEM;
3108 inet_peer_base_init(bp);
3109 net->ipv6.peers = bp;
3110 return 0;
3111}
3112
3113static void __net_exit ipv6_inetpeer_exit(struct net *net)
3114{
3115 struct inet_peer_base *bp = net->ipv6.peers;
3116
3117 net->ipv6.peers = NULL;
3118 inetpeer_invalidate_tree(bp);
3119 kfree(bp);
3120}
3121
3122static struct pernet_operations ipv6_inetpeer_ops = {
3123 .init = ipv6_inetpeer_init,
3124 .exit = ipv6_inetpeer_exit,
3125};
3126
3127static struct pernet_operations ip6_route_net_late_ops = {
3128 .init = ip6_route_net_init_late,
3129 .exit = ip6_route_net_exit_late,
3130};
3131
3132static struct notifier_block ip6_route_dev_notifier = { 2913static struct notifier_block ip6_route_dev_notifier = {
3133 .notifier_call = ip6_route_dev_notify, 2914 .notifier_call = ip6_route_dev_notify,
3134 .priority = 0, 2915 .priority = 0,
@@ -3149,13 +2930,9 @@ int __init ip6_route_init(void)
3149 if (ret) 2930 if (ret)
3150 goto out_kmem_cache; 2931 goto out_kmem_cache;
3151 2932
3152 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3153 if (ret)
3154 goto out_dst_entries;
3155
3156 ret = register_pernet_subsys(&ip6_route_net_ops); 2933 ret = register_pernet_subsys(&ip6_route_net_ops);
3157 if (ret) 2934 if (ret)
3158 goto out_register_inetpeer; 2935 goto out_dst_entries;
3159 2936
3160 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2937 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3161 2938
@@ -3182,25 +2959,19 @@ int __init ip6_route_init(void)
3182 if (ret) 2959 if (ret)
3183 goto xfrm6_init; 2960 goto xfrm6_init;
3184 2961
3185 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3186 if (ret)
3187 goto fib6_rules_init;
3188
3189 ret = -ENOBUFS; 2962 ret = -ENOBUFS;
3190 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 2963 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3191 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 2964 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3192 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 2965 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3193 goto out_register_late_subsys; 2966 goto fib6_rules_init;
3194 2967
3195 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2968 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3196 if (ret) 2969 if (ret)
3197 goto out_register_late_subsys; 2970 goto fib6_rules_init;
3198 2971
3199out: 2972out:
3200 return ret; 2973 return ret;
3201 2974
3202out_register_late_subsys:
3203 unregister_pernet_subsys(&ip6_route_net_late_ops);
3204fib6_rules_init: 2975fib6_rules_init:
3205 fib6_rules_cleanup(); 2976 fib6_rules_cleanup();
3206xfrm6_init: 2977xfrm6_init:
@@ -3209,8 +2980,6 @@ out_fib6_init:
3209 fib6_gc_cleanup(); 2980 fib6_gc_cleanup();
3210out_register_subsys: 2981out_register_subsys:
3211 unregister_pernet_subsys(&ip6_route_net_ops); 2982 unregister_pernet_subsys(&ip6_route_net_ops);
3212out_register_inetpeer:
3213 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3214out_dst_entries: 2983out_dst_entries:
3215 dst_entries_destroy(&ip6_dst_blackhole_ops); 2984 dst_entries_destroy(&ip6_dst_blackhole_ops);
3216out_kmem_cache: 2985out_kmem_cache:
@@ -3221,11 +2990,9 @@ out_kmem_cache:
3221void ip6_route_cleanup(void) 2990void ip6_route_cleanup(void)
3222{ 2991{
3223 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2992 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3224 unregister_pernet_subsys(&ip6_route_net_late_ops);
3225 fib6_rules_cleanup(); 2993 fib6_rules_cleanup();
3226 xfrm6_fini(); 2994 xfrm6_fini();
3227 fib6_gc_cleanup(); 2995 fib6_gc_cleanup();
3228 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3229 unregister_pernet_subsys(&ip6_route_net_ops); 2996 unregister_pernet_subsys(&ip6_route_net_ops);
3230 dst_entries_destroy(&ip6_dst_blackhole_ops); 2997 dst_entries_destroy(&ip6_dst_blackhole_ops);
3231 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2998 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cfba99b2c2a..c1e0d63db2e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -17,8 +17,6 @@
17 * Fred Templin <fred.l.templin@boeing.com>: isatap support 17 * Fred Templin <fred.l.templin@boeing.com>: isatap support
18 */ 18 */
19 19
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/module.h> 20#include <linux/module.h>
23#include <linux/capability.h> 21#include <linux/capability.h>
24#include <linux/errno.h> 22#include <linux/errno.h>
@@ -65,14 +63,9 @@
65#define HASH_SIZE 16 63#define HASH_SIZE 16
66#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 64#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
67 65
68static bool log_ecn_error = true;
69module_param(log_ecn_error, bool, 0644);
70MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
71
72static int ipip6_tunnel_init(struct net_device *dev); 66static int ipip6_tunnel_init(struct net_device *dev);
73static void ipip6_tunnel_setup(struct net_device *dev); 67static void ipip6_tunnel_setup(struct net_device *dev);
74static void ipip6_dev_free(struct net_device *dev); 68static void ipip6_dev_free(struct net_device *dev);
75static struct rtnl_link_ops sit_link_ops __read_mostly;
76 69
77static int sit_net_id __read_mostly; 70static int sit_net_id __read_mostly;
78struct sit_net { 71struct sit_net {
@@ -85,45 +78,44 @@ struct sit_net {
85 struct net_device *fb_tunnel_dev; 78 struct net_device *fb_tunnel_dev;
86}; 79};
87 80
88static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, 81/*
89 struct rtnl_link_stats64 *tot) 82 * Locking : hash tables are protected by RCU and RTNL
83 */
84
85#define for_each_ip_tunnel_rcu(start) \
86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
87
88/* often modified stats are per cpu, other are shared (netdev->stats) */
89struct pcpu_tstats {
90 unsigned long rx_packets;
91 unsigned long rx_bytes;
92 unsigned long tx_packets;
93 unsigned long tx_bytes;
94};
95
96static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
90{ 97{
98 struct pcpu_tstats sum = { 0 };
91 int i; 99 int i;
92 100
93 for_each_possible_cpu(i) { 101 for_each_possible_cpu(i) {
94 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); 102 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
95 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
96 unsigned int start;
97
98 do {
99 start = u64_stats_fetch_begin_bh(&tstats->syncp);
100 rx_packets = tstats->rx_packets;
101 tx_packets = tstats->tx_packets;
102 rx_bytes = tstats->rx_bytes;
103 tx_bytes = tstats->tx_bytes;
104 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
105
106 tot->rx_packets += rx_packets;
107 tot->tx_packets += tx_packets;
108 tot->rx_bytes += rx_bytes;
109 tot->tx_bytes += tx_bytes;
110 }
111
112 tot->rx_errors = dev->stats.rx_errors;
113 tot->rx_frame_errors = dev->stats.rx_frame_errors;
114 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
115 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
116 tot->tx_dropped = dev->stats.tx_dropped;
117 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
118 tot->tx_errors = dev->stats.tx_errors;
119 103
120 return tot; 104 sum.rx_packets += tstats->rx_packets;
105 sum.rx_bytes += tstats->rx_bytes;
106 sum.tx_packets += tstats->tx_packets;
107 sum.tx_bytes += tstats->tx_bytes;
108 }
109 dev->stats.rx_packets = sum.rx_packets;
110 dev->stats.rx_bytes = sum.rx_bytes;
111 dev->stats.tx_packets = sum.tx_packets;
112 dev->stats.tx_bytes = sum.tx_bytes;
113 return &dev->stats;
121} 114}
122
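
Both versions of the stats hook above fold per-CPU transmit and receive counters into one device total; the newer ipip6_get_stats64() additionally reads each CPU's counters under a u64_stats seqcount so a 64-bit value is never seen half-updated on 32-bit hosts. A single-threaded user-space analogue of the summing step; the types and counts are assumed for the demo.

#include <stdint.h>
#include <stdio.h>

#define DEMO_NCPUS 4	/* assumed CPU count for the demo */

struct demo_tstats {
	uint64_t rx_packets, rx_bytes, tx_packets, tx_bytes;
};

/* Fold per-CPU counters into one device total.  The kernel wraps each
 * per-CPU read in u64_stats_fetch_begin/retry so a 64-bit counter is
 * never observed half-updated on 32-bit hosts; a plain sum suffices
 * for this single-threaded sketch. */
static struct demo_tstats sum_tstats(const struct demo_tstats *pcpu, int ncpus)
{
	struct demo_tstats tot = { 0 };
	int i;

	for (i = 0; i < ncpus; i++) {
		tot.rx_packets += pcpu[i].rx_packets;
		tot.rx_bytes   += pcpu[i].rx_bytes;
		tot.tx_packets += pcpu[i].tx_packets;
		tot.tx_bytes   += pcpu[i].tx_bytes;
	}
	return tot;
}

int main(void)
{
	struct demo_tstats percpu[DEMO_NCPUS] = {
		{ .rx_packets = 10, .rx_bytes = 1500 },
		{ .tx_packets = 4,  .tx_bytes = 600  },
	};
	struct demo_tstats tot = sum_tstats(percpu, DEMO_NCPUS);

	printf("rx %llu pkts / %llu bytes, tx %llu pkts / %llu bytes\n",
	       (unsigned long long)tot.rx_packets,
	       (unsigned long long)tot.rx_bytes,
	       (unsigned long long)tot.tx_packets,
	       (unsigned long long)tot.tx_bytes);
	return 0;
}
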
123/* 115/*
124 * Must be invoked with rcu_read_lock 116 * Must be invoked with rcu_read_lock
125 */ 117 */
126static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, 118static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
127 struct net_device *dev, __be32 remote, __be32 local) 119 struct net_device *dev, __be32 remote, __be32 local)
128{ 120{
129 unsigned int h0 = HASH(remote); 121 unsigned int h0 = HASH(remote);
@@ -131,20 +123,20 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
131 struct ip_tunnel *t; 123 struct ip_tunnel *t;
132 struct sit_net *sitn = net_generic(net, sit_net_id); 124 struct sit_net *sitn = net_generic(net, sit_net_id);
133 125
134 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { 126 for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) {
135 if (local == t->parms.iph.saddr && 127 if (local == t->parms.iph.saddr &&
136 remote == t->parms.iph.daddr && 128 remote == t->parms.iph.daddr &&
137 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 129 (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
138 (t->dev->flags & IFF_UP)) 130 (t->dev->flags & IFF_UP))
139 return t; 131 return t;
140 } 132 }
141 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { 133 for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) {
142 if (remote == t->parms.iph.daddr && 134 if (remote == t->parms.iph.daddr &&
143 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 135 (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
144 (t->dev->flags & IFF_UP)) 136 (t->dev->flags & IFF_UP))
145 return t; 137 return t;
146 } 138 }
147 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { 139 for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) {
148 if (local == t->parms.iph.saddr && 140 if (local == t->parms.iph.saddr &&
149 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 141 (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
150 (t->dev->flags & IFF_UP)) 142 (t->dev->flags & IFF_UP))
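
ipip6_tunnel_lookup() above hashes the outer IPv4 endpoints with the 4-bit HASH() macro and then prefers the most specific match. A short stand-alone sketch of the hash and that preference order; the addresses and table names in the comment are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Same folding as sit.c's HASH(): xor the address (network byte order)
 * with itself shifted right by four bits and keep the low four bits,
 * giving an index into a 16-slot bucket array. */
static unsigned int demo_hash(uint32_t be_addr)
{
	return (be_addr ^ (be_addr >> 4)) & 0xF;
}

int main(void)
{
	struct in_addr remote, local;
	unsigned int h0, h1;

	inet_pton(AF_INET, "192.0.2.1", &remote);
	inet_pton(AF_INET, "198.51.100.7", &local);

	h0 = demo_hash(remote.s_addr);
	h1 = demo_hash(local.s_addr);

	/* Lookup preference mirrors the loops above:
	 *   tunnels_r_l[h0 ^ h1]  remote and local both match
	 *   tunnels_r[h0]         remote matches, any local
	 *   tunnels_l[h1]         local matches, any remote
	 *   tunnels_wc[0]         wildcard fallback (beyond this hunk)
	 */
	printf("h0=%u h1=%u both=%u\n", h0, h1, h0 ^ h1);
	return 0;
}
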
@@ -221,37 +213,6 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
221#endif 213#endif
222} 214}
223 215
224static int ipip6_tunnel_create(struct net_device *dev)
225{
226 struct ip_tunnel *t = netdev_priv(dev);
227 struct net *net = dev_net(dev);
228 struct sit_net *sitn = net_generic(net, sit_net_id);
229 int err;
230
231 err = ipip6_tunnel_init(dev);
232 if (err < 0)
233 goto out;
234 ipip6_tunnel_clone_6rd(dev, sitn);
235
236 if ((__force u16)t->parms.i_flags & SIT_ISATAP)
237 dev->priv_flags |= IFF_ISATAP;
238
239 err = register_netdevice(dev);
240 if (err < 0)
241 goto out;
242
243 strcpy(t->parms.name, dev->name);
244 dev->rtnl_link_ops = &sit_link_ops;
245
246 dev_hold(dev);
247
248 ipip6_tunnel_link(sitn, t);
249 return 0;
250
251out:
252 return err;
253}
254
255static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, 216static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
256 struct ip_tunnel_parm *parms, int create) 217 struct ip_tunnel_parm *parms, int create)
257{ 218{
@@ -292,9 +253,21 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
292 nt = netdev_priv(dev); 253 nt = netdev_priv(dev);
293 254
294 nt->parms = *parms; 255 nt->parms = *parms;
295 if (ipip6_tunnel_create(dev) < 0) 256 if (ipip6_tunnel_init(dev) < 0)
257 goto failed_free;
258 ipip6_tunnel_clone_6rd(dev, sitn);
259
260 if (parms->i_flags & SIT_ISATAP)
261 dev->priv_flags |= IFF_ISATAP;
262
263 if (register_netdevice(dev) < 0)
296 goto failed_free; 264 goto failed_free;
297 265
266 strcpy(nt->parms.name, dev->name);
267
268 dev_hold(dev);
269
270 ipip6_tunnel_link(sitn, nt);
298 return nt; 271 return nt;
299 272
300failed_free: 273failed_free:
@@ -503,7 +476,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
503 struct sit_net *sitn = net_generic(net, sit_net_id); 476 struct sit_net *sitn = net_generic(net, sit_net_id);
504 477
505 if (dev == sitn->fb_tunnel_dev) { 478 if (dev == sitn->fb_tunnel_dev) {
506 RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); 479 rcu_assign_pointer(sitn->tunnels_wc[0], NULL);
507 } else { 480 } else {
508 ipip6_tunnel_unlink(sitn, netdev_priv(dev)); 481 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
509 ipip6_tunnel_del_prl(netdev_priv(dev), NULL); 482 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
@@ -536,6 +509,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
536 case ICMP_PORT_UNREACH: 509 case ICMP_PORT_UNREACH:
537 /* Impossible event. */ 510 /* Impossible event. */
538 return 0; 511 return 0;
512 case ICMP_FRAG_NEEDED:
513 /* Soft state for pmtu is maintained by IP core. */
514 return 0;
539 default: 515 default:
540 /* All others are translated to HOST_UNREACH. 516 /* All others are translated to HOST_UNREACH.
541 rfc2003 contains "deep thoughts" about NET_UNREACH, 517 rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -548,33 +524,16 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
548 if (code != ICMP_EXC_TTL) 524 if (code != ICMP_EXC_TTL)
549 return 0; 525 return 0;
550 break; 526 break;
551 case ICMP_REDIRECT:
552 break;
553 } 527 }
554 528
555 err = -ENOENT; 529 err = -ENOENT;
556 530
531 rcu_read_lock();
557 t = ipip6_tunnel_lookup(dev_net(skb->dev), 532 t = ipip6_tunnel_lookup(dev_net(skb->dev),
558 skb->dev, 533 skb->dev,
559 iph->daddr, 534 iph->daddr,
560 iph->saddr); 535 iph->saddr);
561 if (t == NULL) 536 if (t == NULL || t->parms.iph.daddr == 0)
562 goto out;
563
564 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
565 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
566 t->dev->ifindex, 0, IPPROTO_IPV6, 0);
567 err = 0;
568 goto out;
569 }
570 if (type == ICMP_REDIRECT) {
571 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
572 IPPROTO_IPV6, 0);
573 err = 0;
574 goto out;
575 }
576
577 if (t->parms.iph.daddr == 0)
578 goto out; 537 goto out;
579 538
580 err = 0; 539 err = 0;
@@ -587,20 +546,27 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
587 t->err_count = 1; 546 t->err_count = 1;
588 t->err_time = jiffies; 547 t->err_time = jiffies;
589out: 548out:
549 rcu_read_unlock();
590 return err; 550 return err;
591} 551}
592 552
553static inline void ipip6_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
554{
555 if (INET_ECN_is_ce(iph->tos))
556 IP6_ECN_set_ce(ipv6_hdr(skb));
557}
558
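
ipip6_ecn_decapsulate() propagates congestion marking from the outer IPv4 header to the inner IPv6 packet: when the outer TOS carries ECN CE and the inner packet is ECN-capable, the inner traffic class is marked CE as well (the code this patch removes additionally counts and optionally logs bad ECN combinations). A byte-level user-space sketch of that check and marking; the buffers are assumed inputs.

#include <stdint.h>
#include <stdio.h>

#define DEMO_ECN_NOT_ECT 0
#define DEMO_ECN_CE      3

/* ECN field of an IPv4 header: the low two bits of the TOS byte. */
static unsigned int ipv4_ecn(uint8_t tos)
{
	return tos & 0x3;
}

/* In an IPv6 header the traffic class straddles bytes 0 and 1; its two
 * ECN bits sit in bits 5..4 of byte 1 (mask 0x30). */
static unsigned int ipv6_ecn(const uint8_t *ip6)
{
	return (ip6[1] >> 4) & 0x3;
}

static void ipv6_set_ce(uint8_t *ip6)
{
	ip6[1] |= 0x30;
}

/* Copy congestion marking from the outer IPv4 header to the inner IPv6
 * packet, but only if the inner packet is ECN-capable. */
static void demo_ecn_decapsulate(uint8_t outer_tos, uint8_t *inner_ip6)
{
	if (ipv4_ecn(outer_tos) == DEMO_ECN_CE &&
	    ipv6_ecn(inner_ip6) != DEMO_ECN_NOT_ECT)
		ipv6_set_ce(inner_ip6);
}

int main(void)
{
	uint8_t ip6[40] = { 0x60, 0x10 };	/* version 6, ECT(1) inner packet */

	demo_ecn_decapsulate(0x03, ip6);	/* outer TOS carries CE */
	printf("inner ECN is now %u\n", ipv6_ecn(ip6));	/* prints 3 (CE) */
	return 0;
}
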
593static int ipip6_rcv(struct sk_buff *skb) 559static int ipip6_rcv(struct sk_buff *skb)
594{ 560{
595 const struct iphdr *iph; 561 const struct iphdr *iph;
596 struct ip_tunnel *tunnel; 562 struct ip_tunnel *tunnel;
597 int err;
598 563
599 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 564 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
600 goto out; 565 goto out;
601 566
602 iph = ip_hdr(skb); 567 iph = ip_hdr(skb);
603 568
569 rcu_read_lock();
604 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 570 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
605 iph->saddr, iph->daddr); 571 iph->saddr, iph->daddr);
606 if (tunnel != NULL) { 572 if (tunnel != NULL) {
@@ -616,33 +582,27 @@ static int ipip6_rcv(struct sk_buff *skb)
616 if ((tunnel->dev->priv_flags & IFF_ISATAP) && 582 if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
617 !isatap_chksrc(skb, iph, tunnel)) { 583 !isatap_chksrc(skb, iph, tunnel)) {
618 tunnel->dev->stats.rx_errors++; 584 tunnel->dev->stats.rx_errors++;
619 goto out; 585 rcu_read_unlock();
620 } 586 kfree_skb(skb);
621 587 return 0;
622 __skb_tunnel_rx(skb, tunnel->dev);
623
624 err = IP_ECN_decapsulate(iph, skb);
625 if (unlikely(err)) {
626 if (log_ecn_error)
627 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
628 &iph->saddr, iph->tos);
629 if (err > 1) {
630 ++tunnel->dev->stats.rx_frame_errors;
631 ++tunnel->dev->stats.rx_errors;
632 goto out;
633 }
634 } 588 }
635 589
636 tstats = this_cpu_ptr(tunnel->dev->tstats); 590 tstats = this_cpu_ptr(tunnel->dev->tstats);
637 tstats->rx_packets++; 591 tstats->rx_packets++;
638 tstats->rx_bytes += skb->len; 592 tstats->rx_bytes += skb->len;
639 593
594 __skb_tunnel_rx(skb, tunnel->dev);
595
596 ipip6_ecn_decapsulate(iph, skb);
597
640 netif_rx(skb); 598 netif_rx(skb);
641 599
600 rcu_read_unlock();
642 return 0; 601 return 0;
643 } 602 }
644 603
645 /* no tunnel matched, let upstream know, ipsec may handle it */ 604 /* no tunnel matched, let upstream know, ipsec may handle it */
605 rcu_read_unlock();
646 return 1; 606 return 1;
647out: 607out:
648 kfree_skb(skb); 608 kfree_skb(skb);
@@ -696,6 +656,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
696 struct net_device *dev) 656 struct net_device *dev)
697{ 657{
698 struct ip_tunnel *tunnel = netdev_priv(dev); 658 struct ip_tunnel *tunnel = netdev_priv(dev);
659 struct pcpu_tstats *tstats;
699 const struct iphdr *tiph = &tunnel->parms.iph; 660 const struct iphdr *tiph = &tunnel->parms.iph;
700 const struct ipv6hdr *iph6 = ipv6_hdr(skb); 661 const struct ipv6hdr *iph6 = ipv6_hdr(skb);
701 u8 tos = tunnel->parms.iph.tos; 662 u8 tos = tunnel->parms.iph.tos;
@@ -719,27 +680,23 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
719 /* ISATAP (RFC4214) - must come before 6to4 */ 680 /* ISATAP (RFC4214) - must come before 6to4 */
720 if (dev->priv_flags & IFF_ISATAP) { 681 if (dev->priv_flags & IFF_ISATAP) {
721 struct neighbour *neigh = NULL; 682 struct neighbour *neigh = NULL;
722 bool do_tx_error = false;
723 683
724 if (skb_dst(skb)) 684 if (skb_dst(skb))
725 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); 685 neigh = dst_get_neighbour(skb_dst(skb));
726 686
727 if (neigh == NULL) { 687 if (neigh == NULL) {
728 net_dbg_ratelimited("sit: nexthop == NULL\n"); 688 if (net_ratelimit())
689 printk(KERN_DEBUG "sit: nexthop == NULL\n");
729 goto tx_error; 690 goto tx_error;
730 } 691 }
731 692
732 addr6 = (const struct in6_addr *)&neigh->primary_key; 693 addr6 = (const struct in6_addr*)&neigh->primary_key;
733 addr_type = ipv6_addr_type(addr6); 694 addr_type = ipv6_addr_type(addr6);
734 695
735 if ((addr_type & IPV6_ADDR_UNICAST) && 696 if ((addr_type & IPV6_ADDR_UNICAST) &&
736 ipv6_addr_is_isatap(addr6)) 697 ipv6_addr_is_isatap(addr6))
737 dst = addr6->s6_addr32[3]; 698 dst = addr6->s6_addr32[3];
738 else 699 else
739 do_tx_error = true;
740
741 neigh_release(neigh);
742 if (do_tx_error)
743 goto tx_error; 700 goto tx_error;
744 } 701 }
745 702
@@ -748,17 +705,17 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
748 705
749 if (!dst) { 706 if (!dst) {
750 struct neighbour *neigh = NULL; 707 struct neighbour *neigh = NULL;
751 bool do_tx_error = false;
752 708
753 if (skb_dst(skb)) 709 if (skb_dst(skb))
754 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); 710 neigh = dst_get_neighbour(skb_dst(skb));
755 711
756 if (neigh == NULL) { 712 if (neigh == NULL) {
757 net_dbg_ratelimited("sit: nexthop == NULL\n"); 713 if (net_ratelimit())
714 printk(KERN_DEBUG "sit: nexthop == NULL\n");
758 goto tx_error; 715 goto tx_error;
759 } 716 }
760 717
761 addr6 = (const struct in6_addr *)&neigh->primary_key; 718 addr6 = (const struct in6_addr*)&neigh->primary_key;
762 addr_type = ipv6_addr_type(addr6); 719 addr_type = ipv6_addr_type(addr6);
763 720
764 if (addr_type == IPV6_ADDR_ANY) { 721 if (addr_type == IPV6_ADDR_ANY) {
@@ -766,14 +723,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
766 addr_type = ipv6_addr_type(addr6); 723 addr_type = ipv6_addr_type(addr6);
767 } 724 }
768 725
769 if ((addr_type & IPV6_ADDR_COMPATv4) != 0) 726 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
770 dst = addr6->s6_addr32[3]; 727 goto tx_error_icmp;
771 else
772 do_tx_error = true;
773 728
774 neigh_release(neigh); 729 dst = addr6->s6_addr32[3];
775 if (do_tx_error)
776 goto tx_error;
777 } 730 }
778 731
779 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, 732 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
@@ -813,7 +766,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
813 } 766 }
814 767
815 if (tunnel->parms.iph.daddr && skb_dst(skb)) 768 if (tunnel->parms.iph.daddr && skb_dst(skb))
816 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 769 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
817 770
818 if (skb->len > mtu) { 771 if (skb->len > mtu) {
819 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 772 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -876,7 +829,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
876 if ((iph->ttl = tiph->ttl) == 0) 829 if ((iph->ttl = tiph->ttl) == 0)
877 iph->ttl = iph6->hop_limit; 830 iph->ttl = iph6->hop_limit;
878 831
879 iptunnel_xmit(skb, dev); 832 nf_reset(skb);
833 tstats = this_cpu_ptr(dev->tstats);
834 __IPTUNNEL_XMIT(tstats, &dev->stats);
880 return NETDEV_TX_OK; 835 return NETDEV_TX_OK;
881 836
882tx_error_icmp: 837tx_error_icmp:
@@ -924,59 +879,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
924 dev->iflink = tunnel->parms.link; 879 dev->iflink = tunnel->parms.link;
925} 880}
926 881
927static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
928{
929 struct net *net = dev_net(t->dev);
930 struct sit_net *sitn = net_generic(net, sit_net_id);
931
932 ipip6_tunnel_unlink(sitn, t);
933 synchronize_net();
934 t->parms.iph.saddr = p->iph.saddr;
935 t->parms.iph.daddr = p->iph.daddr;
936 memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
937 memcpy(t->dev->broadcast, &p->iph.daddr, 4);
938 ipip6_tunnel_link(sitn, t);
939 t->parms.iph.ttl = p->iph.ttl;
940 t->parms.iph.tos = p->iph.tos;
941 if (t->parms.link != p->link) {
942 t->parms.link = p->link;
943 ipip6_tunnel_bind_dev(t->dev);
944 }
945 netdev_state_change(t->dev);
946}
947
948#ifdef CONFIG_IPV6_SIT_6RD
949static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
950 struct ip_tunnel_6rd *ip6rd)
951{
952 struct in6_addr prefix;
953 __be32 relay_prefix;
954
955 if (ip6rd->relay_prefixlen > 32 ||
956 ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
957 return -EINVAL;
958
959 ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
960 if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
961 return -EINVAL;
962 if (ip6rd->relay_prefixlen)
963 relay_prefix = ip6rd->relay_prefix &
964 htonl(0xffffffffUL <<
965 (32 - ip6rd->relay_prefixlen));
966 else
967 relay_prefix = 0;
968 if (relay_prefix != ip6rd->relay_prefix)
969 return -EINVAL;
970
971 t->ip6rd.prefix = prefix;
972 t->ip6rd.relay_prefix = relay_prefix;
973 t->ip6rd.prefixlen = ip6rd->prefixlen;
974 t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
975 netdev_state_change(t->dev);
976 return 0;
977}
978#endif
979
980static int 882static int
981ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 883ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
982{ 884{
@@ -1014,7 +916,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1014 goto done; 916 goto done;
1015#ifdef CONFIG_IPV6_SIT_6RD 917#ifdef CONFIG_IPV6_SIT_6RD
1016 } else { 918 } else {
1017 ip6rd.prefix = t->ip6rd.prefix; 919 ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
1018 ip6rd.relay_prefix = t->ip6rd.relay_prefix; 920 ip6rd.relay_prefix = t->ip6rd.relay_prefix;
1019 ip6rd.prefixlen = t->ip6rd.prefixlen; 921 ip6rd.prefixlen = t->ip6rd.prefixlen;
1020 ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; 922 ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
@@ -1029,7 +931,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1029 case SIOCADDTUNNEL: 931 case SIOCADDTUNNEL:
1030 case SIOCCHGTUNNEL: 932 case SIOCCHGTUNNEL:
1031 err = -EPERM; 933 err = -EPERM;
1032 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 934 if (!capable(CAP_NET_ADMIN))
1033 goto done; 935 goto done;
1034 936
1035 err = -EFAULT; 937 err = -EFAULT;
@@ -1058,13 +960,28 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1058 break; 960 break;
1059 } 961 }
1060 t = netdev_priv(dev); 962 t = netdev_priv(dev);
963 ipip6_tunnel_unlink(sitn, t);
964 synchronize_net();
965 t->parms.iph.saddr = p.iph.saddr;
966 t->parms.iph.daddr = p.iph.daddr;
967 memcpy(dev->dev_addr, &p.iph.saddr, 4);
968 memcpy(dev->broadcast, &p.iph.daddr, 4);
969 ipip6_tunnel_link(sitn, t);
970 netdev_state_change(dev);
1061 } 971 }
1062
1063 ipip6_tunnel_update(t, &p);
1064 } 972 }
1065 973
1066 if (t) { 974 if (t) {
1067 err = 0; 975 err = 0;
976 if (cmd == SIOCCHGTUNNEL) {
977 t->parms.iph.ttl = p.iph.ttl;
978 t->parms.iph.tos = p.iph.tos;
979 if (t->parms.link != p.link) {
980 t->parms.link = p.link;
981 ipip6_tunnel_bind_dev(dev);
982 netdev_state_change(dev);
983 }
984 }
1068 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 985 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1069 err = -EFAULT; 986 err = -EFAULT;
1070 } else 987 } else
@@ -1073,7 +990,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1073 990
1074 case SIOCDELTUNNEL: 991 case SIOCDELTUNNEL:
1075 err = -EPERM; 992 err = -EPERM;
1076 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 993 if (!capable(CAP_NET_ADMIN))
1077 goto done; 994 goto done;
1078 995
1079 if (dev == sitn->fb_tunnel_dev) { 996 if (dev == sitn->fb_tunnel_dev) {
@@ -1106,7 +1023,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1106 case SIOCDELPRL: 1023 case SIOCDELPRL:
1107 case SIOCCHGPRL: 1024 case SIOCCHGPRL:
1108 err = -EPERM; 1025 err = -EPERM;
1109 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1026 if (!capable(CAP_NET_ADMIN))
1110 goto done; 1027 goto done;
1111 err = -EINVAL; 1028 err = -EINVAL;
1112 if (dev == sitn->fb_tunnel_dev) 1029 if (dev == sitn->fb_tunnel_dev)
@@ -1135,7 +1052,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1135 case SIOCCHG6RD: 1052 case SIOCCHG6RD:
1136 case SIOCDEL6RD: 1053 case SIOCDEL6RD:
1137 err = -EPERM; 1054 err = -EPERM;
1138 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1055 if (!capable(CAP_NET_ADMIN))
1139 goto done; 1056 goto done;
1140 1057
1141 err = -EFAULT; 1058 err = -EFAULT;
@@ -1146,9 +1063,31 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1146 t = netdev_priv(dev); 1063 t = netdev_priv(dev);
1147 1064
1148 if (cmd != SIOCDEL6RD) { 1065 if (cmd != SIOCDEL6RD) {
1149 err = ipip6_tunnel_update_6rd(t, &ip6rd); 1066 struct in6_addr prefix;
1150 if (err < 0) 1067 __be32 relay_prefix;
1068
1069 err = -EINVAL;
1070 if (ip6rd.relay_prefixlen > 32 ||
1071 ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64)
1072 goto done;
1073
1074 ipv6_addr_prefix(&prefix, &ip6rd.prefix,
1075 ip6rd.prefixlen);
1076 if (!ipv6_addr_equal(&prefix, &ip6rd.prefix))
1151 goto done; 1077 goto done;
1078 if (ip6rd.relay_prefixlen)
1079 relay_prefix = ip6rd.relay_prefix &
1080 htonl(0xffffffffUL <<
1081 (32 - ip6rd.relay_prefixlen));
1082 else
1083 relay_prefix = 0;
1084 if (relay_prefix != ip6rd.relay_prefix)
1085 goto done;
1086
1087 ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
1088 t->ip6rd.relay_prefix = relay_prefix;
1089 t->ip6rd.prefixlen = ip6rd.prefixlen;
1090 t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
1152 } else 1091 } else
1153 ipip6_tunnel_clone_6rd(dev, sitn); 1092 ipip6_tunnel_clone_6rd(dev, sitn);
1154 1093
@@ -1177,7 +1116,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
1177 .ndo_start_xmit = ipip6_tunnel_xmit, 1116 .ndo_start_xmit = ipip6_tunnel_xmit,
1178 .ndo_do_ioctl = ipip6_tunnel_ioctl, 1117 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1179 .ndo_change_mtu = ipip6_tunnel_change_mtu, 1118 .ndo_change_mtu = ipip6_tunnel_change_mtu,
1180 .ndo_get_stats64= ipip6_get_stats64, 1119 .ndo_get_stats = ipip6_get_stats,
1181}; 1120};
1182 1121
1183static void ipip6_dev_free(struct net_device *dev) 1122static void ipip6_dev_free(struct net_device *dev)
@@ -1242,239 +1181,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1242 return 0; 1181 return 0;
1243} 1182}
1244 1183
1245static void ipip6_netlink_parms(struct nlattr *data[],
1246 struct ip_tunnel_parm *parms)
1247{
1248 memset(parms, 0, sizeof(*parms));
1249
1250 parms->iph.version = 4;
1251 parms->iph.protocol = IPPROTO_IPV6;
1252 parms->iph.ihl = 5;
1253 parms->iph.ttl = 64;
1254
1255 if (!data)
1256 return;
1257
1258 if (data[IFLA_IPTUN_LINK])
1259 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1260
1261 if (data[IFLA_IPTUN_LOCAL])
1262 parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
1263
1264 if (data[IFLA_IPTUN_REMOTE])
1265 parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
1266
1267 if (data[IFLA_IPTUN_TTL]) {
1268 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
1269 if (parms->iph.ttl)
1270 parms->iph.frag_off = htons(IP_DF);
1271 }
1272
1273 if (data[IFLA_IPTUN_TOS])
1274 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
1275
1276 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
1277 parms->iph.frag_off = htons(IP_DF);
1278
1279 if (data[IFLA_IPTUN_FLAGS])
1280 parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
1281}
1282
1283#ifdef CONFIG_IPV6_SIT_6RD
1284/* This function returns true when 6RD attributes are present in the nl msg */
1285static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
1286 struct ip_tunnel_6rd *ip6rd)
1287{
1288 bool ret = false;
1289 memset(ip6rd, 0, sizeof(*ip6rd));
1290
1291 if (!data)
1292 return ret;
1293
1294 if (data[IFLA_IPTUN_6RD_PREFIX]) {
1295 ret = true;
1296 nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX],
1297 sizeof(struct in6_addr));
1298 }
1299
1300 if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
1301 ret = true;
1302 ip6rd->relay_prefix =
1303 nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
1304 }
1305
1306 if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
1307 ret = true;
1308 ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
1309 }
1310
1311 if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
1312 ret = true;
1313 ip6rd->relay_prefixlen =
1314 nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
1315 }
1316
1317 return ret;
1318}
1319#endif
1320
1321static int ipip6_newlink(struct net *src_net, struct net_device *dev,
1322 struct nlattr *tb[], struct nlattr *data[])
1323{
1324 struct net *net = dev_net(dev);
1325 struct ip_tunnel *nt;
1326#ifdef CONFIG_IPV6_SIT_6RD
1327 struct ip_tunnel_6rd ip6rd;
1328#endif
1329 int err;
1330
1331 nt = netdev_priv(dev);
1332 ipip6_netlink_parms(data, &nt->parms);
1333
1334 if (ipip6_tunnel_locate(net, &nt->parms, 0))
1335 return -EEXIST;
1336
1337 err = ipip6_tunnel_create(dev);
1338 if (err < 0)
1339 return err;
1340
1341#ifdef CONFIG_IPV6_SIT_6RD
1342 if (ipip6_netlink_6rd_parms(data, &ip6rd))
1343 err = ipip6_tunnel_update_6rd(nt, &ip6rd);
1344#endif
1345
1346 return err;
1347}
1348
1349static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
1350 struct nlattr *data[])
1351{
1352 struct ip_tunnel *t;
1353 struct ip_tunnel_parm p;
1354 struct net *net = dev_net(dev);
1355 struct sit_net *sitn = net_generic(net, sit_net_id);
1356#ifdef CONFIG_IPV6_SIT_6RD
1357 struct ip_tunnel_6rd ip6rd;
1358#endif
1359
1360 if (dev == sitn->fb_tunnel_dev)
1361 return -EINVAL;
1362
1363 ipip6_netlink_parms(data, &p);
1364
1365 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
1366 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
1367 return -EINVAL;
1368
1369 t = ipip6_tunnel_locate(net, &p, 0);
1370
1371 if (t) {
1372 if (t->dev != dev)
1373 return -EEXIST;
1374 } else
1375 t = netdev_priv(dev);
1376
1377 ipip6_tunnel_update(t, &p);
1378
1379#ifdef CONFIG_IPV6_SIT_6RD
1380 if (ipip6_netlink_6rd_parms(data, &ip6rd))
1381 return ipip6_tunnel_update_6rd(t, &ip6rd);
1382#endif
1383
1384 return 0;
1385}
1386
1387static size_t ipip6_get_size(const struct net_device *dev)
1388{
1389 return
1390 /* IFLA_IPTUN_LINK */
1391 nla_total_size(4) +
1392 /* IFLA_IPTUN_LOCAL */
1393 nla_total_size(4) +
1394 /* IFLA_IPTUN_REMOTE */
1395 nla_total_size(4) +
1396 /* IFLA_IPTUN_TTL */
1397 nla_total_size(1) +
1398 /* IFLA_IPTUN_TOS */
1399 nla_total_size(1) +
1400 /* IFLA_IPTUN_PMTUDISC */
1401 nla_total_size(1) +
1402 /* IFLA_IPTUN_FLAGS */
1403 nla_total_size(2) +
1404#ifdef CONFIG_IPV6_SIT_6RD
1405 /* IFLA_IPTUN_6RD_PREFIX */
1406 nla_total_size(sizeof(struct in6_addr)) +
1407 /* IFLA_IPTUN_6RD_RELAY_PREFIX */
1408 nla_total_size(4) +
1409 /* IFLA_IPTUN_6RD_PREFIXLEN */
1410 nla_total_size(2) +
1411 /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
1412 nla_total_size(2) +
1413#endif
1414 0;
1415}
1416
1417static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
1418{
1419 struct ip_tunnel *tunnel = netdev_priv(dev);
1420 struct ip_tunnel_parm *parm = &tunnel->parms;
1421
1422 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1423 nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
1424 nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
1425 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
1426 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1427 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
1428 !!(parm->iph.frag_off & htons(IP_DF))) ||
1429 nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags))
1430 goto nla_put_failure;
1431
1432#ifdef CONFIG_IPV6_SIT_6RD
1433 if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr),
1434 &tunnel->ip6rd.prefix) ||
1435 nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
1436 tunnel->ip6rd.relay_prefix) ||
1437 nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
1438 tunnel->ip6rd.prefixlen) ||
1439 nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
1440 tunnel->ip6rd.relay_prefixlen))
1441 goto nla_put_failure;
1442#endif
1443
1444 return 0;
1445
1446nla_put_failure:
1447 return -EMSGSIZE;
1448}
1449
1450static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
1451 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
1452 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
1453 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
1454 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
1455 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
1456 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
1457 [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 },
1458#ifdef CONFIG_IPV6_SIT_6RD
1459 [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) },
1460 [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 },
1461 [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
1462 [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
1463#endif
1464};
1465
1466static struct rtnl_link_ops sit_link_ops __read_mostly = {
1467 .kind = "sit",
1468 .maxtype = IFLA_IPTUN_MAX,
1469 .policy = ipip6_policy,
1470 .priv_size = sizeof(struct ip_tunnel),
1471 .setup = ipip6_tunnel_setup,
1472 .newlink = ipip6_newlink,
1473 .changelink = ipip6_changelink,
1474 .get_size = ipip6_get_size,
1475 .fill_info = ipip6_fill_info,
1476};
1477
1478static struct xfrm_tunnel sit_handler __read_mostly = { 1184static struct xfrm_tunnel sit_handler __read_mostly = {
1479 .handler = ipip6_rcv, 1185 .handler = ipip6_rcv,
1480 .err_handler = ipip6_err, 1186 .err_handler = ipip6_err,
@@ -1561,7 +1267,6 @@ static struct pernet_operations sit_net_ops = {
1561 1267
1562static void __exit sit_cleanup(void) 1268static void __exit sit_cleanup(void)
1563{ 1269{
1564 rtnl_link_unregister(&sit_link_ops);
1565 xfrm4_tunnel_deregister(&sit_handler, AF_INET6); 1270 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1566 1271
1567 unregister_pernet_device(&sit_net_ops); 1272 unregister_pernet_device(&sit_net_ops);
@@ -1572,28 +1277,17 @@ static int __init sit_init(void)
1572{ 1277{
1573 int err; 1278 int err;
1574 1279
1575 pr_info("IPv6 over IPv4 tunneling driver\n"); 1280 printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
1576 1281
1577 err = register_pernet_device(&sit_net_ops); 1282 err = register_pernet_device(&sit_net_ops);
1578 if (err < 0) 1283 if (err < 0)
1579 return err; 1284 return err;
1580 err = xfrm4_tunnel_register(&sit_handler, AF_INET6); 1285 err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
1581 if (err < 0) { 1286 if (err < 0) {
1582 pr_info("%s: can't add protocol\n", __func__); 1287 unregister_pernet_device(&sit_net_ops);
1583 goto xfrm_tunnel_failed; 1288 printk(KERN_INFO "sit init: Can't add protocol\n");
1584 } 1289 }
1585 err = rtnl_link_register(&sit_link_ops);
1586 if (err < 0)
1587 goto rtnl_link_failed;
1588
1589out:
1590 return err; 1290 return err;
1591
1592rtnl_link_failed:
1593 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1594xfrm_tunnel_failed:
1595 unregister_pernet_device(&sit_net_ops);
1596 goto out;
1597} 1291}
1598 1292
1599module_init(sit_init); 1293module_init(sit_init);
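
The SIOCADD6RD/SIOCCHG6RD branch in the sit.c hunk above rejects a 6rd configuration unless the relay prefix length fits in 32 bits, the delegated prefix plus the embedded IPv4 suffix fits in 64 bits, and both prefixes are zero beyond their stated lengths. Below is a minimal userspace sketch of that arithmetic, assuming textual input; validate_6rd() is an illustrative helper, not a kernel function.

/* Sketch of the 6rd parameter checks done in the SIOCCHG6RD handler above.
 * Userspace illustration only; validate_6rd() is a hypothetical helper. */
#include <stdio.h>
#include <stdint.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static int validate_6rd(const char *prefix_str, unsigned prefixlen,
                        const char *relay_str, unsigned relay_prefixlen)
{
    struct in6_addr prefix;
    struct in_addr relay;
    uint32_t relay_be, mask_be;
    int i;

    if (inet_pton(AF_INET6, prefix_str, &prefix) != 1 ||
        inet_pton(AF_INET, relay_str, &relay) != 1)
        return -1;

    /* Same bounds as the ioctl handler: relay prefix at most /32, and the
     * delegated prefix plus the embedded IPv4 suffix must fit in /64. */
    if (relay_prefixlen > 32 ||
        prefixlen + (32 - relay_prefixlen) > 64)
        return -1;

    /* The IPv6 prefix must be zero beyond prefixlen bits. */
    for (i = prefixlen; i < 128; i++)
        if (prefix.s6_addr[i / 8] & (0x80 >> (i % 8)))
            return -1;

    /* The relay prefix must be zero beyond relay_prefixlen bits, mirroring
     * relay_prefix & htonl(0xffffffffUL << (32 - relay_prefixlen)). */
    relay_be = relay.s_addr;
    mask_be = relay_prefixlen ?
              htonl(0xffffffffUL << (32 - relay_prefixlen)) : 0;
    if ((relay_be & mask_be) != relay_be)
        return -1;

    return 0;
}

int main(void)
{
    printf("2001:db8::/32 via 192.88.99.0/24 -> %s\n",
           validate_6rd("2001:db8::", 32, "192.88.99.0", 24) ? "invalid" : "ok");
    printf("2001:db8::/56 via 192.0.2.0/8   -> %s\n",
           validate_6rd("2001:db8::", 56, "192.0.2.0", 8) ? "invalid" : "ok");
    return 0;
}

Compiled standalone, the first call prints "ok" and the second is rejected because 56 + (32 - 8) exceeds 64.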
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 40161977f7c..ac838965ff3 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -21,6 +21,9 @@
21#include <net/ipv6.h> 21#include <net/ipv6.h>
22#include <net/tcp.h> 22#include <net/tcp.h>
23 23
24extern int sysctl_tcp_syncookies;
25extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
26
24#define COOKIEBITS 24 /* Upper bits store count */ 27#define COOKIEBITS 24 /* Upper bits store count */
25#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) 28#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
26 29
@@ -112,7 +115,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
112 & COOKIEMASK; 115 & COOKIEMASK;
113} 116}
114 117
115__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) 118__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
116{ 119{
117 const struct ipv6hdr *iph = ipv6_hdr(skb); 120 const struct ipv6hdr *iph = ipv6_hdr(skb);
118 const struct tcphdr *th = tcp_hdr(skb); 121 const struct tcphdr *th = tcp_hdr(skb);
@@ -134,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16
134 jiffies / (HZ * 60), mssind); 137 jiffies / (HZ * 60), mssind);
135} 138}
136 139
137static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) 140static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
138{ 141{
139 const struct ipv6hdr *iph = ipv6_hdr(skb); 142 const struct ipv6hdr *iph = ipv6_hdr(skb);
140 const struct tcphdr *th = tcp_hdr(skb); 143 const struct tcphdr *th = tcp_hdr(skb);
@@ -149,7 +152,7 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
149struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) 152struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
150{ 153{
151 struct tcp_options_received tcp_opt; 154 struct tcp_options_received tcp_opt;
152 const u8 *hash_location; 155 u8 *hash_location;
153 struct inet_request_sock *ireq; 156 struct inet_request_sock *ireq;
154 struct inet6_request_sock *ireq6; 157 struct inet6_request_sock *ireq6;
155 struct tcp_request_sock *treq; 158 struct tcp_request_sock *treq;
@@ -177,7 +180,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
177 180
178 /* check for timestamp cookie support */ 181 /* check for timestamp cookie support */
179 memset(&tcp_opt, 0, sizeof(tcp_opt)); 182 memset(&tcp_opt, 0, sizeof(tcp_opt));
180 tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); 183 tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
181 184
182 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) 185 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
183 goto out; 186 goto out;
@@ -190,7 +193,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
190 ireq = inet_rsk(req); 193 ireq = inet_rsk(req);
191 ireq6 = inet6_rsk(req); 194 ireq6 = inet6_rsk(req);
192 treq = tcp_rsk(req); 195 treq = tcp_rsk(req);
193 treq->listener = NULL;
194 196
195 if (security_inet_conn_request(sk, skb, req)) 197 if (security_inet_conn_request(sk, skb, req))
196 goto out_free; 198 goto out_free;
@@ -198,8 +200,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
198 req->mss = mss; 200 req->mss = mss;
199 ireq->rmt_port = th->source; 201 ireq->rmt_port = th->source;
200 ireq->loc_port = th->dest; 202 ireq->loc_port = th->dest;
201 ireq6->rmt_addr = ipv6_hdr(skb)->saddr; 203 ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
202 ireq6->loc_addr = ipv6_hdr(skb)->daddr; 204 ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
203 if (ipv6_opt_accepted(sk, skb) || 205 if (ipv6_opt_accepted(sk, skb) ||
204 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || 206 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
205 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { 207 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -214,7 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
214 ireq6->iif = inet6_iif(skb); 216 ireq6->iif = inet6_iif(skb);
215 217
216 req->expires = 0UL; 218 req->expires = 0UL;
217 req->num_retrans = 0; 219 req->retrans = 0;
218 ireq->ecn_ok = ecn_ok; 220 ireq->ecn_ok = ecn_ok;
219 ireq->snd_wscale = tcp_opt.snd_wscale; 221 ireq->snd_wscale = tcp_opt.snd_wscale;
220 ireq->sack_ok = tcp_opt.sack_ok; 222 ireq->sack_ok = tcp_opt.sack_ok;
@@ -235,9 +237,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
235 struct flowi6 fl6; 237 struct flowi6 fl6;
236 memset(&fl6, 0, sizeof(fl6)); 238 memset(&fl6, 0, sizeof(fl6));
237 fl6.flowi6_proto = IPPROTO_TCP; 239 fl6.flowi6_proto = IPPROTO_TCP;
238 fl6.daddr = ireq6->rmt_addr; 240 ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
239 final_p = fl6_update_dst(&fl6, np->opt, &final); 241 final_p = fl6_update_dst(&fl6, np->opt, &final);
240 fl6.saddr = ireq6->loc_addr; 242 ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
241 fl6.flowi6_oif = sk->sk_bound_dev_if; 243 fl6.flowi6_oif = sk->sk_bound_dev_if;
242 fl6.flowi6_mark = sk->sk_mark; 244 fl6.flowi6_mark = sk->sk_mark;
243 fl6.fl6_dport = inet_rsk(req)->rmt_port; 245 fl6.fl6_dport = inet_rsk(req)->rmt_port;
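
For orientation, the COOKIEBITS/COOKIEMASK constants kept in this file carve a 32-bit ISN into a low 24-bit hash portion and a count stored in the upper bits ("Upper bits store count" in the comment above). A standalone sketch of that split follows, simplified to a plain bitfield rather than the kernel's additive cookie construction.

/* Illustration of the COOKIEBITS/COOKIEMASK split used above: the upper
 * bits of a cookie carry the minute counter, the low 24 bits the hash.
 * Standalone sketch only, not the kernel's actual cookie computation. */
#include <stdio.h>
#include <stdint.h>

#define COOKIEBITS 24
#define COOKIEMASK (((uint32_t)1 << COOKIEBITS) - 1)

int main(void)
{
    uint32_t count = 37;          /* e.g. jiffies / (HZ * 60) */
    uint32_t hash  = 0x00abcdef;  /* 24-bit hash output */
    uint32_t cookie = (count << COOKIEBITS) + (hash & COOKIEMASK);

    printf("cookie     = 0x%08x\n", cookie);
    printf("count part = %u\n", cookie >> COOKIEBITS);
    printf("hash part  = 0x%06x\n", cookie & COOKIEMASK);
    return 0;
}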
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e85c48bd404..6dcf5e7d661 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -10,14 +10,37 @@
10#include <linux/in6.h> 10#include <linux/in6.h>
11#include <linux/ipv6.h> 11#include <linux/ipv6.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/export.h>
14#include <net/ndisc.h> 13#include <net/ndisc.h>
15#include <net/ipv6.h> 14#include <net/ipv6.h>
16#include <net/addrconf.h> 15#include <net/addrconf.h>
17#include <net/inet_frag.h> 16#include <net/inet_frag.h>
18 17
18static struct ctl_table empty[1];
19
20static ctl_table ipv6_static_skeleton[] = {
21 {
22 .procname = "neigh",
23 .maxlen = 0,
24 .mode = 0555,
25 .child = empty,
26 },
27 { }
28};
29
19static ctl_table ipv6_table_template[] = { 30static ctl_table ipv6_table_template[] = {
20 { 31 {
32 .procname = "route",
33 .maxlen = 0,
34 .mode = 0555,
35 .child = ipv6_route_table_template
36 },
37 {
38 .procname = "icmp",
39 .maxlen = 0,
40 .mode = 0555,
41 .child = ipv6_icmp_table_template
42 },
43 {
21 .procname = "bindv6only", 44 .procname = "bindv6only",
22 .data = &init_net.ipv6.sysctl.bindv6only, 45 .data = &init_net.ipv6.sysctl.bindv6only,
23 .maxlen = sizeof(int), 46 .maxlen = sizeof(int),
@@ -38,6 +61,13 @@ static ctl_table ipv6_rotable[] = {
38 { } 61 { }
39}; 62};
40 63
64struct ctl_path net_ipv6_ctl_path[] = {
65 { .procname = "net", },
66 { .procname = "ipv6", },
67 { },
68};
69EXPORT_SYMBOL_GPL(net_ipv6_ctl_path);
70
41static int __net_init ipv6_sysctl_net_init(struct net *net) 71static int __net_init ipv6_sysctl_net_init(struct net *net)
42{ 72{
43 struct ctl_table *ipv6_table; 73 struct ctl_table *ipv6_table;
@@ -50,37 +80,28 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
50 GFP_KERNEL); 80 GFP_KERNEL);
51 if (!ipv6_table) 81 if (!ipv6_table)
52 goto out; 82 goto out;
53 ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
54 83
55 ipv6_route_table = ipv6_route_sysctl_init(net); 84 ipv6_route_table = ipv6_route_sysctl_init(net);
56 if (!ipv6_route_table) 85 if (!ipv6_route_table)
57 goto out_ipv6_table; 86 goto out_ipv6_table;
87 ipv6_table[0].child = ipv6_route_table;
58 88
59 ipv6_icmp_table = ipv6_icmp_sysctl_init(net); 89 ipv6_icmp_table = ipv6_icmp_sysctl_init(net);
60 if (!ipv6_icmp_table) 90 if (!ipv6_icmp_table)
61 goto out_ipv6_route_table; 91 goto out_ipv6_route_table;
92 ipv6_table[1].child = ipv6_icmp_table;
62 93
63 net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", ipv6_table); 94 ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
64 if (!net->ipv6.sysctl.hdr)
65 goto out_ipv6_icmp_table;
66
67 net->ipv6.sysctl.route_hdr =
68 register_net_sysctl(net, "net/ipv6/route", ipv6_route_table);
69 if (!net->ipv6.sysctl.route_hdr)
70 goto out_unregister_ipv6_table;
71 95
72 net->ipv6.sysctl.icmp_hdr = 96 net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
73 register_net_sysctl(net, "net/ipv6/icmp", ipv6_icmp_table); 97 ipv6_table);
74 if (!net->ipv6.sysctl.icmp_hdr) 98 if (!net->ipv6.sysctl.table)
75 goto out_unregister_route_table; 99 goto out_ipv6_icmp_table;
76 100
77 err = 0; 101 err = 0;
78out: 102out:
79 return err; 103 return err;
80out_unregister_route_table: 104
81 unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
82out_unregister_ipv6_table:
83 unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
84out_ipv6_icmp_table: 105out_ipv6_icmp_table:
85 kfree(ipv6_icmp_table); 106 kfree(ipv6_icmp_table);
86out_ipv6_route_table: 107out_ipv6_route_table:
@@ -96,13 +117,11 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net)
96 struct ctl_table *ipv6_route_table; 117 struct ctl_table *ipv6_route_table;
97 struct ctl_table *ipv6_icmp_table; 118 struct ctl_table *ipv6_icmp_table;
98 119
99 ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg; 120 ipv6_table = net->ipv6.sysctl.table->ctl_table_arg;
100 ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg; 121 ipv6_route_table = ipv6_table[0].child;
101 ipv6_icmp_table = net->ipv6.sysctl.icmp_hdr->ctl_table_arg; 122 ipv6_icmp_table = ipv6_table[1].child;
102 123
103 unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr); 124 unregister_net_sysctl_table(net->ipv6.sysctl.table);
104 unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
105 unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
106 125
107 kfree(ipv6_table); 126 kfree(ipv6_table);
108 kfree(ipv6_route_table); 127 kfree(ipv6_route_table);
@@ -120,7 +139,7 @@ int ipv6_sysctl_register(void)
120{ 139{
121 int err = -ENOMEM; 140 int err = -ENOMEM;
122 141
123 ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable); 142 ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_rotable);
124 if (ip6_header == NULL) 143 if (ip6_header == NULL)
125 goto out; 144 goto out;
126 145
@@ -140,3 +159,18 @@ void ipv6_sysctl_unregister(void)
140 unregister_net_sysctl_table(ip6_header); 159 unregister_net_sysctl_table(ip6_header);
141 unregister_pernet_subsys(&ipv6_sysctl_net_ops); 160 unregister_pernet_subsys(&ipv6_sysctl_net_ops);
142} 161}
162
163static struct ctl_table_header *ip6_base;
164
165int ipv6_static_sysctl_register(void)
166{
167 ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
168 if (ip6_base == NULL)
169 return -ENOMEM;
170 return 0;
171}
172
173void ipv6_static_sysctl_unregister(void)
174{
175 unregister_net_sysctl_table(ip6_base);
176}
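
The pre-register_net_sysctl scheme restored here wires sub-tables together through .child pointers plus a ctl_path, so net.ipv6.route and net.ipv6.icmp hang off slots [0] and [1] of ipv6_table. Below is a toy userspace mock of that chaining pattern; struct toy_ctl_table and the leaf names are placeholders, not the kernel's struct ctl_table.

/* Toy mock of the ctl_table .child chaining restored above: each node names
 * one /proc/sys path component and optionally points at a child table. */
#include <stdio.h>

struct toy_ctl_table {
    const char *procname;
    struct toy_ctl_table *child;   /* NULL for leaf entries */
};

static struct toy_ctl_table route_table[] = { { "flush", NULL }, { NULL, NULL } };
static struct toy_ctl_table icmp_table[]  = { { "ratelimit", NULL }, { NULL, NULL } };

static struct toy_ctl_table ipv6_table[] = {
    { "route",      route_table },
    { "icmp",       icmp_table },
    { "bindv6only", NULL },
    { NULL, NULL }
};

static void dump(const char *prefix, struct toy_ctl_table *t)
{
    char path[256];
    for (; t->procname; t++) {
        snprintf(path, sizeof(path), "%s/%s", prefix, t->procname);
        if (t->child)
            dump(path, t->child);
        else
            printf("%s\n", path);
    }
}

int main(void)
{
    /* Mirrors the skeleton registered in ipv6_sysctl_net_init() above. */
    dump("/proc/sys/net/ipv6", ipv6_table);
    return 0;
}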
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93825dd3a7c..cdbce216521 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -62,7 +62,6 @@
62#include <net/netdma.h> 62#include <net/netdma.h>
63#include <net/inet_common.h> 63#include <net/inet_common.h>
64#include <net/secure_seq.h> 64#include <net/secure_seq.h>
65#include <net/tcp_memcontrol.h>
66 65
67#include <asm/uaccess.h> 66#include <asm/uaccess.h>
68 67
@@ -77,6 +76,9 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
77 struct request_sock *req); 76 struct request_sock *req);
78 77
79static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 78static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
79static void __tcp_v6_send_check(struct sk_buff *skb,
80 const struct in6_addr *saddr,
81 const struct in6_addr *daddr);
80 82
81static const struct inet_connection_sock_af_ops ipv6_mapped; 83static const struct inet_connection_sock_af_ops ipv6_mapped;
82static const struct inet_connection_sock_af_ops ipv6_specific; 84static const struct inet_connection_sock_af_ops ipv6_specific;
@@ -91,18 +93,6 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
91} 93}
92#endif 94#endif
93 95
94static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
95{
96 struct dst_entry *dst = skb_dst(skb);
97 const struct rt6_info *rt = (const struct rt6_info *)dst;
98
99 dst_hold(dst);
100 sk->sk_rx_dst = dst;
101 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
102 if (rt->rt6i_node)
103 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
104}
105
106static void tcp_v6_hash(struct sock *sk) 96static void tcp_v6_hash(struct sock *sk)
107{ 97{
108 if (sk->sk_state != TCP_CLOSE) { 98 if (sk->sk_state != TCP_CLOSE) {
@@ -116,7 +106,15 @@ static void tcp_v6_hash(struct sock *sk)
116 } 106 }
117} 107}
118 108
119static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) 109static __inline__ __sum16 tcp_v6_check(int len,
110 const struct in6_addr *saddr,
111 const struct in6_addr *daddr,
112 __wsum base)
113{
114 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
115}
116
117static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
120{ 118{
121 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, 119 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
122 ipv6_hdr(skb)->saddr.s6_addr32, 120 ipv6_hdr(skb)->saddr.s6_addr32,
@@ -155,7 +153,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
155 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 153 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
156 if (flowlabel == NULL) 154 if (flowlabel == NULL)
157 return -EINVAL; 155 return -EINVAL;
158 usin->sin6_addr = flowlabel->dst; 156 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
159 fl6_sock_release(flowlabel); 157 fl6_sock_release(flowlabel);
160 } 158 }
161 } 159 }
@@ -197,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
197 tp->write_seq = 0; 195 tp->write_seq = 0;
198 } 196 }
199 197
200 np->daddr = usin->sin6_addr; 198 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
201 np->flow_label = fl6.flowlabel; 199 np->flow_label = fl6.flowlabel;
202 200
203 /* 201 /*
@@ -246,8 +244,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
246 saddr = &np->rcv_saddr; 244 saddr = &np->rcv_saddr;
247 245
248 fl6.flowi6_proto = IPPROTO_TCP; 246 fl6.flowi6_proto = IPPROTO_TCP;
249 fl6.daddr = np->daddr; 247 ipv6_addr_copy(&fl6.daddr, &np->daddr);
250 fl6.saddr = saddr ? *saddr : np->saddr; 248 ipv6_addr_copy(&fl6.saddr,
249 (saddr ? saddr : &np->saddr));
251 fl6.flowi6_oif = sk->sk_bound_dev_if; 250 fl6.flowi6_oif = sk->sk_bound_dev_if;
252 fl6.flowi6_mark = sk->sk_mark; 251 fl6.flowi6_mark = sk->sk_mark;
253 fl6.fl6_dport = usin->sin6_port; 252 fl6.fl6_dport = usin->sin6_port;
@@ -265,11 +264,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
265 264
266 if (saddr == NULL) { 265 if (saddr == NULL) {
267 saddr = &fl6.saddr; 266 saddr = &fl6.saddr;
268 np->rcv_saddr = *saddr; 267 ipv6_addr_copy(&np->rcv_saddr, saddr);
269 } 268 }
270 269
271 /* set the source address */ 270 /* set the source address */
272 np->saddr = *saddr; 271 ipv6_addr_copy(&np->saddr, saddr);
273 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 272 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
274 273
275 sk->sk_gso_type = SKB_GSO_TCPV6; 274 sk->sk_gso_type = SKB_GSO_TCPV6;
@@ -278,8 +277,22 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
278 rt = (struct rt6_info *) dst; 277 rt = (struct rt6_info *) dst;
279 if (tcp_death_row.sysctl_tw_recycle && 278 if (tcp_death_row.sysctl_tw_recycle &&
280 !tp->rx_opt.ts_recent_stamp && 279 !tp->rx_opt.ts_recent_stamp &&
281 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) 280 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
282 tcp_fetch_timewait_stamp(sk, dst); 281 struct inet_peer *peer = rt6_get_peer(rt);
282 /*
283 * VJ's idea. We save last timestamp seen from
284 * the destination in peer table, when entering state
285 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
286 * when trying new connection.
287 */
288 if (peer) {
289 inet_peer_refcheck(peer);
290 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
291 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
292 tp->rx_opt.ts_recent = peer->tcp_ts;
293 }
294 }
295 }
283 296
284 icsk->icsk_ext_hdr_len = 0; 297 icsk->icsk_ext_hdr_len = 0;
285 if (np->opt) 298 if (np->opt)
@@ -295,7 +308,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
295 if (err) 308 if (err)
296 goto late_failure; 309 goto late_failure;
297 310
298 if (!tp->write_seq && likely(!tp->repair)) 311 if (!tp->write_seq)
299 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, 312 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
300 np->daddr.s6_addr32, 313 np->daddr.s6_addr32,
301 inet->inet_sport, 314 inet->inet_sport,
@@ -316,23 +329,6 @@ failure:
316 return err; 329 return err;
317} 330}
318 331
319static void tcp_v6_mtu_reduced(struct sock *sk)
320{
321 struct dst_entry *dst;
322
323 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
324 return;
325
326 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
327 if (!dst)
328 return;
329
330 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
331 tcp_sync_mss(sk, dst_mtu(dst));
332 tcp_simple_retransmit(sk);
333 }
334}
335
336static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 332static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
337 u8 type, u8 code, int offset, __be32 info) 333 u8 type, u8 code, int offset, __be32 info)
338{ 334{
@@ -360,7 +356,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
360 } 356 }
361 357
362 bh_lock_sock(sk); 358 bh_lock_sock(sk);
363 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 359 if (sock_owned_by_user(sk))
364 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 360 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
365 361
366 if (sk->sk_state == TCP_CLOSE) 362 if (sk->sk_state == TCP_CLOSE)
@@ -381,20 +377,49 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
381 377
382 np = inet6_sk(sk); 378 np = inet6_sk(sk);
383 379
384 if (type == NDISC_REDIRECT) { 380 if (type == ICMPV6_PKT_TOOBIG) {
385 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 381 struct dst_entry *dst;
386 382
387 if (dst) 383 if (sock_owned_by_user(sk))
388 dst->ops->redirect(dst, sk, skb); 384 goto out;
389 } 385 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
386 goto out;
390 387
391 if (type == ICMPV6_PKT_TOOBIG) { 388 /* icmp should have updated the destination cache entry */
392 tp->mtu_info = ntohl(info); 389 dst = __sk_dst_check(sk, np->dst_cookie);
393 if (!sock_owned_by_user(sk)) 390
394 tcp_v6_mtu_reduced(sk); 391 if (dst == NULL) {
395 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 392 struct inet_sock *inet = inet_sk(sk);
396 &tp->tsq_flags)) 393 struct flowi6 fl6;
397 sock_hold(sk); 394
395 /* BUGGG_FUTURE: Again, it is not clear how
396 to handle rthdr case. Ignore this complexity
397 for now.
398 */
399 memset(&fl6, 0, sizeof(fl6));
400 fl6.flowi6_proto = IPPROTO_TCP;
401 ipv6_addr_copy(&fl6.daddr, &np->daddr);
402 ipv6_addr_copy(&fl6.saddr, &np->saddr);
403 fl6.flowi6_oif = sk->sk_bound_dev_if;
404 fl6.flowi6_mark = sk->sk_mark;
405 fl6.fl6_dport = inet->inet_dport;
406 fl6.fl6_sport = inet->inet_sport;
407 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
408
409 dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
410 if (IS_ERR(dst)) {
411 sk->sk_err_soft = -PTR_ERR(dst);
412 goto out;
413 }
414
415 } else
416 dst_hold(dst);
417
418 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
419 tcp_sync_mss(sk, dst_mtu(dst));
420 tcp_simple_retransmit(sk);
421 } /* else let the usual retransmit timer handle it */
422 dst_release(dst);
398 goto out; 423 goto out;
399 } 424 }
400 425
@@ -450,46 +475,60 @@ out:
450} 475}
451 476
452 477
453static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, 478static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
454 struct flowi6 *fl6, 479 struct request_values *rvp)
455 struct request_sock *req,
456 struct request_values *rvp,
457 u16 queue_mapping)
458{ 480{
459 struct inet6_request_sock *treq = inet6_rsk(req); 481 struct inet6_request_sock *treq = inet6_rsk(req);
460 struct ipv6_pinfo *np = inet6_sk(sk); 482 struct ipv6_pinfo *np = inet6_sk(sk);
461 struct sk_buff * skb; 483 struct sk_buff * skb;
462 int err = -ENOMEM; 484 struct ipv6_txoptions *opt = NULL;
485 struct in6_addr * final_p, final;
486 struct flowi6 fl6;
487 struct dst_entry *dst;
488 int err;
463 489
464 /* First, grab a route. */ 490 memset(&fl6, 0, sizeof(fl6));
465 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) 491 fl6.flowi6_proto = IPPROTO_TCP;
466 goto done; 492 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
493 ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
494 fl6.flowlabel = 0;
495 fl6.flowi6_oif = treq->iif;
496 fl6.flowi6_mark = sk->sk_mark;
497 fl6.fl6_dport = inet_rsk(req)->rmt_port;
498 fl6.fl6_sport = inet_rsk(req)->loc_port;
499 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
467 500
468 skb = tcp_make_synack(sk, dst, req, rvp, NULL); 501 opt = np->opt;
502 final_p = fl6_update_dst(&fl6, opt, &final);
469 503
504 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
505 if (IS_ERR(dst)) {
506 err = PTR_ERR(dst);
507 dst = NULL;
508 goto done;
509 }
510 skb = tcp_make_synack(sk, dst, req, rvp);
511 err = -ENOMEM;
470 if (skb) { 512 if (skb) {
471 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); 513 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
472 514
473 fl6->daddr = treq->rmt_addr; 515 ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
474 skb_set_queue_mapping(skb, queue_mapping); 516 err = ip6_xmit(sk, skb, &fl6, opt);
475 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
476 err = net_xmit_eval(err); 517 err = net_xmit_eval(err);
477 } 518 }
478 519
479done: 520done:
521 if (opt && opt != np->opt)
522 sock_kfree_s(sk, opt, opt->tot_len);
523 dst_release(dst);
480 return err; 524 return err;
481} 525}
482 526
483static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, 527static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
484 struct request_values *rvp) 528 struct request_values *rvp)
485{ 529{
486 struct flowi6 fl6; 530 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
487 int res; 531 return tcp_v6_send_synack(sk, req, rvp);
488
489 res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
490 if (!res)
491 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
492 return res;
493} 532}
494 533
495static void tcp_v6_reqsk_destructor(struct request_sock *req) 534static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -501,7 +540,19 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
501static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, 540static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
502 const struct in6_addr *addr) 541 const struct in6_addr *addr)
503{ 542{
504 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6); 543 struct tcp_sock *tp = tcp_sk(sk);
544 int i;
545
546 BUG_ON(tp == NULL);
547
548 if (!tp->md5sig_info || !tp->md5sig_info->entries6)
549 return NULL;
550
551 for (i = 0; i < tp->md5sig_info->entries6; i++) {
552 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
553 return &tp->md5sig_info->keys6[i].base;
554 }
555 return NULL;
505} 556}
506 557
507static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, 558static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
@@ -516,11 +567,137 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
516 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); 567 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
517} 568}
518 569
570static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
571 char *newkey, u8 newkeylen)
572{
573 /* Add key to the list */
574 struct tcp_md5sig_key *key;
575 struct tcp_sock *tp = tcp_sk(sk);
576 struct tcp6_md5sig_key *keys;
577
578 key = tcp_v6_md5_do_lookup(sk, peer);
579 if (key) {
580 /* modify existing entry - just update that one */
581 kfree(key->key);
582 key->key = newkey;
583 key->keylen = newkeylen;
584 } else {
585 /* reallocate new list if current one is full. */
586 if (!tp->md5sig_info) {
587 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
588 if (!tp->md5sig_info) {
589 kfree(newkey);
590 return -ENOMEM;
591 }
592 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
593 }
594 if (tp->md5sig_info->entries6 == 0 &&
595 tcp_alloc_md5sig_pool(sk) == NULL) {
596 kfree(newkey);
597 return -ENOMEM;
598 }
599 if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
600 keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
601 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
602
603 if (!keys) {
604 kfree(newkey);
605 if (tp->md5sig_info->entries6 == 0)
606 tcp_free_md5sig_pool();
607 return -ENOMEM;
608 }
609
610 if (tp->md5sig_info->entries6)
611 memmove(keys, tp->md5sig_info->keys6,
612 (sizeof (tp->md5sig_info->keys6[0]) *
613 tp->md5sig_info->entries6));
614
615 kfree(tp->md5sig_info->keys6);
616 tp->md5sig_info->keys6 = keys;
617 tp->md5sig_info->alloced6++;
618 }
619
620 ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
621 peer);
622 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
623 tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
624
625 tp->md5sig_info->entries6++;
626 }
627 return 0;
628}
629
630static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
631 u8 *newkey, __u8 newkeylen)
632{
633 return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
634 newkey, newkeylen);
635}
636
637static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
638{
639 struct tcp_sock *tp = tcp_sk(sk);
640 int i;
641
642 for (i = 0; i < tp->md5sig_info->entries6; i++) {
643 if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
644 /* Free the key */
645 kfree(tp->md5sig_info->keys6[i].base.key);
646 tp->md5sig_info->entries6--;
647
648 if (tp->md5sig_info->entries6 == 0) {
649 kfree(tp->md5sig_info->keys6);
650 tp->md5sig_info->keys6 = NULL;
651 tp->md5sig_info->alloced6 = 0;
652 tcp_free_md5sig_pool();
653 } else {
654 /* shrink the database */
655 if (tp->md5sig_info->entries6 != i)
656 memmove(&tp->md5sig_info->keys6[i],
657 &tp->md5sig_info->keys6[i+1],
658 (tp->md5sig_info->entries6 - i)
659 * sizeof (tp->md5sig_info->keys6[0]));
660 }
661 return 0;
662 }
663 }
664 return -ENOENT;
665}
666
667static void tcp_v6_clear_md5_list (struct sock *sk)
668{
669 struct tcp_sock *tp = tcp_sk(sk);
670 int i;
671
672 if (tp->md5sig_info->entries6) {
673 for (i = 0; i < tp->md5sig_info->entries6; i++)
674 kfree(tp->md5sig_info->keys6[i].base.key);
675 tp->md5sig_info->entries6 = 0;
676 tcp_free_md5sig_pool();
677 }
678
679 kfree(tp->md5sig_info->keys6);
680 tp->md5sig_info->keys6 = NULL;
681 tp->md5sig_info->alloced6 = 0;
682
683 if (tp->md5sig_info->entries4) {
684 for (i = 0; i < tp->md5sig_info->entries4; i++)
685 kfree(tp->md5sig_info->keys4[i].base.key);
686 tp->md5sig_info->entries4 = 0;
687 tcp_free_md5sig_pool();
688 }
689
690 kfree(tp->md5sig_info->keys4);
691 tp->md5sig_info->keys4 = NULL;
692 tp->md5sig_info->alloced4 = 0;
693}
694
519static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, 695static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
520 int optlen) 696 int optlen)
521{ 697{
522 struct tcp_md5sig cmd; 698 struct tcp_md5sig cmd;
523 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 699 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
700 u8 *newkey;
524 701
525 if (optlen < sizeof(cmd)) 702 if (optlen < sizeof(cmd))
526 return -EINVAL; 703 return -EINVAL;
@@ -532,22 +709,36 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
532 return -EINVAL; 709 return -EINVAL;
533 710
534 if (!cmd.tcpm_keylen) { 711 if (!cmd.tcpm_keylen) {
712 if (!tcp_sk(sk)->md5sig_info)
713 return -ENOENT;
535 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 714 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
536 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 715 return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
537 AF_INET); 716 return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
538 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
539 AF_INET6);
540 } 717 }
541 718
542 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 719 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
543 return -EINVAL; 720 return -EINVAL;
544 721
545 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 722 if (!tcp_sk(sk)->md5sig_info) {
546 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 723 struct tcp_sock *tp = tcp_sk(sk);
547 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 724 struct tcp_md5sig_info *p;
725
726 p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
727 if (!p)
728 return -ENOMEM;
548 729
549 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 730 tp->md5sig_info = p;
550 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 731 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
732 }
733
734 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
735 if (!newkey)
736 return -ENOMEM;
737 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
738 return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
739 newkey, cmd.tcpm_keylen);
740 }
741 return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
551} 742}
552 743
553static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 744static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
@@ -559,8 +750,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
559 750
560 bp = &hp->md5_blk.ip6; 751 bp = &hp->md5_blk.ip6;
561 /* 1. TCP pseudo-header (RFC2460) */ 752 /* 1. TCP pseudo-header (RFC2460) */
562 bp->saddr = *saddr; 753 ipv6_addr_copy(&bp->saddr, saddr);
563 bp->daddr = *daddr; 754 ipv6_addr_copy(&bp->daddr, daddr);
564 bp->protocol = cpu_to_be32(IPPROTO_TCP); 755 bp->protocol = cpu_to_be32(IPPROTO_TCP);
565 bp->len = cpu_to_be32(nbytes); 756 bp->len = cpu_to_be32(nbytes);
566 757
@@ -570,7 +761,7 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
570 761
571static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, 762static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
572 const struct in6_addr *daddr, struct in6_addr *saddr, 763 const struct in6_addr *daddr, struct in6_addr *saddr,
573 const struct tcphdr *th) 764 struct tcphdr *th)
574{ 765{
575 struct tcp_md5sig_pool *hp; 766 struct tcp_md5sig_pool *hp;
576 struct hash_desc *desc; 767 struct hash_desc *desc;
@@ -602,14 +793,13 @@ clear_hash_noput:
602} 793}
603 794
604static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, 795static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
605 const struct sock *sk, 796 struct sock *sk, struct request_sock *req,
606 const struct request_sock *req, 797 struct sk_buff *skb)
607 const struct sk_buff *skb)
608{ 798{
609 const struct in6_addr *saddr, *daddr; 799 const struct in6_addr *saddr, *daddr;
610 struct tcp_md5sig_pool *hp; 800 struct tcp_md5sig_pool *hp;
611 struct hash_desc *desc; 801 struct hash_desc *desc;
612 const struct tcphdr *th = tcp_hdr(skb); 802 struct tcphdr *th = tcp_hdr(skb);
613 803
614 if (sk) { 804 if (sk) {
615 saddr = &inet6_sk(sk)->saddr; 805 saddr = &inet6_sk(sk)->saddr;
@@ -652,12 +842,12 @@ clear_hash_noput:
652 return 1; 842 return 1;
653} 843}
654 844
655static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 845static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
656{ 846{
657 const __u8 *hash_location = NULL; 847 __u8 *hash_location = NULL;
658 struct tcp_md5sig_key *hash_expected; 848 struct tcp_md5sig_key *hash_expected;
659 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 849 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
660 const struct tcphdr *th = tcp_hdr(skb); 850 struct tcphdr *th = tcp_hdr(skb);
661 int genhash; 851 int genhash;
662 u8 newhash[16]; 852 u8 newhash[16];
663 853
@@ -684,10 +874,12 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
684 NULL, NULL, skb); 874 NULL, NULL, skb);
685 875
686 if (genhash || memcmp(hash_location, newhash, 16) != 0) { 876 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
687 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", 877 if (net_ratelimit()) {
688 genhash ? "failed" : "mismatch", 878 printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
689 &ip6h->saddr, ntohs(th->source), 879 genhash ? "failed" : "mismatch",
690 &ip6h->daddr, ntohs(th->dest)); 880 &ip6h->saddr, ntohs(th->source),
881 &ip6h->daddr, ntohs(th->dest));
882 }
691 return 1; 883 return 1;
692 } 884 }
693 return 0; 885 return 0;
@@ -711,11 +903,84 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
711}; 903};
712#endif 904#endif
713 905
906static void __tcp_v6_send_check(struct sk_buff *skb,
907 const struct in6_addr *saddr, const struct in6_addr *daddr)
908{
909 struct tcphdr *th = tcp_hdr(skb);
910
911 if (skb->ip_summed == CHECKSUM_PARTIAL) {
912 th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
913 skb->csum_start = skb_transport_header(skb) - skb->head;
914 skb->csum_offset = offsetof(struct tcphdr, check);
915 } else {
916 th->check = tcp_v6_check(skb->len, saddr, daddr,
917 csum_partial(th, th->doff << 2,
918 skb->csum));
919 }
920}
921
922static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
923{
924 struct ipv6_pinfo *np = inet6_sk(sk);
925
926 __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
927}
928
929static int tcp_v6_gso_send_check(struct sk_buff *skb)
930{
931 const struct ipv6hdr *ipv6h;
932 struct tcphdr *th;
933
934 if (!pskb_may_pull(skb, sizeof(*th)))
935 return -EINVAL;
936
937 ipv6h = ipv6_hdr(skb);
938 th = tcp_hdr(skb);
939
940 th->check = 0;
941 skb->ip_summed = CHECKSUM_PARTIAL;
942 __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
943 return 0;
944}
945
946static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
947 struct sk_buff *skb)
948{
949 const struct ipv6hdr *iph = skb_gro_network_header(skb);
950
951 switch (skb->ip_summed) {
952 case CHECKSUM_COMPLETE:
953 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
954 skb->csum)) {
955 skb->ip_summed = CHECKSUM_UNNECESSARY;
956 break;
957 }
958
959 /* fall through */
960 case CHECKSUM_NONE:
961 NAPI_GRO_CB(skb)->flush = 1;
962 return NULL;
963 }
964
965 return tcp_gro_receive(head, skb);
966}
967
968static int tcp6_gro_complete(struct sk_buff *skb)
969{
970 const struct ipv6hdr *iph = ipv6_hdr(skb);
971 struct tcphdr *th = tcp_hdr(skb);
972
973 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
974 &iph->saddr, &iph->daddr, 0);
975 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
976
977 return tcp_gro_complete(skb);
978}
979
714static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, 980static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
715 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) 981 u32 ts, struct tcp_md5sig_key *key, int rst)
716{ 982{
717 const struct tcphdr *th = tcp_hdr(skb); 983 struct tcphdr *th = tcp_hdr(skb), *t1;
718 struct tcphdr *t1;
719 struct sk_buff *buff; 984 struct sk_buff *buff;
720 struct flowi6 fl6; 985 struct flowi6 fl6;
721 struct net *net = dev_net(skb_dst(skb)->dev); 986 struct net *net = dev_net(skb_dst(skb)->dev);
@@ -772,8 +1037,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
772#endif 1037#endif
773 1038
774 memset(&fl6, 0, sizeof(fl6)); 1039 memset(&fl6, 0, sizeof(fl6));
775 fl6.daddr = ipv6_hdr(skb)->saddr; 1040 ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
776 fl6.saddr = ipv6_hdr(skb)->daddr; 1041 ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);
777 1042
778 buff->ip_summed = CHECKSUM_PARTIAL; 1043 buff->ip_summed = CHECKSUM_PARTIAL;
779 buff->csum = 0; 1044 buff->csum = 0;
@@ -781,8 +1046,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
781 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 1046 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
782 1047
783 fl6.flowi6_proto = IPPROTO_TCP; 1048 fl6.flowi6_proto = IPPROTO_TCP;
784 if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) 1049 fl6.flowi6_oif = inet6_iif(skb);
785 fl6.flowi6_oif = inet6_iif(skb);
786 fl6.fl6_dport = t1->dest; 1050 fl6.fl6_dport = t1->dest;
787 fl6.fl6_sport = t1->source; 1051 fl6.fl6_sport = t1->source;
788 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 1052 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -794,7 +1058,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
794 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); 1058 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
795 if (!IS_ERR(dst)) { 1059 if (!IS_ERR(dst)) {
796 skb_dst_set(buff, dst); 1060 skb_dst_set(buff, dst);
797 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); 1061 ip6_xmit(ctl_sk, buff, &fl6, NULL);
798 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 1062 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
799 if (rst) 1063 if (rst)
800 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 1064 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -806,16 +1070,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
806 1070
807static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) 1071static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
808{ 1072{
809 const struct tcphdr *th = tcp_hdr(skb); 1073 struct tcphdr *th = tcp_hdr(skb);
810 u32 seq = 0, ack_seq = 0; 1074 u32 seq = 0, ack_seq = 0;
811 struct tcp_md5sig_key *key = NULL; 1075 struct tcp_md5sig_key *key = NULL;
812#ifdef CONFIG_TCP_MD5SIG
813 const __u8 *hash_location = NULL;
814 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
815 unsigned char newhash[16];
816 int genhash;
817 struct sock *sk1 = NULL;
818#endif
819 1076
820 if (th->rst) 1077 if (th->rst)
821 return; 1078 return;
@@ -824,32 +1081,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
824 return; 1081 return;
825 1082
826#ifdef CONFIG_TCP_MD5SIG 1083#ifdef CONFIG_TCP_MD5SIG
827 hash_location = tcp_parse_md5sig_option(th); 1084 if (sk)
828 if (!sk && hash_location) { 1085 key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
829 /*
830 * active side is lost. Try to find listening socket through
831 * source port, and then find md5 key through listening socket.
832 * we are not loose security here:
833 * Incoming packet is checked with md5 hash with finding key,
834 * no RST generated if md5 hash doesn't match.
835 */
836 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
837 &tcp_hashinfo, &ipv6h->daddr,
838 ntohs(th->source), inet6_iif(skb));
839 if (!sk1)
840 return;
841
842 rcu_read_lock();
843 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
844 if (!key)
845 goto release_sk1;
846
847 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
848 if (genhash || memcmp(hash_location, newhash, 16) != 0)
849 goto release_sk1;
850 } else {
851 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
852 }
853#endif 1086#endif
854 1087
855 if (th->ack) 1088 if (th->ack)
@@ -858,21 +1091,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
858 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1091 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
859 (th->doff << 2); 1092 (th->doff << 2);
860 1093
861 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); 1094 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
862
863#ifdef CONFIG_TCP_MD5SIG
864release_sk1:
865 if (sk1) {
866 rcu_read_unlock();
867 sock_put(sk1);
868 }
869#endif
870} 1095}
871 1096
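
tcp_v6_send_reset() above applies the RFC 793 reset rule visible in the hunk: if the offending segment carried an ACK, its acknowledgment number becomes the RST's sequence number; otherwise the RST acknowledges everything the segment consumed, with SYN and FIN each counting as one sequence number. A small sketch of that arithmetic (the struct below is illustrative, not struct tcphdr):

    #include <stdint.h>
    #include <stdio.h>

    struct seg {
        uint32_t seq;        /* host byte order for the sketch */
        uint32_t ack_seq;
        unsigned syn:1, fin:1, ack:1;
        unsigned doff;       /* header length in 32-bit words */
        uint32_t skb_len;    /* header + payload bytes */
    };

    static void rst_numbers(const struct seg *in, uint32_t *seq, uint32_t *ack)
    {
        *seq = 0;
        *ack = 0;
        if (in->ack)
            *seq = in->ack_seq;
        else
            *ack = in->seq + in->syn + in->fin + in->skb_len - (in->doff << 2);
    }

    int main(void)
    {
        /* A SYN with no payload: 20-byte header, consumes one sequence number. */
        struct seg syn = { .seq = 1000, .syn = 1, .doff = 5, .skb_len = 20 };
        uint32_t seq, ack;

        rst_numbers(&syn, &seq, &ack);
        printf("RST seq=%u ack=%u\n", seq, ack);   /* ack = 1001 */
        return 0;
    }
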
872static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, 1097static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
873 struct tcp_md5sig_key *key, u8 tclass) 1098 struct tcp_md5sig_key *key)
874{ 1099{
875 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); 1100 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
876} 1101}
877 1102
878static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1103static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -882,8 +1107,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
882 1107
883 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1108 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
884 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1109 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
885 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), 1110 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
886 tw->tw_tclass);
887 1111
888 inet_twsk_put(tw); 1112 inet_twsk_put(tw);
889} 1113}
@@ -892,7 +1116,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
892 struct request_sock *req) 1116 struct request_sock *req)
893{ 1117{
894 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 1118 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
895 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); 1119 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
896} 1120}
897 1121
898 1122
@@ -907,7 +1131,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
907 &ipv6_hdr(skb)->saddr, 1131 &ipv6_hdr(skb)->saddr,
908 &ipv6_hdr(skb)->daddr, inet6_iif(skb)); 1132 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
909 if (req) 1133 if (req)
910 return tcp_check_req(sk, skb, req, prev, false); 1134 return tcp_check_req(sk, skb, req, prev);
911 1135
912 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 1136 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
913 &ipv6_hdr(skb)->saddr, th->source, 1137 &ipv6_hdr(skb)->saddr, th->source,
@@ -936,15 +1160,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
936{ 1160{
937 struct tcp_extend_values tmp_ext; 1161 struct tcp_extend_values tmp_ext;
938 struct tcp_options_received tmp_opt; 1162 struct tcp_options_received tmp_opt;
939 const u8 *hash_location; 1163 u8 *hash_location;
940 struct request_sock *req; 1164 struct request_sock *req;
941 struct inet6_request_sock *treq; 1165 struct inet6_request_sock *treq;
942 struct ipv6_pinfo *np = inet6_sk(sk); 1166 struct ipv6_pinfo *np = inet6_sk(sk);
943 struct tcp_sock *tp = tcp_sk(sk); 1167 struct tcp_sock *tp = tcp_sk(sk);
944 __u32 isn = TCP_SKB_CB(skb)->when; 1168 __u32 isn = TCP_SKB_CB(skb)->when;
945 struct dst_entry *dst = NULL; 1169 struct dst_entry *dst = NULL;
946 struct flowi6 fl6; 1170 int want_cookie = 0;
947 bool want_cookie = false;
948 1171
949 if (skb->protocol == htons(ETH_P_IP)) 1172 if (skb->protocol == htons(ETH_P_IP))
950 return tcp_v4_conn_request(sk, skb); 1173 return tcp_v4_conn_request(sk, skb);
@@ -972,7 +1195,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
972 tcp_clear_options(&tmp_opt); 1195 tcp_clear_options(&tmp_opt);
973 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 1196 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
974 tmp_opt.user_mss = tp->rx_opt.user_mss; 1197 tmp_opt.user_mss = tp->rx_opt.user_mss;
975 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 1198 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
976 1199
977 if (tmp_opt.cookie_plus > 0 && 1200 if (tmp_opt.cookie_plus > 0 &&
978 tmp_opt.saw_tstamp && 1201 tmp_opt.saw_tstamp &&
@@ -1005,7 +1228,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1005 while (l-- > 0) 1228 while (l-- > 0)
1006 *c++ ^= *hash_location++; 1229 *c++ ^= *hash_location++;
1007 1230
1008 want_cookie = false; /* not our kind of cookie */ 1231 want_cookie = 0; /* not our kind of cookie */
1009 tmp_ext.cookie_out_never = 0; /* false */ 1232 tmp_ext.cookie_out_never = 0; /* false */
1010 tmp_ext.cookie_plus = tmp_opt.cookie_plus; 1233 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1011 } else if (!tp->rx_opt.cookie_in_always) { 1234 } else if (!tp->rx_opt.cookie_in_always) {
@@ -1024,25 +1247,26 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1024 tcp_openreq_init(req, &tmp_opt, skb); 1247 tcp_openreq_init(req, &tmp_opt, skb);
1025 1248
1026 treq = inet6_rsk(req); 1249 treq = inet6_rsk(req);
1027 treq->rmt_addr = ipv6_hdr(skb)->saddr; 1250 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1028 treq->loc_addr = ipv6_hdr(skb)->daddr; 1251 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1029 if (!want_cookie || tmp_opt.tstamp_ok) 1252 if (!want_cookie || tmp_opt.tstamp_ok)
1030 TCP_ECN_create_request(req, skb); 1253 TCP_ECN_create_request(req, tcp_hdr(skb));
1031
1032 treq->iif = sk->sk_bound_dev_if;
1033
1034 /* So that link locals have meaning */
1035 if (!sk->sk_bound_dev_if &&
1036 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1037 treq->iif = inet6_iif(skb);
1038 1254
1039 if (!isn) { 1255 if (!isn) {
1256 struct inet_peer *peer = NULL;
1257
1040 if (ipv6_opt_accepted(sk, skb) || 1258 if (ipv6_opt_accepted(sk, skb) ||
1041 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || 1259 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1042 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { 1260 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1043 atomic_inc(&skb->users); 1261 atomic_inc(&skb->users);
1044 treq->pktopts = skb; 1262 treq->pktopts = skb;
1045 } 1263 }
1264 treq->iif = sk->sk_bound_dev_if;
1265
1266 /* So that link locals have meaning */
1267 if (!sk->sk_bound_dev_if &&
1268 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1269 treq->iif = inet6_iif(skb);
1046 1270
1047 if (want_cookie) { 1271 if (want_cookie) {
1048 isn = cookie_v6_init_sequence(sk, skb, &req->mss); 1272 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
@@ -1061,8 +1285,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1061 */ 1285 */
1062 if (tmp_opt.saw_tstamp && 1286 if (tmp_opt.saw_tstamp &&
1063 tcp_death_row.sysctl_tw_recycle && 1287 tcp_death_row.sysctl_tw_recycle &&
1064 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { 1288 (dst = inet6_csk_route_req(sk, req)) != NULL &&
1065 if (!tcp_peer_is_proven(req, dst, true)) { 1289 (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
1290 ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
1291 &treq->rmt_addr)) {
1292 inet_peer_refcheck(peer);
1293 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1294 (s32)(peer->tcp_ts - req->ts_recent) >
1295 TCP_PAWS_WINDOW) {
1066 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 1296 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1067 goto drop_and_release; 1297 goto drop_and_release;
1068 } 1298 }
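
This hunk reopens the inline peer-timestamp test that tcp_peer_is_proven() had wrapped: with tw_recycle enabled, the passive open is rejected when the cached peer timestamp is fresh (seen within TCP_PAWS_MSL seconds) and ahead of the timestamp echoed in the SYN by more than TCP_PAWS_WINDOW. A sketch of the wrap-safe comparison, using the 60 s / 1-tick values those macros carry in this kernel series:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TCP_PAWS_MSL    60   /* seconds a cached peer timestamp stays valid */
    #define TCP_PAWS_WINDOW  1   /* allowed backward step in timestamp ticks */

    /* Reject the passive open when the peer's last-seen timestamp is both
     * fresh and ahead of the timestamp echoed in this SYN: the SYN is then
     * presumed to be an old duplicate (PAWS). The unsigned/signed casts keep
     * both comparisons correct across wraparound, as in the hunk above. */
    static bool paws_reject(uint32_t now_sec, uint32_t peer_ts_stamp,
                            uint32_t peer_ts, uint32_t req_ts_recent)
    {
        return (uint32_t)(now_sec - peer_ts_stamp) < TCP_PAWS_MSL &&
               (int32_t)(peer_ts - req_ts_recent) > TCP_PAWS_WINDOW;
    }

    int main(void)
    {
        /* Peer seen 10 s ago with timestamp 5000; SYN echoes 4000 -> reject. */
        printf("reject=%d\n", paws_reject(1000, 990, 5000, 4000));
        /* Same SYN but the cached entry is two minutes old -> accept. */
        printf("reject=%d\n", paws_reject(1000, 880, 5000, 4000));
        return 0;
    }
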
@@ -1071,7 +1301,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1071 else if (!sysctl_tcp_syncookies && 1301 else if (!sysctl_tcp_syncookies &&
1072 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 1302 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1073 (sysctl_max_syn_backlog >> 2)) && 1303 (sysctl_max_syn_backlog >> 2)) &&
1074 !tcp_peer_is_proven(req, dst, false)) { 1304 (!peer || !peer->tcp_ts_stamp) &&
1305 (!dst || !dst_metric(dst, RTAX_RTT))) {
1075 /* Without syncookies last quarter of 1306 /* Without syncookies last quarter of
1076 * backlog is filled with destinations, 1307 * backlog is filled with destinations,
1077 * proven to be alive. 1308 * proven to be alive.
@@ -1088,18 +1319,15 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1088 } 1319 }
1089have_isn: 1320have_isn:
1090 tcp_rsk(req)->snt_isn = isn; 1321 tcp_rsk(req)->snt_isn = isn;
1322 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1091 1323
1092 if (security_inet_conn_request(sk, skb, req)) 1324 security_inet_conn_request(sk, skb, req);
1093 goto drop_and_release;
1094 1325
1095 if (tcp_v6_send_synack(sk, dst, &fl6, req, 1326 if (tcp_v6_send_synack(sk, req,
1096 (struct request_values *)&tmp_ext, 1327 (struct request_values *)&tmp_ext) ||
1097 skb_get_queue_mapping(skb)) ||
1098 want_cookie) 1328 want_cookie)
1099 goto drop_and_free; 1329 goto drop_and_free;
1100 1330
1101 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1102 tcp_rsk(req)->listener = NULL;
1103 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 1331 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1104 return 0; 1332 return 0;
1105 1333
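
Both sides of the hunk record tcp_rsk(req)->snt_synack = tcp_time_stamp when the SYN-ACK goes out, so the completed handshake can seed the child socket's first RTT sample (used further down in tcp_v6_syn_recv_sock()). A userspace approximation of that bookkeeping, with a monotonic millisecond tick standing in for the jiffies-based tcp_time_stamp:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* Millisecond tick, standing in for tcp_time_stamp in this sketch. */
    static uint32_t now_ms(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint32_t)(ts.tv_sec * 1000 + ts.tv_nsec / 1000000);
    }

    struct pending_req {
        uint32_t snt_synack;         /* tick when the SYN-ACK went out */
    };

    int main(void)
    {
        struct pending_req req = { .snt_synack = now_ms() }; /* at SYN-ACK send */
        struct timespec nap = { 0, 20 * 1000 * 1000 };

        nanosleep(&nap, NULL);                /* ...handshake completes... */

        /* At accept time the first RTT sample is simply now - snt_synack. */
        printf("synack rtt sample: %u ms\n", now_ms() - req.snt_synack);
        return 0;
    }
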
@@ -1121,10 +1349,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1121 struct inet_sock *newinet; 1349 struct inet_sock *newinet;
1122 struct tcp_sock *newtp; 1350 struct tcp_sock *newtp;
1123 struct sock *newsk; 1351 struct sock *newsk;
1352 struct ipv6_txoptions *opt;
1124#ifdef CONFIG_TCP_MD5SIG 1353#ifdef CONFIG_TCP_MD5SIG
1125 struct tcp_md5sig_key *key; 1354 struct tcp_md5sig_key *key;
1126#endif 1355#endif
1127 struct flowi6 fl6;
1128 1356
1129 if (skb->protocol == htons(ETH_P_IP)) { 1357 if (skb->protocol == htons(ETH_P_IP)) {
1130 /* 1358 /*
@@ -1149,7 +1377,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1149 1377
1150 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); 1378 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1151 1379
1152 newnp->rcv_saddr = newnp->saddr; 1380 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1153 1381
1154 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1382 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1155 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1383 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1163,7 +1391,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1163 newnp->opt = NULL; 1391 newnp->opt = NULL;
1164 newnp->mcast_oif = inet6_iif(skb); 1392 newnp->mcast_oif = inet6_iif(skb);
1165 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1393 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1166 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1167 1394
1168 /* 1395 /*
1169 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1396 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1181,12 +1408,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1181 } 1408 }
1182 1409
1183 treq = inet6_rsk(req); 1410 treq = inet6_rsk(req);
1411 opt = np->opt;
1184 1412
1185 if (sk_acceptq_is_full(sk)) 1413 if (sk_acceptq_is_full(sk))
1186 goto out_overflow; 1414 goto out_overflow;
1187 1415
1188 if (!dst) { 1416 if (!dst) {
1189 dst = inet6_csk_route_req(sk, &fl6, req); 1417 dst = inet6_csk_route_req(sk, req);
1190 if (!dst) 1418 if (!dst)
1191 goto out; 1419 goto out;
1192 } 1420 }
@@ -1203,7 +1431,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1203 1431
1204 newsk->sk_gso_type = SKB_GSO_TCPV6; 1432 newsk->sk_gso_type = SKB_GSO_TCPV6;
1205 __ip6_dst_store(newsk, dst, NULL, NULL); 1433 __ip6_dst_store(newsk, dst, NULL, NULL);
1206 inet6_sk_rx_dst_set(newsk, skb);
1207 1434
1208 newtcp6sk = (struct tcp6_sock *)newsk; 1435 newtcp6sk = (struct tcp6_sock *)newsk;
1209 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; 1436 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1214,9 +1441,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1214 1441
1215 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1442 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1216 1443
1217 newnp->daddr = treq->rmt_addr; 1444 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1218 newnp->saddr = treq->loc_addr; 1445 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1219 newnp->rcv_saddr = treq->loc_addr; 1446 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1220 newsk->sk_bound_dev_if = treq->iif; 1447 newsk->sk_bound_dev_if = treq->iif;
1221 1448
1222 /* Now IPv6 options... 1449 /* Now IPv6 options...
@@ -1233,9 +1460,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1233 /* Clone pktoptions received with SYN */ 1460 /* Clone pktoptions received with SYN */
1234 newnp->pktoptions = NULL; 1461 newnp->pktoptions = NULL;
1235 if (treq->pktopts != NULL) { 1462 if (treq->pktopts != NULL) {
1236 newnp->pktoptions = skb_clone(treq->pktopts, 1463 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1237 sk_gfp_atomic(sk, GFP_ATOMIC)); 1464 kfree_skb(treq->pktopts);
1238 consume_skb(treq->pktopts);
1239 treq->pktopts = NULL; 1465 treq->pktopts = NULL;
1240 if (newnp->pktoptions) 1466 if (newnp->pktoptions)
1241 skb_set_owner_r(newnp->pktoptions, newsk); 1467 skb_set_owner_r(newnp->pktoptions, newsk);
@@ -1243,7 +1469,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1243 newnp->opt = NULL; 1469 newnp->opt = NULL;
1244 newnp->mcast_oif = inet6_iif(skb); 1470 newnp->mcast_oif = inet6_iif(skb);
1245 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1471 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1246 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1247 1472
1248 /* Clone native IPv6 options from listening socket (if any) 1473 /* Clone native IPv6 options from listening socket (if any)
1249 1474
@@ -1251,8 +1476,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1251 but we make one more one thing there: reattach optmem 1476 but we make one more one thing there: reattach optmem
1252 to newsk. 1477 to newsk.
1253 */ 1478 */
1254 if (np->opt) 1479 if (opt) {
1255 newnp->opt = ipv6_dup_options(newsk, np->opt); 1480 newnp->opt = ipv6_dup_options(newsk, opt);
1481 if (opt != np->opt)
1482 sock_kfree_s(sk, opt, opt->tot_len);
1483 }
1256 1484
1257 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1485 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1258 if (newnp->opt) 1486 if (newnp->opt)
@@ -1262,13 +1490,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1262 tcp_mtup_init(newsk); 1490 tcp_mtup_init(newsk);
1263 tcp_sync_mss(newsk, dst_mtu(dst)); 1491 tcp_sync_mss(newsk, dst_mtu(dst));
1264 newtp->advmss = dst_metric_advmss(dst); 1492 newtp->advmss = dst_metric_advmss(dst);
1265 if (tcp_sk(sk)->rx_opt.user_mss &&
1266 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1267 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1268
1269 tcp_initialize_rcv_mss(newsk); 1493 tcp_initialize_rcv_mss(newsk);
1270 tcp_synack_rtt_meas(newsk, req); 1494 if (tcp_rsk(req)->snt_synack)
1271 newtp->total_retrans = req->num_retrans; 1495 tcp_valid_rtt_meas(newsk,
1496 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1497 newtp->total_retrans = req->retrans;
1272 1498
1273 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1499 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1274 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1500 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1281,15 +1507,15 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1281 * memory, then we end up not copying the key 1507 * memory, then we end up not copying the key
1282 * across. Shucks. 1508 * across. Shucks.
1283 */ 1509 */
1284 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, 1510 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1285 AF_INET6, key->key, key->keylen, 1511 if (newkey != NULL)
1286 sk_gfp_atomic(sk, GFP_ATOMIC)); 1512 tcp_v6_md5_do_add(newsk, &newnp->daddr,
1513 newkey, key->keylen);
1287 } 1514 }
1288#endif 1515#endif
1289 1516
1290 if (__inet_inherit_port(sk, newsk) < 0) { 1517 if (__inet_inherit_port(sk, newsk) < 0) {
1291 inet_csk_prepare_forced_close(newsk); 1518 sock_put(newsk);
1292 tcp_done(newsk);
1293 goto out; 1519 goto out;
1294 } 1520 }
1295 __inet6_hash(newsk, NULL); 1521 __inet6_hash(newsk, NULL);
@@ -1299,6 +1525,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1299out_overflow: 1525out_overflow:
1300 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1526 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1301out_nonewsk: 1527out_nonewsk:
1528 if (opt && opt != np->opt)
1529 sock_kfree_s(sk, opt, opt->tot_len);
1302 dst_release(dst); 1530 dst_release(dst);
1303out: 1531out:
1304 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1532 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
@@ -1377,20 +1605,10 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1377 --ANK (980728) 1605 --ANK (980728)
1378 */ 1606 */
1379 if (np->rxopt.all) 1607 if (np->rxopt.all)
1380 opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); 1608 opt_skb = skb_clone(skb, GFP_ATOMIC);
1381 1609
1382 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1610 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1383 struct dst_entry *dst = sk->sk_rx_dst; 1611 sock_rps_save_rxhash(sk, skb->rxhash);
1384
1385 sock_rps_save_rxhash(sk, skb);
1386 if (dst) {
1387 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1388 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1389 dst_release(dst);
1390 sk->sk_rx_dst = NULL;
1391 }
1392 }
1393
1394 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) 1612 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1395 goto reset; 1613 goto reset;
1396 if (opt_skb) 1614 if (opt_skb)
@@ -1412,7 +1630,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1412 * the new socket.. 1630 * the new socket..
1413 */ 1631 */
1414 if(nsk != sk) { 1632 if(nsk != sk) {
1415 sock_rps_save_rxhash(nsk, skb); 1633 sock_rps_save_rxhash(nsk, skb->rxhash);
1416 if (tcp_child_process(sk, nsk, skb)) 1634 if (tcp_child_process(sk, nsk, skb))
1417 goto reset; 1635 goto reset;
1418 if (opt_skb) 1636 if (opt_skb)
@@ -1420,7 +1638,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1420 return 0; 1638 return 0;
1421 } 1639 }
1422 } else 1640 } else
1423 sock_rps_save_rxhash(sk, skb); 1641 sock_rps_save_rxhash(sk, skb->rxhash);
1424 1642
1425 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) 1643 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1426 goto reset; 1644 goto reset;
@@ -1455,8 +1673,6 @@ ipv6_pktoptions:
1455 np->mcast_oif = inet6_iif(opt_skb); 1673 np->mcast_oif = inet6_iif(opt_skb);
1456 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1674 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1457 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1675 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1458 if (np->rxopt.bits.rxtclass)
1459 np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1460 if (ipv6_opt_accepted(sk, opt_skb)) { 1676 if (ipv6_opt_accepted(sk, opt_skb)) {
1461 skb_set_owner_r(opt_skb, sk); 1677 skb_set_owner_r(opt_skb, sk);
1462 opt_skb = xchg(&np->pktoptions, opt_skb); 1678 opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1472,7 +1688,7 @@ ipv6_pktoptions:
1472 1688
1473static int tcp_v6_rcv(struct sk_buff *skb) 1689static int tcp_v6_rcv(struct sk_buff *skb)
1474{ 1690{
1475 const struct tcphdr *th; 1691 struct tcphdr *th;
1476 const struct ipv6hdr *hdr; 1692 const struct ipv6hdr *hdr;
1477 struct sock *sk; 1693 struct sock *sk;
1478 int ret; 1694 int ret;
@@ -1506,7 +1722,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1506 skb->len - th->doff*4); 1722 skb->len - th->doff*4);
1507 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1723 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1508 TCP_SKB_CB(skb)->when = 0; 1724 TCP_SKB_CB(skb)->when = 0;
1509 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1725 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
1510 TCP_SKB_CB(skb)->sacked = 0; 1726 TCP_SKB_CB(skb)->sacked = 0;
1511 1727
1512 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 1728 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
@@ -1536,7 +1752,7 @@ process:
1536#ifdef CONFIG_NET_DMA 1752#ifdef CONFIG_NET_DMA
1537 struct tcp_sock *tp = tcp_sk(sk); 1753 struct tcp_sock *tp = tcp_sk(sk);
1538 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 1754 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1539 tp->ucopy.dma_chan = net_dma_find_channel(); 1755 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1540 if (tp->ucopy.dma_chan) 1756 if (tp->ucopy.dma_chan)
1541 ret = tcp_v6_do_rcv(sk, skb); 1757 ret = tcp_v6_do_rcv(sk, skb);
1542 else 1758 else
@@ -1545,8 +1761,7 @@ process:
1545 if (!tcp_prequeue(sk, skb)) 1761 if (!tcp_prequeue(sk, skb))
1546 ret = tcp_v6_do_rcv(sk, skb); 1762 ret = tcp_v6_do_rcv(sk, skb);
1547 } 1763 }
1548 } else if (unlikely(sk_add_backlog(sk, skb, 1764 } else if (unlikely(sk_add_backlog(sk, skb))) {
1549 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1550 bh_unlock_sock(sk); 1765 bh_unlock_sock(sk);
1551 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 1766 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1552 goto discard_and_relse; 1767 goto discard_and_relse;
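
The backlog hunk above is the part of tcp_v6_rcv() that queues segments arriving while the socket is locked by a user context; the newer side bounds that queue by sk_rcvbuf + sk_sndbuf bytes and drops (and counts) anything beyond it. A toy model of that bound, not the kernel's sk_add_backlog():

    #include <stdbool.h>
    #include <stdio.h>

    /* Packets that arrive while the socket is owned by the user are queued
     * up to a byte budget; overflow is dropped and counted, mirroring
     * LINUX_MIB_TCPBACKLOGDROP. */
    struct toy_sock {
        unsigned int backlog_bytes;
        unsigned int rcvbuf, sndbuf;
        unsigned long backlog_drops;
    };

    static bool backlog_add(struct toy_sock *sk, unsigned int skb_truesize)
    {
        unsigned int limit = sk->rcvbuf + sk->sndbuf;

        if (sk->backlog_bytes + skb_truesize > limit) {
            sk->backlog_drops++;
            return false;                    /* caller frees the packet */
        }
        sk->backlog_bytes += skb_truesize;
        return true;
    }

    int main(void)
    {
        struct toy_sock sk = { .rcvbuf = 4096, .sndbuf = 4096 };
        unsigned int i;

        for (i = 0; i < 10; i++)
            backlog_add(&sk, 1500);
        printf("queued=%u bytes, dropped=%lu packets\n",
               sk.backlog_bytes, sk.backlog_drops);
        return 0;
    }
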
@@ -1619,58 +1834,52 @@ do_time_wait:
1619 goto discard_it; 1834 goto discard_it;
1620} 1835}
1621 1836
1622static void tcp_v6_early_demux(struct sk_buff *skb) 1837static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
1623{ 1838{
1624 const struct ipv6hdr *hdr; 1839 struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
1625 const struct tcphdr *th; 1840 struct ipv6_pinfo *np = inet6_sk(sk);
1626 struct sock *sk; 1841 struct inet_peer *peer;
1627 1842
1628 if (skb->pkt_type != PACKET_HOST) 1843 if (!rt ||
1629 return; 1844 !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
1845 peer = inet_getpeer_v6(&np->daddr, 1);
1846 *release_it = true;
1847 } else {
1848 if (!rt->rt6i_peer)
1849 rt6_bind_peer(rt, 1);
1850 peer = rt->rt6i_peer;
1851 *release_it = false;
1852 }
1630 1853
1631 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1854 return peer;
1632 return; 1855}
1633 1856
1634 hdr = ipv6_hdr(skb); 1857static void *tcp_v6_tw_get_peer(struct sock *sk)
1635 th = tcp_hdr(skb); 1858{
1859 struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
1860 struct inet_timewait_sock *tw = inet_twsk(sk);
1636 1861
1637 if (th->doff < sizeof(struct tcphdr) / 4) 1862 if (tw->tw_family == AF_INET)
1638 return; 1863 return tcp_v4_tw_get_peer(sk);
1639 1864
1640 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 1865 return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
1641 &hdr->saddr, th->source,
1642 &hdr->daddr, ntohs(th->dest),
1643 inet6_iif(skb));
1644 if (sk) {
1645 skb->sk = sk;
1646 skb->destructor = sock_edemux;
1647 if (sk->sk_state != TCP_TIME_WAIT) {
1648 struct dst_entry *dst = sk->sk_rx_dst;
1649
1650 if (dst)
1651 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1652 if (dst &&
1653 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1654 skb_dst_set_noref(skb, dst);
1655 }
1656 }
1657} 1866}
1658 1867
1659static struct timewait_sock_ops tcp6_timewait_sock_ops = { 1868static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1660 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 1869 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1661 .twsk_unique = tcp_twsk_unique, 1870 .twsk_unique = tcp_twsk_unique,
1662 .twsk_destructor= tcp_twsk_destructor, 1871 .twsk_destructor= tcp_twsk_destructor,
1872 .twsk_getpeer = tcp_v6_tw_get_peer,
1663}; 1873};
1664 1874
1665static const struct inet_connection_sock_af_ops ipv6_specific = { 1875static const struct inet_connection_sock_af_ops ipv6_specific = {
1666 .queue_xmit = inet6_csk_xmit, 1876 .queue_xmit = inet6_csk_xmit,
1667 .send_check = tcp_v6_send_check, 1877 .send_check = tcp_v6_send_check,
1668 .rebuild_header = inet6_sk_rebuild_header, 1878 .rebuild_header = inet6_sk_rebuild_header,
1669 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1670 .conn_request = tcp_v6_conn_request, 1879 .conn_request = tcp_v6_conn_request,
1671 .syn_recv_sock = tcp_v6_syn_recv_sock, 1880 .syn_recv_sock = tcp_v6_syn_recv_sock,
1881 .get_peer = tcp_v6_get_peer,
1672 .net_header_len = sizeof(struct ipv6hdr), 1882 .net_header_len = sizeof(struct ipv6hdr),
1673 .net_frag_header_len = sizeof(struct frag_hdr),
1674 .setsockopt = ipv6_setsockopt, 1883 .setsockopt = ipv6_setsockopt,
1675 .getsockopt = ipv6_getsockopt, 1884 .getsockopt = ipv6_getsockopt,
1676 .addr2sockaddr = inet6_csk_addr2sockaddr, 1885 .addr2sockaddr = inet6_csk_addr2sockaddr,
@@ -1686,6 +1895,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
1686static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 1895static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1687 .md5_lookup = tcp_v6_md5_lookup, 1896 .md5_lookup = tcp_v6_md5_lookup,
1688 .calc_md5_hash = tcp_v6_md5_hash_skb, 1897 .calc_md5_hash = tcp_v6_md5_hash_skb,
1898 .md5_add = tcp_v6_md5_add_func,
1689 .md5_parse = tcp_v6_parse_md5_keys, 1899 .md5_parse = tcp_v6_parse_md5_keys,
1690}; 1900};
1691#endif 1901#endif
@@ -1698,9 +1908,9 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
1698 .queue_xmit = ip_queue_xmit, 1908 .queue_xmit = ip_queue_xmit,
1699 .send_check = tcp_v4_send_check, 1909 .send_check = tcp_v4_send_check,
1700 .rebuild_header = inet_sk_rebuild_header, 1910 .rebuild_header = inet_sk_rebuild_header,
1701 .sk_rx_dst_set = inet_sk_rx_dst_set,
1702 .conn_request = tcp_v6_conn_request, 1911 .conn_request = tcp_v6_conn_request,
1703 .syn_recv_sock = tcp_v6_syn_recv_sock, 1912 .syn_recv_sock = tcp_v6_syn_recv_sock,
1913 .get_peer = tcp_v4_get_peer,
1704 .net_header_len = sizeof(struct iphdr), 1914 .net_header_len = sizeof(struct iphdr),
1705 .setsockopt = ipv6_setsockopt, 1915 .setsockopt = ipv6_setsockopt,
1706 .getsockopt = ipv6_getsockopt, 1916 .getsockopt = ipv6_getsockopt,
@@ -1717,6 +1927,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
1717static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 1927static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1718 .md5_lookup = tcp_v4_md5_lookup, 1928 .md5_lookup = tcp_v4_md5_lookup,
1719 .calc_md5_hash = tcp_v4_md5_hash_skb, 1929 .calc_md5_hash = tcp_v4_md5_hash_skb,
1930 .md5_add = tcp_v6_md5_add_func,
1720 .md5_parse = tcp_v6_parse_md5_keys, 1931 .md5_parse = tcp_v6_parse_md5_keys,
1721}; 1932};
1722#endif 1933#endif
@@ -1727,20 +1938,73 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1727static int tcp_v6_init_sock(struct sock *sk) 1938static int tcp_v6_init_sock(struct sock *sk)
1728{ 1939{
1729 struct inet_connection_sock *icsk = inet_csk(sk); 1940 struct inet_connection_sock *icsk = inet_csk(sk);
1941 struct tcp_sock *tp = tcp_sk(sk);
1942
1943 skb_queue_head_init(&tp->out_of_order_queue);
1944 tcp_init_xmit_timers(sk);
1945 tcp_prequeue_init(tp);
1946
1947 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1948 tp->mdev = TCP_TIMEOUT_INIT;
1730 1949
1731 tcp_init_sock(sk); 1950 /* So many TCP implementations out there (incorrectly) count the
1951 * initial SYN frame in their delayed-ACK and congestion control
1952 * algorithms that we must have the following bandaid to talk
1953 * efficiently to them. -DaveM
1954 */
1955 tp->snd_cwnd = 2;
1956
1957 /* See draft-stevens-tcpca-spec-01 for discussion of the
1958 * initialization of these values.
1959 */
1960 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1961 tp->snd_cwnd_clamp = ~0;
1962 tp->mss_cache = TCP_MSS_DEFAULT;
1963
1964 tp->reordering = sysctl_tcp_reordering;
1965
1966 sk->sk_state = TCP_CLOSE;
1732 1967
1733 icsk->icsk_af_ops = &ipv6_specific; 1968 icsk->icsk_af_ops = &ipv6_specific;
1969 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1970 icsk->icsk_sync_mss = tcp_sync_mss;
1971 sk->sk_write_space = sk_stream_write_space;
1972 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1734 1973
1735#ifdef CONFIG_TCP_MD5SIG 1974#ifdef CONFIG_TCP_MD5SIG
1736 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 1975 tp->af_specific = &tcp_sock_ipv6_specific;
1737#endif 1976#endif
1738 1977
1978 /* TCP Cookie Transactions */
1979 if (sysctl_tcp_cookie_size > 0) {
1980 /* Default, cookies without s_data_payload. */
1981 tp->cookie_values =
1982 kzalloc(sizeof(*tp->cookie_values),
1983 sk->sk_allocation);
1984 if (tp->cookie_values != NULL)
1985 kref_init(&tp->cookie_values->kref);
1986 }
1987 /* Presumed zeroed, in order of appearance:
1988 * cookie_in_always, cookie_out_never,
1989 * s_data_constant, s_data_in, s_data_out
1990 */
1991 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1992 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1993
1994 local_bh_disable();
1995 percpu_counter_inc(&tcp_sockets_allocated);
1996 local_bh_enable();
1997
1739 return 0; 1998 return 0;
1740} 1999}
1741 2000
1742static void tcp_v6_destroy_sock(struct sock *sk) 2001static void tcp_v6_destroy_sock(struct sock *sk)
1743{ 2002{
2003#ifdef CONFIG_TCP_MD5SIG
2004 /* Clean up the MD5 key list */
2005 if (tcp_sk(sk)->md5sig_info)
2006 tcp_v6_clear_md5_list(sk);
2007#endif
1744 tcp_v4_destroy_sock(sk); 2008 tcp_v4_destroy_sock(sk);
1745 inet6_destroy_sock(sk); 2009 inet6_destroy_sock(sk);
1746} 2010}
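
The reverted tcp_v6_init_sock() seeds icsk_rto and tp->mdev with TCP_TIMEOUT_INIT before any RTT has been measured. For reference, the smoothing those fields later feed is the standard RFC 6298 estimator; the sketch below uses floating point for clarity, whereas the kernel keeps srtt and mdev in scaled fixed point:

    #include <stdio.h>

    int main(void)
    {
        double srtt = 0, rttvar = 0, rto = 1.0;   /* seconds */
        const double samples[] = { 0.100, 0.110, 0.300, 0.105 };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            double r = samples[i];

            if (srtt == 0) {                      /* first measurement */
                srtt = r;
                rttvar = r / 2;
            } else {
                double err = r - srtt;

                rttvar = 0.75 * rttvar + 0.25 * (err < 0 ? -err : err);
                srtt = 0.875 * srtt + 0.125 * r;
            }
            rto = srtt + 4 * rttvar;              /* RTO = SRTT + 4 * RTTVAR */
            printf("sample %.3f -> srtt %.3f rttvar %.3f rto %.3f\n",
                   r, srtt, rttvar, rto);
        }
        return 0;
    }

The initial snd_cwnd of 2 and the effectively infinite ssthresh set above are independent of this: they only shape how fast slow start opens the window once data starts flowing.
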
@@ -1748,7 +2012,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
1748#ifdef CONFIG_PROC_FS 2012#ifdef CONFIG_PROC_FS
1749/* Proc filesystem TCPv6 sock list dumping. */ 2013/* Proc filesystem TCPv6 sock list dumping. */
1750static void get_openreq6(struct seq_file *seq, 2014static void get_openreq6(struct seq_file *seq,
1751 const struct sock *sk, struct request_sock *req, int i, kuid_t uid) 2015 struct sock *sk, struct request_sock *req, int i, int uid)
1752{ 2016{
1753 int ttd = req->expires - jiffies; 2017 int ttd = req->expires - jiffies;
1754 const struct in6_addr *src = &inet6_rsk(req)->loc_addr; 2018 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
@@ -1771,8 +2035,8 @@ static void get_openreq6(struct seq_file *seq,
1771 0,0, /* could print option size, but that is af dependent. */ 2035 0,0, /* could print option size, but that is af dependent. */
1772 1, /* timers active (only the expire timer) */ 2036 1, /* timers active (only the expire timer) */
1773 jiffies_to_clock_t(ttd), 2037 jiffies_to_clock_t(ttd),
1774 req->num_timeout, 2038 req->retrans,
1775 from_kuid_munged(seq_user_ns(seq), uid), 2039 uid,
1776 0, /* non standard timer */ 2040 0, /* non standard timer */
1777 0, /* open_requests have no inode */ 2041 0, /* open_requests have no inode */
1778 0, req); 2042 0, req);
@@ -1784,10 +2048,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1784 __u16 destp, srcp; 2048 __u16 destp, srcp;
1785 int timer_active; 2049 int timer_active;
1786 unsigned long timer_expires; 2050 unsigned long timer_expires;
1787 const struct inet_sock *inet = inet_sk(sp); 2051 struct inet_sock *inet = inet_sk(sp);
1788 const struct tcp_sock *tp = tcp_sk(sp); 2052 struct tcp_sock *tp = tcp_sk(sp);
1789 const struct inet_connection_sock *icsk = inet_csk(sp); 2053 const struct inet_connection_sock *icsk = inet_csk(sp);
1790 const struct ipv6_pinfo *np = inet6_sk(sp); 2054 struct ipv6_pinfo *np = inet6_sk(sp);
1791 2055
1792 dest = &np->daddr; 2056 dest = &np->daddr;
1793 src = &np->rcv_saddr; 2057 src = &np->rcv_saddr;
@@ -1820,9 +2084,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1820 tp->write_seq-tp->snd_una, 2084 tp->write_seq-tp->snd_una,
1821 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 2085 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1822 timer_active, 2086 timer_active,
1823 jiffies_delta_to_clock_t(timer_expires - jiffies), 2087 jiffies_to_clock_t(timer_expires - jiffies),
1824 icsk->icsk_retransmits, 2088 icsk->icsk_retransmits,
1825 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 2089 sock_i_uid(sp),
1826 icsk->icsk_probes_out, 2090 icsk->icsk_probes_out,
1827 sock_i_ino(sp), 2091 sock_i_ino(sp),
1828 atomic_read(&sp->sk_refcnt), sp, 2092 atomic_read(&sp->sk_refcnt), sp,
@@ -1839,8 +2103,11 @@ static void get_timewait6_sock(struct seq_file *seq,
1839{ 2103{
1840 const struct in6_addr *dest, *src; 2104 const struct in6_addr *dest, *src;
1841 __u16 destp, srcp; 2105 __u16 destp, srcp;
1842 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); 2106 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1843 long delta = tw->tw_ttd - jiffies; 2107 int ttd = tw->tw_ttd - jiffies;
2108
2109 if (ttd < 0)
2110 ttd = 0;
1844 2111
1845 dest = &tw6->tw_v6_daddr; 2112 dest = &tw6->tw_v6_daddr;
1846 src = &tw6->tw_v6_rcv_saddr; 2113 src = &tw6->tw_v6_rcv_saddr;
@@ -1856,7 +2123,7 @@ static void get_timewait6_sock(struct seq_file *seq,
1856 dest->s6_addr32[0], dest->s6_addr32[1], 2123 dest->s6_addr32[0], dest->s6_addr32[1],
1857 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2124 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1858 tw->tw_substate, 0, 0, 2125 tw->tw_substate, 0, 0,
1859 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2126 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1860 atomic_read(&tw->tw_refcnt), tw); 2127 atomic_read(&tw->tw_refcnt), tw);
1861} 2128}
1862 2129
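
get_timewait6_sock() reports the remaining TIME-WAIT lifetime through /proc in clock_t ticks: the older side clamps an already-expired timer to zero and converts with jiffies_to_clock_t(), which the newer jiffies_delta_to_clock_t() folds into one call. A sketch of that conversion, assuming HZ=1000 and the usual USER_HZ of 100:

    #include <stdio.h>

    #define HZ      1000   /* assumed kernel tick rate for the sketch */
    #define USER_HZ  100   /* clock_t rate exposed to userspace */

    /* What jiffies_delta_to_clock_t() boils down to when HZ is a multiple
     * of USER_HZ: clamp an already-expired timer to zero, then rescale
     * kernel ticks to USER_HZ ticks. */
    static long delta_to_clock_t(long delta_jiffies)
    {
        if (delta_jiffies < 0)
            delta_jiffies = 0;
        return delta_jiffies / (HZ / USER_HZ);
    }

    int main(void)
    {
        printf("%ld\n", delta_to_clock_t(2500));   /* 2.5 s left -> 250 ticks */
        printf("%ld\n", delta_to_clock_t(-40));    /* already expired -> 0   */
        return 0;
    }
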
@@ -1891,18 +2158,12 @@ out:
1891 return 0; 2158 return 0;
1892} 2159}
1893 2160
1894static const struct file_operations tcp6_afinfo_seq_fops = {
1895 .owner = THIS_MODULE,
1896 .open = tcp_seq_open,
1897 .read = seq_read,
1898 .llseek = seq_lseek,
1899 .release = seq_release_net
1900};
1901
1902static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2161static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1903 .name = "tcp6", 2162 .name = "tcp6",
1904 .family = AF_INET6, 2163 .family = AF_INET6,
1905 .seq_fops = &tcp6_afinfo_seq_fops, 2164 .seq_fops = {
2165 .owner = THIS_MODULE,
2166 },
1906 .seq_ops = { 2167 .seq_ops = {
1907 .show = tcp6_seq_show, 2168 .show = tcp6_seq_show,
1908 }, 2169 },
@@ -1936,8 +2197,6 @@ struct proto tcpv6_prot = {
1936 .sendmsg = tcp_sendmsg, 2197 .sendmsg = tcp_sendmsg,
1937 .sendpage = tcp_sendpage, 2198 .sendpage = tcp_sendpage,
1938 .backlog_rcv = tcp_v6_do_rcv, 2199 .backlog_rcv = tcp_v6_do_rcv,
1939 .release_cb = tcp_release_cb,
1940 .mtu_reduced = tcp_v6_mtu_reduced,
1941 .hash = tcp_v6_hash, 2200 .hash = tcp_v6_hash,
1942 .unhash = inet_unhash, 2201 .unhash = inet_unhash,
1943 .get_port = inet_csk_get_port, 2202 .get_port = inet_csk_get_port,
@@ -1946,6 +2205,7 @@ struct proto tcpv6_prot = {
1946 .memory_allocated = &tcp_memory_allocated, 2205 .memory_allocated = &tcp_memory_allocated,
1947 .memory_pressure = &tcp_memory_pressure, 2206 .memory_pressure = &tcp_memory_pressure,
1948 .orphan_count = &tcp_orphan_count, 2207 .orphan_count = &tcp_orphan_count,
2208 .sysctl_mem = sysctl_tcp_mem,
1949 .sysctl_wmem = sysctl_tcp_wmem, 2209 .sysctl_wmem = sysctl_tcp_wmem,
1950 .sysctl_rmem = sysctl_tcp_rmem, 2210 .sysctl_rmem = sysctl_tcp_rmem,
1951 .max_header = MAX_TCP_HEADER, 2211 .max_header = MAX_TCP_HEADER,
@@ -1959,15 +2219,15 @@ struct proto tcpv6_prot = {
1959 .compat_setsockopt = compat_tcp_setsockopt, 2219 .compat_setsockopt = compat_tcp_setsockopt,
1960 .compat_getsockopt = compat_tcp_getsockopt, 2220 .compat_getsockopt = compat_tcp_getsockopt,
1961#endif 2221#endif
1962#ifdef CONFIG_MEMCG_KMEM
1963 .proto_cgroup = tcp_proto_cgroup,
1964#endif
1965}; 2222};
1966 2223
1967static const struct inet6_protocol tcpv6_protocol = { 2224static const struct inet6_protocol tcpv6_protocol = {
1968 .early_demux = tcp_v6_early_demux,
1969 .handler = tcp_v6_rcv, 2225 .handler = tcp_v6_rcv,
1970 .err_handler = tcp_v6_err, 2226 .err_handler = tcp_v6_err,
2227 .gso_send_check = tcp_v6_gso_send_check,
2228 .gso_segment = tcp_tso_segment,
2229 .gro_receive = tcp6_gro_receive,
2230 .gro_complete = tcp6_gro_complete,
1971 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, 2231 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1972}; 2232};
1973 2233
@@ -2022,10 +2282,10 @@ int __init tcpv6_init(void)
2022out: 2282out:
2023 return ret; 2283 return ret;
2024 2284
2025out_tcpv6_protosw:
2026 inet6_unregister_protosw(&tcpv6_protosw);
2027out_tcpv6_protocol: 2285out_tcpv6_protocol:
2028 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); 2286 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2287out_tcpv6_protosw:
2288 inet6_unregister_protosw(&tcpv6_protosw);
2029 goto out; 2289 goto out;
2030} 2290}
2031 2291
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
deleted file mode 100644
index 2ec6bf6a0aa..00000000000
--- a/net/ipv6/tcpv6_offload.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * IPV6 GSO/GRO offload support
3 * Linux INET6 implementation
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * TCPv6 GSO/GRO support
11 */
12#include <linux/skbuff.h>
13#include <net/protocol.h>
14#include <net/tcp.h>
15#include <net/ip6_checksum.h>
16#include "ip6_offload.h"
17
18static int tcp_v6_gso_send_check(struct sk_buff *skb)
19{
20 const struct ipv6hdr *ipv6h;
21 struct tcphdr *th;
22
23 if (!pskb_may_pull(skb, sizeof(*th)))
24 return -EINVAL;
25
26 ipv6h = ipv6_hdr(skb);
27 th = tcp_hdr(skb);
28
29 th->check = 0;
30 skb->ip_summed = CHECKSUM_PARTIAL;
31 __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
32 return 0;
33}
34
35static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
36 struct sk_buff *skb)
37{
38 const struct ipv6hdr *iph = skb_gro_network_header(skb);
39 __wsum wsum;
40 __sum16 sum;
41
42 switch (skb->ip_summed) {
43 case CHECKSUM_COMPLETE:
44 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
45 skb->csum)) {
46 skb->ip_summed = CHECKSUM_UNNECESSARY;
47 break;
48 }
49flush:
50 NAPI_GRO_CB(skb)->flush = 1;
51 return NULL;
52
53 case CHECKSUM_NONE:
54 wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
55 skb_gro_len(skb),
56 IPPROTO_TCP, 0));
57 sum = csum_fold(skb_checksum(skb,
58 skb_gro_offset(skb),
59 skb_gro_len(skb),
60 wsum));
61 if (sum)
62 goto flush;
63
64 skb->ip_summed = CHECKSUM_UNNECESSARY;
65 break;
66 }
67
68 return tcp_gro_receive(head, skb);
69}
70
71static int tcp6_gro_complete(struct sk_buff *skb)
72{
73 const struct ipv6hdr *iph = ipv6_hdr(skb);
74 struct tcphdr *th = tcp_hdr(skb);
75
76 th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
77 &iph->saddr, &iph->daddr, 0);
78 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
79
80 return tcp_gro_complete(skb);
81}
82
83static const struct net_offload tcpv6_offload = {
84 .callbacks = {
85 .gso_send_check = tcp_v6_gso_send_check,
86 .gso_segment = tcp_tso_segment,
87 .gro_receive = tcp6_gro_receive,
88 .gro_complete = tcp6_gro_complete,
89 },
90};
91
92int __init tcpv6_offload_init(void)
93{
94 return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
95}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 4b0f50d9a96..4f3cec12aa8 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -19,8 +19,6 @@
19 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> 19 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
20 */ 20 */
21 21
22#define pr_fmt(fmt) "IPv6: " fmt
23
24#include <linux/icmpv6.h> 22#include <linux/icmpv6.h>
25#include <linux/init.h> 23#include <linux/init.h>
26#include <linux/module.h> 24#include <linux/module.h>
@@ -162,11 +160,11 @@ static const struct inet6_protocol tunnel46_protocol = {
162static int __init tunnel6_init(void) 160static int __init tunnel6_init(void)
163{ 161{
164 if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) { 162 if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
165 pr_err("%s: can't add protocol\n", __func__); 163 printk(KERN_ERR "tunnel6 init(): can't add protocol\n");
166 return -EAGAIN; 164 return -EAGAIN;
167 } 165 }
168 if (inet6_add_protocol(&tunnel46_protocol, IPPROTO_IPIP)) { 166 if (inet6_add_protocol(&tunnel46_protocol, IPPROTO_IPIP)) {
169 pr_err("%s: can't add protocol\n", __func__); 167 printk(KERN_ERR "tunnel6 init(): can't add protocol\n");
170 inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); 168 inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
171 return -EAGAIN; 169 return -EAGAIN;
172 } 170 }
@@ -176,9 +174,9 @@ static int __init tunnel6_init(void)
176static void __exit tunnel6_fini(void) 174static void __exit tunnel6_fini(void)
177{ 175{
178 if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP)) 176 if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP))
179 pr_err("%s: can't remove protocol\n", __func__); 177 printk(KERN_ERR "tunnel6 close: can't remove protocol\n");
180 if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) 178 if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
181 pr_err("%s: can't remove protocol\n", __func__); 179 printk(KERN_ERR "tunnel6 close: can't remove protocol\n");
182} 180}
183 181
184module_init(tunnel6_init); 182module_init(tunnel6_init);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index dfaa29b8b29..bb95e8e1c6f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -48,7 +48,6 @@
48 48
49#include <linux/proc_fs.h> 49#include <linux/proc_fs.h>
50#include <linux/seq_file.h> 50#include <linux/seq_file.h>
51#include <trace/events/skb.h>
52#include "udp_impl.h" 51#include "udp_impl.h"
53 52
54int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) 53int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
@@ -104,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
104{ 103{
105 unsigned int hash2_nulladdr = 104 unsigned int hash2_nulladdr =
106 udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); 105 udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
107 unsigned int hash2_partial = 106 unsigned int hash2_partial =
108 udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); 107 udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
109 108
110 /* precompute partial secondary hash */ 109 /* precompute partial secondary hash */
@@ -239,7 +238,7 @@ exact_match:
239 return result; 238 return result;
240} 239}
241 240
242struct sock *__udp6_lib_lookup(struct net *net, 241static struct sock *__udp6_lib_lookup(struct net *net,
243 const struct in6_addr *saddr, __be16 sport, 242 const struct in6_addr *saddr, __be16 sport,
244 const struct in6_addr *daddr, __be16 dport, 243 const struct in6_addr *daddr, __be16 dport,
245 int dif, struct udp_table *udptable) 244 int dif, struct udp_table *udptable)
@@ -306,7 +305,6 @@ begin:
306 rcu_read_unlock(); 305 rcu_read_unlock();
307 return result; 306 return result;
308} 307}
309EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
310 308
311static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, 309static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
312 __be16 sport, __be16 dport, 310 __be16 sport, __be16 dport,
@@ -342,15 +340,15 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
342 struct ipv6_pinfo *np = inet6_sk(sk); 340 struct ipv6_pinfo *np = inet6_sk(sk);
343 struct inet_sock *inet = inet_sk(sk); 341 struct inet_sock *inet = inet_sk(sk);
344 struct sk_buff *skb; 342 struct sk_buff *skb;
345 unsigned int ulen, copied; 343 unsigned int ulen;
346 int peeked, off = 0; 344 int peeked;
347 int err; 345 int err;
348 int is_udplite = IS_UDPLITE(sk); 346 int is_udplite = IS_UDPLITE(sk);
349 int is_udp4; 347 int is_udp4;
350 bool slow; 348 bool slow;
351 349
352 if (addr_len) 350 if (addr_len)
353 *addr_len = sizeof(struct sockaddr_in6); 351 *addr_len=sizeof(struct sockaddr_in6);
354 352
355 if (flags & MSG_ERRQUEUE) 353 if (flags & MSG_ERRQUEUE)
356 return ipv6_recv_error(sk, msg, len); 354 return ipv6_recv_error(sk, msg, len);
@@ -360,15 +358,14 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
360 358
361try_again: 359try_again:
362 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 360 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
363 &peeked, &off, &err); 361 &peeked, &err);
364 if (!skb) 362 if (!skb)
365 goto out; 363 goto out;
366 364
367 ulen = skb->len - sizeof(struct udphdr); 365 ulen = skb->len - sizeof(struct udphdr);
368 copied = len; 366 if (len > ulen)
369 if (copied > ulen) 367 len = ulen;
370 copied = ulen; 368 else if (len < ulen)
371 else if (copied < ulen)
372 msg->msg_flags |= MSG_TRUNC; 369 msg->msg_flags |= MSG_TRUNC;
373 370
374 is_udp4 = (skb->protocol == htons(ETH_P_IP)); 371 is_udp4 = (skb->protocol == htons(ETH_P_IP));
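
The recvmsg hunk above is the datagram truncation bookkeeping: copy at most the caller's buffer length, raise MSG_TRUNC when the datagram was larger, and report the full datagram length only if the caller passed MSG_TRUNC. A compact sketch of that logic (the flag constant below is a stand-in, not the real MSG_TRUNC value):

    #include <stdio.h>

    #define MSG_TRUNC_FLAG 0x20   /* stand-in for MSG_TRUNC in this sketch */

    /* Copy at most the buffer size, flag truncation in msg_flags, and
     * return the full datagram length only when the caller asked for it. */
    static size_t udp_recv_len(size_t buf_len, size_t datagram_len,
                               int flags, int *msg_flags)
    {
        size_t copied = buf_len < datagram_len ? buf_len : datagram_len;

        if (copied < datagram_len)
            *msg_flags |= MSG_TRUNC_FLAG;

        return (flags & MSG_TRUNC_FLAG) ? datagram_len : copied;
    }

    int main(void)
    {
        int mf = 0;
        size_t r = udp_recv_len(512, 1400, 0, &mf);

        printf("returned %zu, truncated=%d\n", r, !!(mf & MSG_TRUNC_FLAG));
        return 0;
    }
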
@@ -379,34 +376,22 @@ try_again:
379 * coverage checksum (UDP-Lite), do it before the copy. 376 * coverage checksum (UDP-Lite), do it before the copy.
380 */ 377 */
381 378
382 if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { 379 if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
383 if (udp_lib_checksum_complete(skb)) 380 if (udp_lib_checksum_complete(skb))
384 goto csum_copy_err; 381 goto csum_copy_err;
385 } 382 }
386 383
387 if (skb_csum_unnecessary(skb)) 384 if (skb_csum_unnecessary(skb))
388 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 385 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
389 msg->msg_iov, copied); 386 msg->msg_iov,len);
390 else { 387 else {
391 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); 388 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
392 if (err == -EINVAL) 389 if (err == -EINVAL)
393 goto csum_copy_err; 390 goto csum_copy_err;
394 } 391 }
395 if (unlikely(err)) { 392 if (err)
396 trace_kfree_skb(skb, udpv6_recvmsg);
397 if (!peeked) {
398 atomic_inc(&sk->sk_drops);
399 if (is_udp4)
400 UDP_INC_STATS_USER(sock_net(sk),
401 UDP_MIB_INERRORS,
402 is_udplite);
403 else
404 UDP6_INC_STATS_USER(sock_net(sk),
405 UDP_MIB_INERRORS,
406 is_udplite);
407 }
408 goto out_free; 393 goto out_free;
409 } 394
410 if (!peeked) { 395 if (!peeked) {
411 if (is_udp4) 396 if (is_udp4)
412 UDP_INC_STATS_USER(sock_net(sk), 397 UDP_INC_STATS_USER(sock_net(sk),
@@ -432,7 +417,8 @@ try_again:
432 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, 417 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
433 &sin6->sin6_addr); 418 &sin6->sin6_addr);
434 else { 419 else {
435 sin6->sin6_addr = ipv6_hdr(skb)->saddr; 420 ipv6_addr_copy(&sin6->sin6_addr,
421 &ipv6_hdr(skb)->saddr);
436 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 422 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
437 sin6->sin6_scope_id = IP6CB(skb)->iif; 423 sin6->sin6_scope_id = IP6CB(skb)->iif;
438 } 424 }
@@ -446,7 +432,7 @@ try_again:
446 datagram_recv_ctl(sk, msg, skb); 432 datagram_recv_ctl(sk, msg, skb);
447 } 433 }
448 434
449 err = copied; 435 err = len;
450 if (flags & MSG_TRUNC) 436 if (flags & MSG_TRUNC)
451 err = ulen; 437 err = ulen;
452 438
@@ -492,11 +478,6 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
492 if (sk == NULL) 478 if (sk == NULL)
493 return; 479 return;
494 480
495 if (type == ICMPV6_PKT_TOOBIG)
496 ip6_sk_update_pmtu(skb, sk, info);
497 if (type == NDISC_REDIRECT)
498 ip6_sk_redirect(skb, sk);
499
500 np = inet6_sk(sk); 481 np = inet6_sk(sk);
501 482
502 if (!icmpv6_err_convert(type, code, &err) && !np->recverr) 483 if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
@@ -514,28 +495,6 @@ out:
514 sock_put(sk); 495 sock_put(sk);
515} 496}
516 497
517static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
518{
519 int rc;
520
521 if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
522 sock_rps_save_rxhash(sk, skb);
523
524 rc = sock_queue_rcv_skb(sk, skb);
525 if (rc < 0) {
526 int is_udplite = IS_UDPLITE(sk);
527
528 /* Note that an ENOMEM error is charged twice */
529 if (rc == -ENOMEM)
530 UDP6_INC_STATS_BH(sock_net(sk),
531 UDP_MIB_RCVBUFERRORS, is_udplite);
532 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
533 kfree_skb(skb);
534 return -1;
535 }
536 return 0;
537}
538
539static __inline__ void udpv6_err(struct sk_buff *skb, 498static __inline__ void udpv6_err(struct sk_buff *skb,
540 struct inet6_skb_parm *opt, u8 type, 499 struct inet6_skb_parm *opt, u8 type,
541 u8 code, int offset, __be32 info ) 500 u8 code, int offset, __be32 info )
@@ -543,54 +502,18 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
543 __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); 502 __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
544} 503}
545 504
546static struct static_key udpv6_encap_needed __read_mostly; 505int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
547void udpv6_encap_enable(void)
548{
549 if (!static_key_enabled(&udpv6_encap_needed))
550 static_key_slow_inc(&udpv6_encap_needed);
551}
552EXPORT_SYMBOL(udpv6_encap_enable);
553
554int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
555{ 506{
556 struct udp_sock *up = udp_sk(sk); 507 struct udp_sock *up = udp_sk(sk);
557 int rc; 508 int rc;
558 int is_udplite = IS_UDPLITE(sk); 509 int is_udplite = IS_UDPLITE(sk);
559 510
511 if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
512 sock_rps_save_rxhash(sk, skb->rxhash);
513
560 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 514 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
561 goto drop; 515 goto drop;
562 516
563 if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
564 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
565
566 /*
567 * This is an encapsulation socket so pass the skb to
568 * the socket's udp_encap_rcv() hook. Otherwise, just
569 * fall through and pass this up the UDP socket.
570 * up->encap_rcv() returns the following value:
571 * =0 if skb was successfully passed to the encap
572 * handler or was discarded by it.
573 * >0 if skb should be passed on to UDP.
574 * <0 if skb should be resubmitted as proto -N
575 */
576
577 /* if we're overly short, let UDP handle it */
578 encap_rcv = ACCESS_ONCE(up->encap_rcv);
579 if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
580 int ret;
581
582 ret = encap_rcv(sk, skb);
583 if (ret <= 0) {
584 UDP_INC_STATS_BH(sock_net(sk),
585 UDP_MIB_INDATAGRAMS,
586 is_udplite);
587 return -ret;
588 }
589 }
590
591 /* FALLTHROUGH -- it's a UDP Packet */
592 }
593
594 /* 517 /*
595 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). 518 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
596 */ 519 */
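
The removed block above documents the udp_encap_rcv() contract: the hook returns 0 when it consumed or discarded the packet, a positive value to fall through to normal UDP delivery, and a negative value to resubmit the packet as IP protocol -N. A toy dispatcher honoring that contract (nothing below is the kernel API):

    #include <stdio.h>

    /* Toy encapsulation handler: claims packets whose first byte is zero. */
    static int toy_encap_rcv(const unsigned char *payload, unsigned int len)
    {
        if (len >= 4 && payload[0] == 0)
            return 0;            /* looks like ours: consumed */
        return 1;                /* not ours: let UDP have it */
    }

    static void dispatch(const unsigned char *payload, unsigned int len)
    {
        int ret = toy_encap_rcv(payload, len);

        if (ret == 0)
            printf("consumed by encap handler\n");
        else if (ret > 0)
            printf("delivered as plain UDP\n");
        else
            printf("resubmitted as protocol %d\n", -ret);
    }

    int main(void)
    {
        const unsigned char espish[8] = { 0 }, plain[8] = { 42 };

        dispatch(espish, sizeof(espish));
        dispatch(plain, sizeof(plain));
        return 0;
    }
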
@@ -610,30 +533,24 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
610 } 533 }
611 } 534 }
612 535
613 if (rcu_access_pointer(sk->sk_filter)) { 536 if (rcu_dereference_raw(sk->sk_filter)) {
614 if (udp_lib_checksum_complete(skb)) 537 if (udp_lib_checksum_complete(skb))
615 goto drop; 538 goto drop;
616 } 539 }
617 540
618 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) 541 if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
619 goto drop; 542 /* Note that an ENOMEM error is charged twice */
620 543 if (rc == -ENOMEM)
621 skb_dst_drop(skb); 544 UDP6_INC_STATS_BH(sock_net(sk),
622 545 UDP_MIB_RCVBUFERRORS, is_udplite);
623 bh_lock_sock(sk); 546 goto drop_no_sk_drops_inc;
624 rc = 0;
625 if (!sock_owned_by_user(sk))
626 rc = __udpv6_queue_rcv_skb(sk, skb);
627 else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
628 bh_unlock_sock(sk);
629 goto drop;
630 } 547 }
631 bh_unlock_sock(sk);
632 548
633 return rc; 549 return 0;
634drop: 550drop:
635 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
636 atomic_inc(&sk->sk_drops); 551 atomic_inc(&sk->sk_drops);
552drop_no_sk_drops_inc:
553 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
637 kfree_skb(skb); 554 kfree_skb(skb);
638 return -1; 555 return -1;
639} 556}
@@ -682,27 +599,37 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
682static void flush_stack(struct sock **stack, unsigned int count, 599static void flush_stack(struct sock **stack, unsigned int count,
683 struct sk_buff *skb, unsigned int final) 600 struct sk_buff *skb, unsigned int final)
684{ 601{
685 struct sk_buff *skb1 = NULL;
686 struct sock *sk;
687 unsigned int i; 602 unsigned int i;
603 struct sock *sk;
604 struct sk_buff *skb1;
688 605
689 for (i = 0; i < count; i++) { 606 for (i = 0; i < count; i++) {
607 skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
608
690 sk = stack[i]; 609 sk = stack[i];
691 if (likely(skb1 == NULL)) 610 if (skb1) {
692 skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); 611 if (sk_rcvqueues_full(sk, skb1)) {
693 if (!skb1) { 612 kfree_skb(skb1);
694 atomic_inc(&sk->sk_drops); 613 goto drop;
695 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 614 }
696 IS_UDPLITE(sk)); 615 bh_lock_sock(sk);
697 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 616 if (!sock_owned_by_user(sk))
698 IS_UDPLITE(sk)); 617 udpv6_queue_rcv_skb(sk, skb1);
618 else if (sk_add_backlog(sk, skb1)) {
619 kfree_skb(skb1);
620 bh_unlock_sock(sk);
621 goto drop;
622 }
623 bh_unlock_sock(sk);
624 continue;
699 } 625 }
700 626drop:
701 if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) 627 atomic_inc(&sk->sk_drops);
702 skb1 = NULL; 628 UDP6_INC_STATS_BH(sock_net(sk),
629 UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
630 UDP6_INC_STATS_BH(sock_net(sk),
631 UDP_MIB_INERRORS, IS_UDPLITE(sk));
703 } 632 }
704 if (unlikely(skb1))
705 kfree_skb(skb1);
706} 633}
707/* 634/*
708 * Note: called only from the BH handler context, 635 * Note: called only from the BH handler context,
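
The restored flush_stack() hands the same datagram to every matching multicast socket by cloning the skb for each receiver except the final one, which takes the original, so the common single-listener case never copies. A rough userspace illustration of that fan-out pattern, with strdup() standing in for skb_clone() and plain buffers for sockets:

/* Clone for everyone but the last receiver; the last one takes the
 * original buffer.  Illustration only, not kernel code.              */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void deliver(unsigned int receiver, char *buf)
{
        printf("receiver %u got \"%s\" at %p\n", receiver, buf, (void *)buf);
        free(buf);
}

int main(void)
{
        char *orig = strdup("multicast payload");
        unsigned int count = 3, final = count - 1, i;

        if (!orig)
                return 1;

        for (i = 0; i < count; i++) {
                char *copy = (i == final) ? orig : strdup(orig);

                if (!copy)      /* clone failed: count it as a drop    */
                        continue;
                deliver(i, copy);
        }
        return 0;
}
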
@@ -842,29 +769,39 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
842 * for sock caches... i'll skip this for now. 769 * for sock caches... i'll skip this for now.
843 */ 770 */
844 sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); 771 sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
845 if (sk != NULL) {
846 int ret = udpv6_queue_rcv_skb(sk, skb);
847 sock_put(sk);
848 772
849 /* a return value > 0 means to resubmit the input, but 773 if (sk == NULL) {
850 * it wants the return to be -protocol, or 0 774 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
851 */ 775 goto discard;
852 if (ret > 0) 776
853 return -ret; 777 if (udp_lib_checksum_complete(skb))
778 goto discard;
779 UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
780 proto == IPPROTO_UDPLITE);
854 781
782 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
783
784 kfree_skb(skb);
855 return 0; 785 return 0;
856 } 786 }
857 787
858 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 788 /* deliver */
859 goto discard;
860 789
861 if (udp_lib_checksum_complete(skb)) 790 if (sk_rcvqueues_full(sk, skb)) {
791 sock_put(sk);
862 goto discard; 792 goto discard;
863 793 }
864 UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 794 bh_lock_sock(sk);
865 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); 795 if (!sock_owned_by_user(sk))
866 796 udpv6_queue_rcv_skb(sk, skb);
867 kfree_skb(skb); 797 else if (sk_add_backlog(sk, skb)) {
798 atomic_inc(&sk->sk_drops);
799 bh_unlock_sock(sk);
800 sock_put(sk);
801 goto discard;
802 }
803 bh_unlock_sock(sk);
804 sock_put(sk);
868 return 0; 805 return 0;
869 806
870short_packet: 807short_packet:
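
In the restored receive path above, __udp6_lib_rcv() first asks sk_rcvqueues_full() whether the socket can take another packet, then either processes it directly or parks it on the backlog while the socket is owned by a user context. The admission test is, roughly, an accounting comparison against sk_rcvbuf; the sketch below illustrates that idea with an invented struct, not the kernel's struct sock, and the exact formula is an approximation.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: this is not struct sock, and the real
 * sk_rcvqueues_full() works on the kernel's own accounting fields.   */
struct fake_sock {
        unsigned int rmem_alloc;        /* bytes already in the rx queue */
        unsigned int backlog_len;       /* bytes parked on the backlog   */
        unsigned int rcvbuf;            /* SO_RCVBUF-style budget        */
};

static bool rcvqueues_full(const struct fake_sock *sk, unsigned int pkt_size)
{
        /* approximate form of the test: would this packet push queued
         * plus backlogged memory past the receive budget?             */
        return sk->rmem_alloc + sk->backlog_len + pkt_size > sk->rcvbuf;
}

int main(void)
{
        struct fake_sock sk = {
                .rmem_alloc  = 60000,
                .backlog_len = 4000,
                .rcvbuf      = 65536,
        };

        printf("1500-byte datagram: %s\n",
               rcvqueues_full(&sk, 1500) ? "drop" : "queue it");
        printf("4000-byte datagram: %s\n",
               rcvqueues_full(&sk, 4000) ? "drop" : "queue it");
        return 0;
}
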
@@ -1176,11 +1113,11 @@ do_udp_sendmsg:
1176 1113
1177 fl6.flowi6_proto = sk->sk_protocol; 1114 fl6.flowi6_proto = sk->sk_protocol;
1178 if (!ipv6_addr_any(daddr)) 1115 if (!ipv6_addr_any(daddr))
1179 fl6.daddr = *daddr; 1116 ipv6_addr_copy(&fl6.daddr, daddr);
1180 else 1117 else
1181 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ 1118 fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
1182 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) 1119 if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
1183 fl6.saddr = np->saddr; 1120 ipv6_addr_copy(&fl6.saddr, &np->saddr);
1184 fl6.fl6_sport = inet->inet_sport; 1121 fl6.fl6_sport = inet->inet_sport;
1185 1122
1186 final_p = fl6_update_dst(&fl6, opt, &final); 1123 final_p = fl6_update_dst(&fl6, opt, &final);
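
Several hunks in this patch, including the one above, swap direct structure assignment (fl6.daddr = *daddr) back to ipv6_addr_copy(). Both spell a 16-byte copy of a struct in6_addr; the older code base simply used the helper, which the kernel defines as a memcpy() wrapper. A standalone demonstration, with the helper re-declared locally for the demo:

/* Both idioms seen in this diff copy the full 128-bit address.       */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

static void ipv6_addr_copy(struct in6_addr *dst, const struct in6_addr *src)
{
        memcpy(dst, src, sizeof(struct in6_addr));
}

int main(void)
{
        struct in6_addr daddr, a, b;
        char buf[INET6_ADDRSTRLEN];

        inet_pton(AF_INET6, "2001:db8::1", &daddr);

        a = daddr;                      /* newer-kernel style: assignment */
        ipv6_addr_copy(&b, &daddr);     /* style restored by this patch   */

        printf("a = %s\n", inet_ntop(AF_INET6, &a, buf, sizeof(buf)));
        printf("b = %s, equal = %d\n",
               inet_ntop(AF_INET6, &b, buf, sizeof(buf)),
               memcmp(&a, &b, sizeof(a)) == 0);
        return 0;
}
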
@@ -1190,8 +1127,7 @@ do_udp_sendmsg:
1190 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { 1127 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) {
1191 fl6.flowi6_oif = np->mcast_oif; 1128 fl6.flowi6_oif = np->mcast_oif;
1192 connected = 0; 1129 connected = 0;
1193 } else if (!fl6.flowi6_oif) 1130 }
1194 fl6.flowi6_oif = np->ucast_oif;
1195 1131
1196 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 1132 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
1197 1133
@@ -1343,9 +1279,102 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
1343} 1279}
1344#endif 1280#endif
1345 1281
1282static int udp6_ufo_send_check(struct sk_buff *skb)
1283{
1284 const struct ipv6hdr *ipv6h;
1285 struct udphdr *uh;
1286
1287 if (!pskb_may_pull(skb, sizeof(*uh)))
1288 return -EINVAL;
1289
1290 ipv6h = ipv6_hdr(skb);
1291 uh = udp_hdr(skb);
1292
1293 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
1294 IPPROTO_UDP, 0);
1295 skb->csum_start = skb_transport_header(skb) - skb->head;
1296 skb->csum_offset = offsetof(struct udphdr, check);
1297 skb->ip_summed = CHECKSUM_PARTIAL;
1298 return 0;
1299}
1300
1301static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
1302{
1303 struct sk_buff *segs = ERR_PTR(-EINVAL);
1304 unsigned int mss;
1305 unsigned int unfrag_ip6hlen, unfrag_len;
1306 struct frag_hdr *fptr;
1307 u8 *mac_start, *prevhdr;
1308 u8 nexthdr;
1309 u8 frag_hdr_sz = sizeof(struct frag_hdr);
1310 int offset;
1311 __wsum csum;
1312
1313 mss = skb_shinfo(skb)->gso_size;
1314 if (unlikely(skb->len <= mss))
1315 goto out;
1316
1317 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
1318 /* Packet is from an untrusted source, reset gso_segs. */
1319 int type = skb_shinfo(skb)->gso_type;
1320
1321 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
1322 !(type & (SKB_GSO_UDP))))
1323 goto out;
1324
1325 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
1326
1327 segs = NULL;
1328 goto out;
1329 }
1330
1331 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
1332 * do checksum of UDP packets sent as multiple IP fragments.
1333 */
1334 offset = skb_checksum_start_offset(skb);
1335 csum = skb_checksum(skb, offset, skb->len- offset, 0);
1336 offset += skb->csum_offset;
1337 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1338 skb->ip_summed = CHECKSUM_NONE;
1339
1340 /* Check if there is enough headroom to insert fragment header. */
1341 if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
1342 pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
1343 goto out;
1344
1345 /* Find the unfragmentable header and shift it left by frag_hdr_sz
1346 * bytes to insert fragment header.
1347 */
1348 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
1349 nexthdr = *prevhdr;
1350 *prevhdr = NEXTHDR_FRAGMENT;
1351 unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
1352 unfrag_ip6hlen;
1353 mac_start = skb_mac_header(skb);
1354 memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
1355
1356 skb->mac_header -= frag_hdr_sz;
1357 skb->network_header -= frag_hdr_sz;
1358
1359 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
1360 fptr->nexthdr = nexthdr;
1361 fptr->reserved = 0;
1362 ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
1363
1364 /* Fragment the skb. ipv6 header and the remaining fields of the
1365 * fragment header are updated in ipv6_gso_segment()
1366 */
1367 segs = skb_segment(skb, features);
1368
1369out:
1370 return segs;
1371}
1372
1346static const struct inet6_protocol udpv6_protocol = { 1373static const struct inet6_protocol udpv6_protocol = {
1347 .handler = udpv6_rcv, 1374 .handler = udpv6_rcv,
1348 .err_handler = udpv6_err, 1375 .err_handler = udpv6_err,
1376 .gso_send_check = udp6_ufo_send_check,
1377 .gso_segment = udp6_ufo_fragment,
1349 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, 1378 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1350}; 1379};
1351 1380
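
udp6_ufo_send_check(), added back above, seeds uh->check with the bitwise inverse of csum_ipv6_magic(), i.e. the folded but not yet complemented sum of the IPv6 pseudo-header, and marks the skb CHECKSUM_PARTIAL so that whoever finishes the job only has to add the payload bytes from csum_start onward. A userspace sketch of that pseudo-header arithmetic (plain RFC 2460 / RFC 768 one's-complement math, not the kernel's optimized csum_ipv6_magic):

/* One's-complement helpers, written out the slow way for clarity.    */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t sum16(const void *data, size_t len, uint32_t sum)
{
        const uint8_t *p = data;

        while (len > 1) {
                sum += (uint32_t)p[0] << 8 | p[1];
                p += 2;
                len -= 2;
        }
        if (len)
                sum += (uint32_t)p[0] << 8;
        return sum;
}

static uint16_t csum_fold32(uint32_t sum)
{
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;           /* folded, not yet complemented */
}

int main(void)
{
        struct in6_addr saddr, daddr;
        uint32_t sum = 0;
        uint32_t ulen = 1480;           /* example UDP length           */
        uint8_t nexthdr = 17;           /* IPPROTO_UDP                  */
        uint16_t folded;

        inet_pton(AF_INET6, "2001:db8::1", &saddr);
        inet_pton(AF_INET6, "2001:db8::2", &daddr);

        sum = sum16(&saddr, sizeof(saddr), sum);
        sum = sum16(&daddr, sizeof(daddr), sum);
        sum += ulen;                    /* 32-bit upper-layer length    */
        sum += nexthdr;                 /* zero padding + next header   */
        folded = csum_fold32(sum);

        /* csum_ipv6_magic() returns the complement of this fold, and the
         * UFO path stores ~that, i.e. the plain fold, in uh->check.    */
        printf("pseudo-header sum, folded:      0x%04x\n", (unsigned)folded);
        printf("complemented (csum_ipv6_magic): 0x%04x\n",
               (unsigned)(uint16_t)~folded);
        return 0;
}
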
@@ -1375,8 +1404,7 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
1375 sk_wmem_alloc_get(sp), 1404 sk_wmem_alloc_get(sp),
1376 sk_rmem_alloc_get(sp), 1405 sk_rmem_alloc_get(sp),
1377 0, 0L, 0, 1406 0, 0L, 0,
1378 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 1407 sock_i_uid(sp), 0,
1379 0,
1380 sock_i_ino(sp), 1408 sock_i_ino(sp),
1381 atomic_read(&sp->sk_refcnt), sp, 1409 atomic_read(&sp->sk_refcnt), sp,
1382 atomic_read(&sp->sk_drops)); 1410 atomic_read(&sp->sk_drops));
@@ -1396,19 +1424,13 @@ int udp6_seq_show(struct seq_file *seq, void *v)
1396 return 0; 1424 return 0;
1397} 1425}
1398 1426
1399static const struct file_operations udp6_afinfo_seq_fops = {
1400 .owner = THIS_MODULE,
1401 .open = udp_seq_open,
1402 .read = seq_read,
1403 .llseek = seq_lseek,
1404 .release = seq_release_net
1405};
1406
1407static struct udp_seq_afinfo udp6_seq_afinfo = { 1427static struct udp_seq_afinfo udp6_seq_afinfo = {
1408 .name = "udp6", 1428 .name = "udp6",
1409 .family = AF_INET6, 1429 .family = AF_INET6,
1410 .udp_table = &udp_table, 1430 .udp_table = &udp_table,
1411 .seq_fops = &udp6_afinfo_seq_fops, 1431 .seq_fops = {
1432 .owner = THIS_MODULE,
1433 },
1412 .seq_ops = { 1434 .seq_ops = {
1413 .show = udp6_seq_show, 1435 .show = udp6_seq_show,
1414 }, 1436 },
@@ -1438,7 +1460,7 @@ struct proto udpv6_prot = {
1438 .getsockopt = udpv6_getsockopt, 1460 .getsockopt = udpv6_getsockopt,
1439 .sendmsg = udpv6_sendmsg, 1461 .sendmsg = udpv6_sendmsg,
1440 .recvmsg = udpv6_recvmsg, 1462 .recvmsg = udpv6_recvmsg,
1441 .backlog_rcv = __udpv6_queue_rcv_skb, 1463 .backlog_rcv = udpv6_queue_rcv_skb,
1442 .hash = udp_lib_hash, 1464 .hash = udp_lib_hash,
1443 .unhash = udp_lib_unhash, 1465 .unhash = udp_lib_unhash,
1444 .rehash = udp_v6_rehash, 1466 .rehash = udp_v6_rehash,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
deleted file mode 100644
index 0c8934a317c..00000000000
--- a/net/ipv6/udp_offload.c
+++ /dev/null
@@ -1,120 +0,0 @@
1/*
2 * IPV6 GSO/GRO offload support
3 * Linux INET6 implementation
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * UDPv6 GSO support
11 */
12#include <linux/skbuff.h>
13#include <net/protocol.h>
14#include <net/ipv6.h>
15#include <net/udp.h>
16#include <net/ip6_checksum.h>
17#include "ip6_offload.h"
18
19static int udp6_ufo_send_check(struct sk_buff *skb)
20{
21 const struct ipv6hdr *ipv6h;
22 struct udphdr *uh;
23
24 if (!pskb_may_pull(skb, sizeof(*uh)))
25 return -EINVAL;
26
27 ipv6h = ipv6_hdr(skb);
28 uh = udp_hdr(skb);
29
30 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
31 IPPROTO_UDP, 0);
32 skb->csum_start = skb_transport_header(skb) - skb->head;
33 skb->csum_offset = offsetof(struct udphdr, check);
34 skb->ip_summed = CHECKSUM_PARTIAL;
35 return 0;
36}
37
38static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
39 netdev_features_t features)
40{
41 struct sk_buff *segs = ERR_PTR(-EINVAL);
42 unsigned int mss;
43 unsigned int unfrag_ip6hlen, unfrag_len;
44 struct frag_hdr *fptr;
45 u8 *mac_start, *prevhdr;
46 u8 nexthdr;
47 u8 frag_hdr_sz = sizeof(struct frag_hdr);
48 int offset;
49 __wsum csum;
50
51 mss = skb_shinfo(skb)->gso_size;
52 if (unlikely(skb->len <= mss))
53 goto out;
54
55 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
56 /* Packet is from an untrusted source, reset gso_segs. */
57 int type = skb_shinfo(skb)->gso_type;
58
59 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
60 !(type & (SKB_GSO_UDP))))
61 goto out;
62
63 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
64
65 segs = NULL;
66 goto out;
67 }
68
69 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
70 * do checksum of UDP packets sent as multiple IP fragments.
71 */
72 offset = skb_checksum_start_offset(skb);
73 csum = skb_checksum(skb, offset, skb->len - offset, 0);
74 offset += skb->csum_offset;
75 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
76 skb->ip_summed = CHECKSUM_NONE;
77
78 /* Check if there is enough headroom to insert fragment header. */
79 if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
80 pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
81 goto out;
82
83 /* Find the unfragmentable header and shift it left by frag_hdr_sz
84 * bytes to insert fragment header.
85 */
86 unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
87 nexthdr = *prevhdr;
88 *prevhdr = NEXTHDR_FRAGMENT;
89 unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
90 unfrag_ip6hlen;
91 mac_start = skb_mac_header(skb);
92 memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
93
94 skb->mac_header -= frag_hdr_sz;
95 skb->network_header -= frag_hdr_sz;
96
97 fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
98 fptr->nexthdr = nexthdr;
99 fptr->reserved = 0;
100 ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
101
102 /* Fragment the skb. ipv6 header and the remaining fields of the
103 * fragment header are updated in ipv6_gso_segment()
104 */
105 segs = skb_segment(skb, features);
106
107out:
108 return segs;
109}
110static const struct net_offload udpv6_offload = {
111 .callbacks = {
112 .gso_send_check = udp6_ufo_send_check,
113 .gso_segment = udp6_ufo_fragment,
114 },
115};
116
117int __init udp_offload_init(void)
118{
119 return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
120}
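
Both copies of udp6_ufo_fragment() in this diff make room for the 8-byte IPv6 fragment header the same way: find the unfragmentable part with ip6_find_1stfragopt(), shift it left by frag_hdr_sz with memmove(), rewrite the previous next-header byte to NEXTHDR_FRAGMENT, and fill in the new header in the gap. A toy buffer-level illustration of that shift; the offsets and header contents are made up for the demo:

#include <stdio.h>
#include <string.h>

#define HEADROOM      8         /* spare bytes in front of the frame   */
#define MAC_LEN      14         /* pretend Ethernet header             */
#define IP6_LEN      40         /* pretend IPv6 header                 */
#define FRAG_HDR_SZ   8         /* size of the IPv6 fragment header    */

int main(void)
{
        unsigned char buf[HEADROOM + MAC_LEN + IP6_LEN + 32];
        unsigned char *mac = buf + HEADROOM;
        size_t unfrag_len = MAC_LEN + IP6_LEN;  /* unfragmentable part  */
        unsigned char *fraghdr;

        memset(buf, 0, sizeof(buf));
        memset(mac, 0xee, MAC_LEN);
        memset(mac + MAC_LEN, 0x66, IP6_LEN);

        /* shift the unfragmentable headers left into the headroom ...  */
        memmove(mac - FRAG_HDR_SZ, mac, unfrag_len);
        mac -= FRAG_HDR_SZ;

        /* ... which opens an 8-byte gap right after them for the
         * fragment header (nexthdr, reserved, offset, identification). */
        fraghdr = mac + unfrag_len;
        fraghdr[0] = 17;        /* nexthdr: what used to follow, e.g. UDP */
        fraghdr[1] = 0;         /* reserved                               */

        printf("fragment header now sits at offset %td of the buffer\n",
               fraghdr - buf);
        return 0;
}
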
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 1d08e21d9f6..986c4de5292 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -11,7 +11,6 @@
11 * as published by the Free Software Foundation; either version 11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version. 12 * 2 of the License, or (at your option) any later version.
13 */ 13 */
14#include <linux/export.h>
15#include "udp_impl.h" 14#include "udp_impl.h"
16 15
17static int udplitev6_rcv(struct sk_buff *skb) 16static int udplitev6_rcv(struct sk_buff *skb)
@@ -94,20 +93,13 @@ void udplitev6_exit(void)
94} 93}
95 94
96#ifdef CONFIG_PROC_FS 95#ifdef CONFIG_PROC_FS
97
98static const struct file_operations udplite6_afinfo_seq_fops = {
99 .owner = THIS_MODULE,
100 .open = udp_seq_open,
101 .read = seq_read,
102 .llseek = seq_lseek,
103 .release = seq_release_net
104};
105
106static struct udp_seq_afinfo udplite6_seq_afinfo = { 96static struct udp_seq_afinfo udplite6_seq_afinfo = {
107 .name = "udplite6", 97 .name = "udplite6",
108 .family = AF_INET6, 98 .family = AF_INET6,
109 .udp_table = &udplite_table, 99 .udp_table = &udplite_table,
110 .seq_fops = &udplite6_afinfo_seq_fops, 100 .seq_fops = {
101 .owner = THIS_MODULE,
102 },
111 .seq_ops = { 103 .seq_ops = {
112 .show = udp6_seq_show, 104 .show = udp6_seq_show,
113 }, 105 },
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 9949a356d62..3437d7d4eed 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -72,14 +72,15 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
72 top_iph->nexthdr = IPPROTO_BEETPH; 72 top_iph->nexthdr = IPPROTO_BEETPH;
73 } 73 }
74 74
75 top_iph->saddr = *(struct in6_addr *)&x->props.saddr; 75 ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
76 top_iph->daddr = *(struct in6_addr *)&x->id.daddr; 76 ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
77 return 0; 77 return 0;
78} 78}
79 79
80static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) 80static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
81{ 81{
82 struct ipv6hdr *ip6h; 82 struct ipv6hdr *ip6h;
83 const unsigned char *old_mac;
83 int size = sizeof(struct ipv6hdr); 84 int size = sizeof(struct ipv6hdr);
84 int err; 85 int err;
85 86
@@ -89,14 +90,17 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
89 90
90 __skb_push(skb, size); 91 __skb_push(skb, size);
91 skb_reset_network_header(skb); 92 skb_reset_network_header(skb);
92 skb_mac_header_rebuild(skb); 93
94 old_mac = skb_mac_header(skb);
95 skb_set_mac_header(skb, -skb->mac_len);
96 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
93 97
94 xfrm6_beet_make_header(skb); 98 xfrm6_beet_make_header(skb);
95 99
96 ip6h = ipv6_hdr(skb); 100 ip6h = ipv6_hdr(skb);
97 ip6h->payload_len = htons(skb->len - size); 101 ip6h->payload_len = htons(skb->len - size);
98 ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6; 102 ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
99 ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6; 103 ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
100 err = 0; 104 err = 0;
101out: 105out:
102 return err; 106 return err;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9f2095b19ad..4d6edff0498 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -55,14 +55,15 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
55 dsfield &= ~INET_ECN_MASK; 55 dsfield &= ~INET_ECN_MASK;
56 ipv6_change_dsfield(top_iph, 0, dsfield); 56 ipv6_change_dsfield(top_iph, 0, dsfield);
57 top_iph->hop_limit = ip6_dst_hoplimit(dst->child); 57 top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
58 top_iph->saddr = *(struct in6_addr *)&x->props.saddr; 58 ipv6_addr_copy(&top_iph->saddr, (const struct in6_addr *)&x->props.saddr);
59 top_iph->daddr = *(struct in6_addr *)&x->id.daddr; 59 ipv6_addr_copy(&top_iph->daddr, (const struct in6_addr *)&x->id.daddr);
60 return 0; 60 return 0;
61} 61}
62 62
63static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) 63static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
64{ 64{
65 int err = -EINVAL; 65 int err = -EINVAL;
66 const unsigned char *old_mac;
66 67
67 if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) 68 if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
68 goto out; 69 goto out;
@@ -79,9 +80,10 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
79 if (!(x->props.flags & XFRM_STATE_NOECN)) 80 if (!(x->props.flags & XFRM_STATE_NOECN))
80 ipip6_ecn_decapsulate(skb); 81 ipip6_ecn_decapsulate(skb);
81 82
83 old_mac = skb_mac_header(skb);
84 skb_set_mac_header(skb, -skb->mac_len);
85 memmove(skb_mac_header(skb), old_mac, skb->mac_len);
82 skb_reset_network_header(skb); 86 skb_reset_network_header(skb);
83 skb_mac_header_rebuild(skb);
84
85 err = 0; 87 err = 0;
86 88
87out: 89out:
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8755a3079d0..49a91c5f562 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -28,43 +28,6 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
28 28
29EXPORT_SYMBOL(xfrm6_find_1stfragopt); 29EXPORT_SYMBOL(xfrm6_find_1stfragopt);
30 30
31static int xfrm6_local_dontfrag(struct sk_buff *skb)
32{
33 int proto;
34 struct sock *sk = skb->sk;
35
36 if (sk) {
37 proto = sk->sk_protocol;
38
39 if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
40 return inet6_sk(sk)->dontfrag;
41 }
42
43 return 0;
44}
45
46static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
47{
48 struct flowi6 fl6;
49 struct sock *sk = skb->sk;
50
51 fl6.flowi6_oif = sk->sk_bound_dev_if;
52 fl6.daddr = ipv6_hdr(skb)->daddr;
53
54 ipv6_local_rxpmtu(sk, &fl6, mtu);
55}
56
57static void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
58{
59 struct flowi6 fl6;
60 struct sock *sk = skb->sk;
61
62 fl6.fl6_dport = inet_sk(sk)->inet_dport;
63 fl6.daddr = ipv6_hdr(skb)->daddr;
64
65 ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
66}
67
68static int xfrm6_tunnel_check_size(struct sk_buff *skb) 31static int xfrm6_tunnel_check_size(struct sk_buff *skb)
69{ 32{
70 int mtu, ret = 0; 33 int mtu, ret = 0;
@@ -76,13 +39,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
76 39
77 if (!skb->local_df && skb->len > mtu) { 40 if (!skb->local_df && skb->len > mtu) {
78 skb->dev = dst->dev; 41 skb->dev = dst->dev;
79 42 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
80 if (xfrm6_local_dontfrag(skb))
81 xfrm6_local_rxpmtu(skb, mtu);
82 else if (skb->sk)
83 xfrm6_local_error(skb, mtu);
84 else
85 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
86 ret = -EMSGSIZE; 43 ret = -EMSGSIZE;
87 } 44 }
88 45
@@ -136,18 +93,9 @@ static int __xfrm6_output(struct sk_buff *skb)
136{ 93{
137 struct dst_entry *dst = skb_dst(skb); 94 struct dst_entry *dst = skb_dst(skb);
138 struct xfrm_state *x = dst->xfrm; 95 struct xfrm_state *x = dst->xfrm;
139 int mtu = ip6_skb_dst_mtu(skb);
140
141 if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
142 xfrm6_local_rxpmtu(skb, mtu);
143 return -EMSGSIZE;
144 } else if (!skb->local_df && skb->len > mtu && skb->sk) {
145 xfrm6_local_error(skb, mtu);
146 return -EMSGSIZE;
147 }
148 96
149 if (x->props.mode == XFRM_MODE_TUNNEL && 97 if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
150 ((skb->len > mtu && !skb_is_gso(skb)) || 98 ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151 dst_allfrag(skb_dst(skb)))) { 99 dst_allfrag(skb_dst(skb)))) {
152 return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); 100 return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
153 } 101 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index c9844135c9c..d879f7efbd1 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
20#include <net/ip.h> 20#include <net/ip.h>
21#include <net/ipv6.h> 21#include <net/ipv6.h>
22#include <net/ip6_route.h> 22#include <net/ip6_route.h>
23#if IS_ENABLED(CONFIG_IPV6_MIP6) 23#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
24#include <net/mip6.h> 24#include <net/mip6.h>
25#endif 25#endif
26 26
@@ -73,13 +73,6 @@ static int xfrm6_get_tos(const struct flowi *fl)
73 return 0; 73 return 0;
74} 74}
75 75
76static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
77{
78 struct rt6_info *rt = (struct rt6_info *)xdst;
79
80 rt6_init_peer(rt, net->ipv6.peers);
81}
82
83static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, 76static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
84 int nfheader_len) 77 int nfheader_len)
85{ 78{
@@ -106,11 +99,12 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
106 if (!xdst->u.rt6.rt6i_idev) 99 if (!xdst->u.rt6.rt6i_idev)
107 return -ENODEV; 100 return -ENODEV;
108 101
109 rt6_transfer_peer(&xdst->u.rt6, rt); 102 xdst->u.rt6.rt6i_peer = rt->rt6i_peer;
103 if (rt->rt6i_peer)
104 atomic_inc(&rt->rt6i_peer->refcnt);
110 105
111 /* Sheit... I remember I did this right. Apparently, 106 /* Sheit... I remember I did this right. Apparently,
112 * it was magically lost, so this code needs audit */ 107 * it was magically lost, so this code needs audit */
113 xdst->u.rt6.n = neigh_clone(rt->n);
114 xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | 108 xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
115 RTF_LOCAL); 109 RTF_LOCAL);
116 xdst->u.rt6.rt6i_metric = rt->rt6i_metric; 110 xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
@@ -138,8 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
138 memset(fl6, 0, sizeof(struct flowi6)); 132 memset(fl6, 0, sizeof(struct flowi6));
139 fl6->flowi6_mark = skb->mark; 133 fl6->flowi6_mark = skb->mark;
140 134
141 fl6->daddr = reverse ? hdr->saddr : hdr->daddr; 135 ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr);
142 fl6->saddr = reverse ? hdr->daddr : hdr->saddr; 136 ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr);
143 137
144 while (nh + offset + 1 < skb->data || 138 while (nh + offset + 1 < skb->data ||
145 pskb_may_pull(skb, nh + offset + 1 - skb->data)) { 139 pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
@@ -182,7 +176,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
182 fl6->flowi6_proto = nexthdr; 176 fl6->flowi6_proto = nexthdr;
183 return; 177 return;
184 178
185#if IS_ENABLED(CONFIG_IPV6_MIP6) 179#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
186 case IPPROTO_MH: 180 case IPPROTO_MH:
187 if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { 181 if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
188 struct ip6_mh *mh; 182 struct ip6_mh *mh;
@@ -214,22 +208,12 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
214 return dst_entries_get_fast(ops) > ops->gc_thresh * 2; 208 return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
215} 209}
216 210
217static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, 211static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
218 struct sk_buff *skb, u32 mtu)
219{
220 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
221 struct dst_entry *path = xdst->route;
222
223 path->ops->update_pmtu(path, sk, skb, mtu);
224}
225
226static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
227 struct sk_buff *skb)
228{ 212{
229 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 213 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
230 struct dst_entry *path = xdst->route; 214 struct dst_entry *path = xdst->route;
231 215
232 path->ops->redirect(path, sk, skb); 216 path->ops->update_pmtu(path, mtu);
233} 217}
234 218
235static void xfrm6_dst_destroy(struct dst_entry *dst) 219static void xfrm6_dst_destroy(struct dst_entry *dst)
@@ -239,10 +223,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
239 if (likely(xdst->u.rt6.rt6i_idev)) 223 if (likely(xdst->u.rt6.rt6i_idev))
240 in6_dev_put(xdst->u.rt6.rt6i_idev); 224 in6_dev_put(xdst->u.rt6.rt6i_idev);
241 dst_destroy_metrics_generic(dst); 225 dst_destroy_metrics_generic(dst);
242 if (rt6_has_peer(&xdst->u.rt6)) { 226 if (likely(xdst->u.rt6.rt6i_peer))
243 struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); 227 inet_putpeer(xdst->u.rt6.rt6i_peer);
244 inet_putpeer(peer);
245 }
246 xfrm_dst_destroy(xdst); 228 xfrm_dst_destroy(xdst);
247} 229}
248 230
@@ -278,7 +260,6 @@ static struct dst_ops xfrm6_dst_ops = {
278 .protocol = cpu_to_be16(ETH_P_IPV6), 260 .protocol = cpu_to_be16(ETH_P_IPV6),
279 .gc = xfrm6_garbage_collect, 261 .gc = xfrm6_garbage_collect,
280 .update_pmtu = xfrm6_update_pmtu, 262 .update_pmtu = xfrm6_update_pmtu,
281 .redirect = xfrm6_redirect,
282 .cow_metrics = dst_cow_metrics_generic, 263 .cow_metrics = dst_cow_metrics_generic,
283 .destroy = xfrm6_dst_destroy, 264 .destroy = xfrm6_dst_destroy,
284 .ifdown = xfrm6_dst_ifdown, 265 .ifdown = xfrm6_dst_ifdown,
@@ -293,7 +274,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
293 .get_saddr = xfrm6_get_saddr, 274 .get_saddr = xfrm6_get_saddr,
294 .decode_session = _decode_session6, 275 .decode_session = _decode_session6,
295 .get_tos = xfrm6_get_tos, 276 .get_tos = xfrm6_get_tos,
296 .init_dst = xfrm6_init_dst,
297 .init_path = xfrm6_init_path, 277 .init_path = xfrm6_init_path,
298 .fill_dst = xfrm6_fill_dst, 278 .fill_dst = xfrm6_fill_dst,
299 .blackhole_route = ip6_blackhole_route, 279 .blackhole_route = ip6_blackhole_route,
@@ -327,7 +307,21 @@ static struct ctl_table_header *sysctl_hdr;
327int __init xfrm6_init(void) 307int __init xfrm6_init(void)
328{ 308{
329 int ret; 309 int ret;
330 310 unsigned int gc_thresh;
311
312 /*
313 * We need a good default value for the xfrm6 gc threshold.
314 * In ipv4 we set it to the route hash table size * 8, which
315 * is half the size of the maximaum route cache for ipv4. It
316 * would be good to do the same thing for v6, except the table is
317 * constructed differently here. Here each table for a net namespace
318 * can have FIB_TABLE_HASHSZ entries, so lets go with the same
319 * computation that we used for ipv4 here. Also, lets keep the initial
320 * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults
321 * to that as a minimum as well
322 */
323 gc_thresh = FIB6_TABLE_HASHSZ * 8;
324 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
331 dst_entries_init(&xfrm6_dst_ops); 325 dst_entries_init(&xfrm6_dst_ops);
332 326
333 ret = xfrm6_policy_init(); 327 ret = xfrm6_policy_init();
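
The comment added in the hunk above picks the default xfrm6 garbage-collection threshold as eight entries per FIB6 hash bucket with a floor of 1024. Worked out for the two usual values of FIB6_TABLE_HASHSZ (256 with multiple routing tables, 1 without; the constant itself is an assumption, the hunk only gives the formula):

#include <stdio.h>

/* Assumption for the demo: FIB6_TABLE_HASHSZ is 256 when multiple
 * routing tables are configured and 1 otherwise.                     */
static unsigned int xfrm6_default_gc_thresh(unsigned int fib6_hashsz)
{
        unsigned int gc_thresh = fib6_hashsz * 8;

        return (gc_thresh < 1024) ? 1024 : gc_thresh;
}

int main(void)
{
        printf("hashsz 256 -> gc_thresh %u\n", xfrm6_default_gc_thresh(256));
        printf("hashsz   1 -> gc_thresh %u\n", xfrm6_default_gc_thresh(1));
        return 0;
}
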
@@ -340,8 +334,8 @@ int __init xfrm6_init(void)
340 goto out_policy; 334 goto out_policy;
341 335
342#ifdef CONFIG_SYSCTL 336#ifdef CONFIG_SYSCTL
343 sysctl_hdr = register_net_sysctl(&init_net, "net/ipv6", 337 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path,
344 xfrm6_policy_table); 338 xfrm6_policy_table);
345#endif 339#endif
346out: 340out:
347 return ret; 341 return ret;
@@ -356,6 +350,7 @@ void xfrm6_fini(void)
356 if (sysctl_hdr) 350 if (sysctl_hdr)
357 unregister_net_sysctl_table(sysctl_hdr); 351 unregister_net_sysctl_table(sysctl_hdr);
358#endif 352#endif
353 //xfrm6_input_fini();
359 xfrm6_policy_fini(); 354 xfrm6_policy_fini();
360 xfrm6_state_fini(); 355 xfrm6_state_fini();
361 dst_entries_destroy(&xfrm6_dst_ops); 356 dst_entries_destroy(&xfrm6_dst_ops);
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index d8c70b8efc2..248f0b2a7ee 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -15,7 +15,6 @@
15#include <linux/pfkeyv2.h> 15#include <linux/pfkeyv2.h>
16#include <linux/ipsec.h> 16#include <linux/ipsec.h>
17#include <linux/netfilter_ipv6.h> 17#include <linux/netfilter_ipv6.h>
18#include <linux/export.h>
19#include <net/dsfield.h> 18#include <net/dsfield.h>
20#include <net/ipv6.h> 19#include <net/ipv6.h>
21#include <net/addrconf.h> 20#include <net/addrconf.h>
@@ -27,8 +26,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
27 26
28 /* Initialize temporary selector matching only 27 /* Initialize temporary selector matching only
29 * to current session. */ 28 * to current session. */
30 *(struct in6_addr *)&sel->daddr = fl6->daddr; 29 ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr);
31 *(struct in6_addr *)&sel->saddr = fl6->saddr; 30 ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr);
32 sel->dport = xfrm_flowi_dport(fl, &fl6->uli); 31 sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
33 sel->dport_mask = htons(0xffff); 32 sel->dport_mask = htons(0xffff);
34 sel->sport = xfrm_flowi_sport(fl, &fl6->uli); 33 sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
@@ -101,7 +100,7 @@ static int __xfrm6_state_sort_cmp(void *p)
101 return 1; 100 return 1;
102 else 101 else
103 return 3; 102 return 3;
104#if IS_ENABLED(CONFIG_IPV6_MIP6) 103#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
105 case XFRM_MODE_ROUTEOPTIMIZATION: 104 case XFRM_MODE_ROUTEOPTIMIZATION:
106 case XFRM_MODE_IN_TRIGGER: 105 case XFRM_MODE_IN_TRIGGER:
107 return 2; 106 return 2;
@@ -134,7 +133,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p)
134 switch (v->mode) { 133 switch (v->mode) {
135 case XFRM_MODE_TRANSPORT: 134 case XFRM_MODE_TRANSPORT:
136 return 1; 135 return 1;
137#if IS_ENABLED(CONFIG_IPV6_MIP6) 136#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
138 case XFRM_MODE_ROUTEOPTIMIZATION: 137 case XFRM_MODE_ROUTEOPTIMIZATION:
139 case XFRM_MODE_IN_TRIGGER: 138 case XFRM_MODE_IN_TRIGGER:
140 return 2; 139 return 2;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index ee5a7065aac..4fe1db12d2a 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -68,9 +68,9 @@ static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock);
68 68
69static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; 69static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly;
70 70
71static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr) 71static inline unsigned xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *addr)
72{ 72{
73 unsigned int h; 73 unsigned h;
74 74
75 h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]); 75 h = (__force u32)(addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3]);
76 h ^= h >> 16; 76 h ^= h >> 16;
@@ -80,7 +80,7 @@ static inline unsigned int xfrm6_tunnel_spi_hash_byaddr(const xfrm_address_t *ad
80 return h; 80 return h;
81} 81}
82 82
83static inline unsigned int xfrm6_tunnel_spi_hash_byspi(u32 spi) 83static inline unsigned xfrm6_tunnel_spi_hash_byspi(u32 spi)
84{ 84{
85 return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE; 85 return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
86} 86}
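
The tail of this diff touches the two hash helpers that index the xfrm6 tunnel SPI tables: the by-address variant XOR-folds the four 32-bit words of the IPv6 address and mixes the high bits downward (its final masking step lies outside the hunk shown), while the by-SPI variant is a plain modulo. A userspace sketch of the same folding idea; the bucket counts and the final mask are assumptions for the demo:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BYADDR_HSIZE 256        /* assumed power-of-two bucket count   */
#define BYSPI_HSIZE  256        /* assumed bucket count                */

static unsigned int hash_byaddr(const struct in6_addr *addr)
{
        uint32_t w[4], h;

        memcpy(w, addr, sizeof(w));
        h = w[0] ^ w[1] ^ w[2] ^ w[3];  /* fold 128 bits down to 32    */
        h ^= h >> 16;                   /* mix high bits downward      */
        h ^= h >> 8;
        return h & (BYADDR_HSIZE - 1);  /* assumed final masking step  */
}

static unsigned int hash_byspi(uint32_t spi)
{
        return spi % BYSPI_HSIZE;
}

int main(void)
{
        struct in6_addr a;

        inet_pton(AF_INET6, "2001:db8::42", &a);
        printf("byaddr bucket: %u\n", hash_byaddr(&a));
        printf("byspi  bucket: %u\n", hash_byspi(0x12345678));
        return 0;
}
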