aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig16
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c102
-rw-r--r--net/ipv6/addrlabel.c26
-rw-r--r--net/ipv6/esp6.c6
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/inet6_connection_sock.c23
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/ip6_flowlabel.c47
-rw-r--r--net/ipv6/ip6_gre.c1770
-rw-r--r--net/ipv6/ip6_output.c85
-rw-r--r--net/ipv6/ip6_tunnel.c91
-rw-r--r--net/ipv6/ip6mr.c12
-rw-r--r--net/ipv6/mip6.c20
-rw-r--r--net/ipv6/netfilter.c8
-rw-r--r--net/ipv6/netfilter/Kconfig37
-rw-r--r--net/ipv6/netfilter/Makefile6
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c135
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c165
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c4
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c4
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c321
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c4
-rw-r--r--net/ipv6/netfilter/ip6table_security.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c137
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c218
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c288
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/raw.c24
-rw-r--r--net/ipv6/reassembly.c89
-rw-r--r--net/ipv6/route.c126
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/syncookies.c1
-rw-r--r--net/ipv6/tcp_ipv6.c76
-rw-r--r--net/ipv6/udp.c14
-rw-r--r--net/ipv6/xfrm6_policy.c8
37 files changed, 3440 insertions, 549 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 5728695b544..4f7fe7270e3 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -201,6 +201,22 @@ config IPV6_TUNNEL
201 201
202 If unsure, say N. 202 If unsure, say N.
203 203
204config IPV6_GRE
205 tristate "IPv6: GRE tunnel"
206 select IPV6_TUNNEL
207 ---help---
208 Tunneling means encapsulating data of one protocol type within
209 another protocol and sending it over a channel that understands the
210 encapsulating protocol. This particular tunneling driver implements
211 GRE (Generic Routing Encapsulation) and at this time allows
212 encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
213 This driver is useful if the other endpoint is a Cisco router: Cisco
214 likes GRE much better than the other Linux tunneling driver ("IP
215 tunneling" above). In addition, GRE allows multicast redistribution
216 through the tunnel.
217
218 Saying M here will produce a module called ip6_gre. If unsure, say N.
219
204config IPV6_MULTIPLE_TABLES 220config IPV6_MULTIPLE_TABLES
205 bool "IPv6: Multiple Routing Tables" 221 bool "IPv6: Multiple Routing Tables"
206 depends on EXPERIMENTAL 222 depends on EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 686934acfac..b6d3f79151e 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
36 36
37obj-$(CONFIG_IPV6_SIT) += sit.o 37obj-$(CONFIG_IPV6_SIT) += sit.o
38obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o 38obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
39obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
39 40
40obj-y += addrconf_core.o exthdrs_core.o 41obj-y += addrconf_core.o exthdrs_core.o
41 42
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 79181819a24..d7c56f8a5b4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
127#endif 127#endif
128 128
129#ifdef CONFIG_IPV6_PRIVACY 129#ifdef CONFIG_IPV6_PRIVACY
130static int __ipv6_regen_rndid(struct inet6_dev *idev); 130static void __ipv6_regen_rndid(struct inet6_dev *idev);
131static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 131static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
132static void ipv6_regen_rndid(unsigned long data); 132static void ipv6_regen_rndid(unsigned long data);
133#endif 133#endif
134 134
@@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
494 struct net_device *dev; 494 struct net_device *dev;
495 struct inet6_dev *idev; 495 struct inet6_dev *idev;
496 496
497 rcu_read_lock(); 497 for_each_netdev(net, dev) {
498 for_each_netdev_rcu(net, dev) {
499 idev = __in6_dev_get(dev); 498 idev = __in6_dev_get(dev);
500 if (idev) { 499 if (idev) {
501 int changed = (!idev->cnf.forwarding) ^ (!newf); 500 int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
504 dev_forward_change(idev); 503 dev_forward_change(idev);
505 } 504 }
506 } 505 }
507 rcu_read_unlock();
508} 506}
509 507
510static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) 508static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
@@ -790,10 +788,16 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
790 struct in6_addr prefix; 788 struct in6_addr prefix;
791 struct rt6_info *rt; 789 struct rt6_info *rt;
792 struct net *net = dev_net(ifp->idev->dev); 790 struct net *net = dev_net(ifp->idev->dev);
791 struct flowi6 fl6 = {};
792
793 ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); 793 ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
794 rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); 794 fl6.flowi6_oif = ifp->idev->dev->ifindex;
795 fl6.daddr = prefix;
796 rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
797 RT6_LOOKUP_F_IFACE);
795 798
796 if (rt && addrconf_is_prefix_route(rt)) { 799 if (rt != net->ipv6.ip6_null_entry &&
800 addrconf_is_prefix_route(rt)) {
797 if (onlink == 0) { 801 if (onlink == 0) {
798 ip6_del_rt(rt); 802 ip6_del_rt(rt);
799 rt = NULL; 803 rt = NULL;
@@ -854,16 +858,7 @@ retry:
854 } 858 }
855 in6_ifa_hold(ifp); 859 in6_ifa_hold(ifp);
856 memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); 860 memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
857 if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { 861 __ipv6_try_regen_rndid(idev, tmpaddr);
858 spin_unlock_bh(&ifp->lock);
859 write_unlock(&idev->lock);
860 pr_warn("%s: regeneration of randomized interface id failed\n",
861 __func__);
862 in6_ifa_put(ifp);
863 in6_dev_put(idev);
864 ret = -1;
865 goto out;
866 }
867 memcpy(&addr.s6_addr[8], idev->rndid, 8); 862 memcpy(&addr.s6_addr[8], idev->rndid, 8);
868 age = (now - ifp->tstamp) / HZ; 863 age = (now - ifp->tstamp) / HZ;
869 tmp_valid_lft = min_t(__u32, 864 tmp_valid_lft = min_t(__u32,
@@ -1081,8 +1076,10 @@ static int ipv6_get_saddr_eval(struct net *net,
1081 break; 1076 break;
1082 case IPV6_SADDR_RULE_PREFIX: 1077 case IPV6_SADDR_RULE_PREFIX:
1083 /* Rule 8: Use longest matching prefix */ 1078 /* Rule 8: Use longest matching prefix */
1084 score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, 1079 ret = ipv6_addr_diff(&score->ifa->addr, dst->addr);
1085 dst->addr); 1080 if (ret > score->ifa->prefix_len)
1081 ret = score->ifa->prefix_len;
1082 score->matchlen = ret;
1086 break; 1083 break;
1087 default: 1084 default:
1088 ret = 0; 1085 ret = 0;
@@ -1095,7 +1092,7 @@ out:
1095 return ret; 1092 return ret;
1096} 1093}
1097 1094
1098int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, 1095int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
1099 const struct in6_addr *daddr, unsigned int prefs, 1096 const struct in6_addr *daddr, unsigned int prefs,
1100 struct in6_addr *saddr) 1097 struct in6_addr *saddr)
1101{ 1098{
@@ -1602,7 +1599,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
1602 1599
1603#ifdef CONFIG_IPV6_PRIVACY 1600#ifdef CONFIG_IPV6_PRIVACY
1604/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ 1601/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
1605static int __ipv6_regen_rndid(struct inet6_dev *idev) 1602static void __ipv6_regen_rndid(struct inet6_dev *idev)
1606{ 1603{
1607regen: 1604regen:
1608 get_random_bytes(idev->rndid, sizeof(idev->rndid)); 1605 get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -1629,8 +1626,6 @@ regen:
1629 if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) 1626 if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
1630 goto regen; 1627 goto regen;
1631 } 1628 }
1632
1633 return 0;
1634} 1629}
1635 1630
1636static void ipv6_regen_rndid(unsigned long data) 1631static void ipv6_regen_rndid(unsigned long data)
@@ -1644,8 +1639,7 @@ static void ipv6_regen_rndid(unsigned long data)
1644 if (idev->dead) 1639 if (idev->dead)
1645 goto out; 1640 goto out;
1646 1641
1647 if (__ipv6_regen_rndid(idev) < 0) 1642 __ipv6_regen_rndid(idev);
1648 goto out;
1649 1643
1650 expires = jiffies + 1644 expires = jiffies +
1651 idev->cnf.temp_prefered_lft * HZ - 1645 idev->cnf.temp_prefered_lft * HZ -
@@ -1666,13 +1660,10 @@ out:
1666 in6_dev_put(idev); 1660 in6_dev_put(idev);
1667} 1661}
1668 1662
1669static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) 1663static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
1670{ 1664{
1671 int ret = 0;
1672
1673 if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) 1665 if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
1674 ret = __ipv6_regen_rndid(idev); 1666 __ipv6_regen_rndid(idev);
1675 return ret;
1676} 1667}
1677#endif 1668#endif
1678 1669
@@ -1723,7 +1714,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
1723 if (table == NULL) 1714 if (table == NULL)
1724 return NULL; 1715 return NULL;
1725 1716
1726 write_lock_bh(&table->tb6_lock); 1717 read_lock_bh(&table->tb6_lock);
1727 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); 1718 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
1728 if (!fn) 1719 if (!fn)
1729 goto out; 1720 goto out;
@@ -1738,7 +1729,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
1738 break; 1729 break;
1739 } 1730 }
1740out: 1731out:
1741 write_unlock_bh(&table->tb6_lock); 1732 read_unlock_bh(&table->tb6_lock);
1742 return rt; 1733 return rt;
1743} 1734}
1744 1735
@@ -1778,14 +1769,6 @@ static void sit_route_add(struct net_device *dev)
1778} 1769}
1779#endif 1770#endif
1780 1771
1781static void addrconf_add_lroute(struct net_device *dev)
1782{
1783 struct in6_addr addr;
1784
1785 ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
1786 addrconf_prefix_route(&addr, 64, dev, 0, 0);
1787}
1788
1789static struct inet6_dev *addrconf_add_dev(struct net_device *dev) 1772static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
1790{ 1773{
1791 struct inet6_dev *idev; 1774 struct inet6_dev *idev;
@@ -1803,8 +1786,6 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
1803 if (!(dev->flags & IFF_LOOPBACK)) 1786 if (!(dev->flags & IFF_LOOPBACK))
1804 addrconf_add_mroute(dev); 1787 addrconf_add_mroute(dev);
1805 1788
1806 /* Add link local route */
1807 addrconf_add_lroute(dev);
1808 return idev; 1789 return idev;
1809} 1790}
1810 1791
@@ -2483,10 +2464,9 @@ static void addrconf_sit_config(struct net_device *dev)
2483 2464
2484 sit_add_v4_addrs(idev); 2465 sit_add_v4_addrs(idev);
2485 2466
2486 if (dev->flags&IFF_POINTOPOINT) { 2467 if (dev->flags&IFF_POINTOPOINT)
2487 addrconf_add_mroute(dev); 2468 addrconf_add_mroute(dev);
2488 addrconf_add_lroute(dev); 2469 else
2489 } else
2490 sit_route_add(dev); 2470 sit_route_add(dev);
2491} 2471}
2492#endif 2472#endif
@@ -3551,12 +3531,12 @@ static inline int inet6_ifaddr_msgsize(void)
3551} 3531}
3552 3532
3553static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, 3533static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3554 u32 pid, u32 seq, int event, unsigned int flags) 3534 u32 portid, u32 seq, int event, unsigned int flags)
3555{ 3535{
3556 struct nlmsghdr *nlh; 3536 struct nlmsghdr *nlh;
3557 u32 preferred, valid; 3537 u32 preferred, valid;
3558 3538
3559 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); 3539 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
3560 if (nlh == NULL) 3540 if (nlh == NULL)
3561 return -EMSGSIZE; 3541 return -EMSGSIZE;
3562 3542
@@ -3594,7 +3574,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3594} 3574}
3595 3575
3596static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 3576static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3597 u32 pid, u32 seq, int event, u16 flags) 3577 u32 portid, u32 seq, int event, u16 flags)
3598{ 3578{
3599 struct nlmsghdr *nlh; 3579 struct nlmsghdr *nlh;
3600 u8 scope = RT_SCOPE_UNIVERSE; 3580 u8 scope = RT_SCOPE_UNIVERSE;
@@ -3603,7 +3583,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3603 if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) 3583 if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
3604 scope = RT_SCOPE_SITE; 3584 scope = RT_SCOPE_SITE;
3605 3585
3606 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); 3586 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
3607 if (nlh == NULL) 3587 if (nlh == NULL)
3608 return -EMSGSIZE; 3588 return -EMSGSIZE;
3609 3589
@@ -3619,7 +3599,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3619} 3599}
3620 3600
3621static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, 3601static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3622 u32 pid, u32 seq, int event, unsigned int flags) 3602 u32 portid, u32 seq, int event, unsigned int flags)
3623{ 3603{
3624 struct nlmsghdr *nlh; 3604 struct nlmsghdr *nlh;
3625 u8 scope = RT_SCOPE_UNIVERSE; 3605 u8 scope = RT_SCOPE_UNIVERSE;
@@ -3628,7 +3608,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3628 if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) 3608 if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
3629 scope = RT_SCOPE_SITE; 3609 scope = RT_SCOPE_SITE;
3630 3610
3631 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); 3611 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
3632 if (nlh == NULL) 3612 if (nlh == NULL)
3633 return -EMSGSIZE; 3613 return -EMSGSIZE;
3634 3614
@@ -3669,7 +3649,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3669 if (++ip_idx < s_ip_idx) 3649 if (++ip_idx < s_ip_idx)
3670 continue; 3650 continue;
3671 err = inet6_fill_ifaddr(skb, ifa, 3651 err = inet6_fill_ifaddr(skb, ifa,
3672 NETLINK_CB(cb->skb).pid, 3652 NETLINK_CB(cb->skb).portid,
3673 cb->nlh->nlmsg_seq, 3653 cb->nlh->nlmsg_seq,
3674 RTM_NEWADDR, 3654 RTM_NEWADDR,
3675 NLM_F_MULTI); 3655 NLM_F_MULTI);
@@ -3685,7 +3665,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3685 if (ip_idx < s_ip_idx) 3665 if (ip_idx < s_ip_idx)
3686 continue; 3666 continue;
3687 err = inet6_fill_ifmcaddr(skb, ifmca, 3667 err = inet6_fill_ifmcaddr(skb, ifmca,
3688 NETLINK_CB(cb->skb).pid, 3668 NETLINK_CB(cb->skb).portid,
3689 cb->nlh->nlmsg_seq, 3669 cb->nlh->nlmsg_seq,
3690 RTM_GETMULTICAST, 3670 RTM_GETMULTICAST,
3691 NLM_F_MULTI); 3671 NLM_F_MULTI);
@@ -3700,7 +3680,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3700 if (ip_idx < s_ip_idx) 3680 if (ip_idx < s_ip_idx)
3701 continue; 3681 continue;
3702 err = inet6_fill_ifacaddr(skb, ifaca, 3682 err = inet6_fill_ifacaddr(skb, ifaca,
3703 NETLINK_CB(cb->skb).pid, 3683 NETLINK_CB(cb->skb).portid,
3704 cb->nlh->nlmsg_seq, 3684 cb->nlh->nlmsg_seq,
3705 RTM_GETANYCAST, 3685 RTM_GETANYCAST,
3706 NLM_F_MULTI); 3686 NLM_F_MULTI);
@@ -3822,7 +3802,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3822 goto errout_ifa; 3802 goto errout_ifa;
3823 } 3803 }
3824 3804
3825 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, 3805 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
3826 nlh->nlmsg_seq, RTM_NEWADDR, 0); 3806 nlh->nlmsg_seq, RTM_NEWADDR, 0);
3827 if (err < 0) { 3807 if (err < 0) {
3828 /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ 3808 /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
@@ -3830,7 +3810,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3830 kfree_skb(skb); 3810 kfree_skb(skb);
3831 goto errout_ifa; 3811 goto errout_ifa;
3832 } 3812 }
3833 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 3813 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3834errout_ifa: 3814errout_ifa:
3835 in6_ifa_put(ifa); 3815 in6_ifa_put(ifa);
3836errout: 3816errout:
@@ -4032,14 +4012,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
4032} 4012}
4033 4013
4034static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 4014static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
4035 u32 pid, u32 seq, int event, unsigned int flags) 4015 u32 portid, u32 seq, int event, unsigned int flags)
4036{ 4016{
4037 struct net_device *dev = idev->dev; 4017 struct net_device *dev = idev->dev;
4038 struct ifinfomsg *hdr; 4018 struct ifinfomsg *hdr;
4039 struct nlmsghdr *nlh; 4019 struct nlmsghdr *nlh;
4040 void *protoinfo; 4020 void *protoinfo;
4041 4021
4042 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); 4022 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
4043 if (nlh == NULL) 4023 if (nlh == NULL)
4044 return -EMSGSIZE; 4024 return -EMSGSIZE;
4045 4025
@@ -4097,7 +4077,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
4097 if (!idev) 4077 if (!idev)
4098 goto cont; 4078 goto cont;
4099 if (inet6_fill_ifinfo(skb, idev, 4079 if (inet6_fill_ifinfo(skb, idev,
4100 NETLINK_CB(cb->skb).pid, 4080 NETLINK_CB(cb->skb).portid,
4101 cb->nlh->nlmsg_seq, 4081 cb->nlh->nlmsg_seq,
4102 RTM_NEWLINK, NLM_F_MULTI) <= 0) 4082 RTM_NEWLINK, NLM_F_MULTI) <= 0)
4103 goto out; 4083 goto out;
@@ -4145,14 +4125,14 @@ static inline size_t inet6_prefix_nlmsg_size(void)
4145} 4125}
4146 4126
4147static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, 4127static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
4148 struct prefix_info *pinfo, u32 pid, u32 seq, 4128 struct prefix_info *pinfo, u32 portid, u32 seq,
4149 int event, unsigned int flags) 4129 int event, unsigned int flags)
4150{ 4130{
4151 struct prefixmsg *pmsg; 4131 struct prefixmsg *pmsg;
4152 struct nlmsghdr *nlh; 4132 struct nlmsghdr *nlh;
4153 struct prefix_cacheinfo ci; 4133 struct prefix_cacheinfo ci;
4154 4134
4155 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); 4135 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags);
4156 if (nlh == NULL) 4136 if (nlh == NULL)
4157 return -EMSGSIZE; 4137 return -EMSGSIZE;
4158 4138
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index eb6a63632d3..ff76eecfd62 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
57} 57}
58 58
59/* 59/*
60 * Default policy table (RFC3484 + extensions) 60 * Default policy table (RFC6724 + extensions)
61 * 61 *
62 * prefix addr_type label 62 * prefix addr_type label
63 * ------------------------------------------------------------------------- 63 * -------------------------------------------------------------------------
@@ -69,13 +69,17 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
69 * fc00::/7 N/A 5 ULA (RFC 4193) 69 * fc00::/7 N/A 5 ULA (RFC 4193)
70 * 2001::/32 N/A 6 Teredo (RFC 4380) 70 * 2001::/32 N/A 6 Teredo (RFC 4380)
71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843)
72 * fec0::/10 N/A 11 Site-local
73 * (deprecated by RFC3879)
74 * 3ffe::/16 N/A 12 6bone
72 * 75 *
73 * Note: 0xffffffff is used if we do not have any policies. 76 * Note: 0xffffffff is used if we do not have any policies.
77 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
74 */ 78 */
75 79
76#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 80#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
77 81
78static const __net_initdata struct ip6addrlbl_init_table 82static const __net_initconst struct ip6addrlbl_init_table
79{ 83{
80 const struct in6_addr *prefix; 84 const struct in6_addr *prefix;
81 int prefixlen; 85 int prefixlen;
@@ -88,10 +92,18 @@ static const __net_initdata struct ip6addrlbl_init_table
88 .prefix = &(struct in6_addr){{{ 0xfc }}}, 92 .prefix = &(struct in6_addr){{{ 0xfc }}},
89 .prefixlen = 7, 93 .prefixlen = 7,
90 .label = 5, 94 .label = 5,
95 },{ /* fec0::/10 */
96 .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
97 .prefixlen = 10,
98 .label = 11,
91 },{ /* 2002::/16 */ 99 },{ /* 2002::/16 */
92 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 100 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
93 .prefixlen = 16, 101 .prefixlen = 16,
94 .label = 2, 102 .label = 2,
103 },{ /* 3ffe::/16 */
104 .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
105 .prefixlen = 16,
106 .label = 12,
95 },{ /* 2001::/32 */ 107 },{ /* 2001::/32 */
96 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 108 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
97 .prefixlen = 32, 109 .prefixlen = 32,
@@ -470,10 +482,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
470static int ip6addrlbl_fill(struct sk_buff *skb, 482static int ip6addrlbl_fill(struct sk_buff *skb,
471 struct ip6addrlbl_entry *p, 483 struct ip6addrlbl_entry *p,
472 u32 lseq, 484 u32 lseq,
473 u32 pid, u32 seq, int event, 485 u32 portid, u32 seq, int event,
474 unsigned int flags) 486 unsigned int flags)
475{ 487{
476 struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, 488 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
477 sizeof(struct ifaddrlblmsg), flags); 489 sizeof(struct ifaddrlblmsg), flags);
478 if (!nlh) 490 if (!nlh)
479 return -EMSGSIZE; 491 return -EMSGSIZE;
@@ -503,7 +515,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
503 net_eq(ip6addrlbl_net(p), net)) { 515 net_eq(ip6addrlbl_net(p), net)) {
504 if ((err = ip6addrlbl_fill(skb, p, 516 if ((err = ip6addrlbl_fill(skb, p,
505 ip6addrlbl_table.seq, 517 ip6addrlbl_table.seq,
506 NETLINK_CB(cb->skb).pid, 518 NETLINK_CB(cb->skb).portid,
507 cb->nlh->nlmsg_seq, 519 cb->nlh->nlmsg_seq,
508 RTM_NEWADDRLABEL, 520 RTM_NEWADDRLABEL,
509 NLM_F_MULTI)) <= 0) 521 NLM_F_MULTI)) <= 0)
@@ -574,7 +586,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
574 } 586 }
575 587
576 err = ip6addrlbl_fill(skb, p, lseq, 588 err = ip6addrlbl_fill(skb, p, lseq,
577 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 589 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
578 RTM_NEWADDRLABEL, 0); 590 RTM_NEWADDRLABEL, 0);
579 591
580 ip6addrlbl_put(p); 592 ip6addrlbl_put(p);
@@ -585,7 +597,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
585 goto out; 597 goto out;
586 } 598 }
587 599
588 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 600 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
589out: 601out:
590 return err; 602 return err;
591} 603}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6dc7fd353ef..282f3723ee1 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
167 struct esp_data *esp = x->data; 167 struct esp_data *esp = x->data;
168 168
169 /* skb is pure payload to encrypt */ 169 /* skb is pure payload to encrypt */
170 err = -ENOMEM;
171
172 aead = esp->aead; 170 aead = esp->aead;
173 alen = crypto_aead_authsize(aead); 171 alen = crypto_aead_authsize(aead);
174 172
@@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
203 } 201 }
204 202
205 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); 203 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
206 if (!tmp) 204 if (!tmp) {
205 err = -ENOMEM;
207 goto error; 206 goto error;
207 }
208 208
209 seqhi = esp_tmp_seqhi(tmp); 209 seqhi = esp_tmp_seqhi(tmp);
210 iv = esp_tmp_iv(aead, tmp, seqhilen); 210 iv = esp_tmp_iv(aead, tmp, seqhilen);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 0ff1cfd55bc..d9fb9110f60 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
238 + nla_total_size(16); /* src */ 238 + nla_total_size(16); /* src */
239} 239}
240 240
241static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { 241static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
242 .family = AF_INET6, 242 .family = AF_INET6,
243 .rule_size = sizeof(struct fib6_rule), 243 .rule_size = sizeof(struct fib6_rule),
244 .addr_size = sizeof(struct in6_addr), 244 .addr_size = sizeof(struct in6_addr),
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0251a6005be..c4f934176ca 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
175 const struct in6_addr *saddr) 175 const struct in6_addr *saddr)
176{ 176{
177 __ip6_dst_store(sk, dst, daddr, saddr); 177 __ip6_dst_store(sk, dst, daddr, saddr);
178
179#ifdef CONFIG_XFRM
180 {
181 struct rt6_info *rt = (struct rt6_info *)dst;
182 rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
183 }
184#endif
185} 178}
186 179
187static inline 180static inline
188struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) 181struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
189{ 182{
190 struct dst_entry *dst; 183 return __sk_dst_check(sk, cookie);
191
192 dst = __sk_dst_check(sk, cookie);
193
194#ifdef CONFIG_XFRM
195 if (dst) {
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
198 __sk_dst_reset(sk);
199 dst = NULL;
200 }
201 }
202#endif
203
204 return dst;
205} 184}
206 185
207static struct dst_entry *inet6_csk_route_socket(struct sock *sk, 186static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 13690d650c3..24995a93ef8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -514,7 +514,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
514 ln = node_alloc(); 514 ln = node_alloc();
515 515
516 if (!ln) 516 if (!ln)
517 return NULL; 517 return ERR_PTR(-ENOMEM);
518 ln->fn_bit = plen; 518 ln->fn_bit = plen;
519 519
520 ln->parent = pn; 520 ln->parent = pn;
@@ -561,7 +561,7 @@ insert_above:
561 node_free(in); 561 node_free(in);
562 if (ln) 562 if (ln)
563 node_free(ln); 563 node_free(ln);
564 return NULL; 564 return ERR_PTR(-ENOMEM);
565 } 565 }
566 566
567 /* 567 /*
@@ -611,7 +611,7 @@ insert_above:
611 ln = node_alloc(); 611 ln = node_alloc();
612 612
613 if (!ln) 613 if (!ln)
614 return NULL; 614 return ERR_PTR(-ENOMEM);
615 615
616 ln->fn_bit = plen; 616 ln->fn_bit = plen;
617 617
@@ -777,11 +777,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
777 777
778 if (IS_ERR(fn)) { 778 if (IS_ERR(fn)) {
779 err = PTR_ERR(fn); 779 err = PTR_ERR(fn);
780 fn = NULL;
781 }
782
783 if (!fn)
784 goto out; 780 goto out;
781 }
785 782
786 pn = fn; 783 pn = fn;
787 784
@@ -819,12 +816,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
819 offsetof(struct rt6_info, rt6i_src), 816 offsetof(struct rt6_info, rt6i_src),
820 allow_create, replace_required); 817 allow_create, replace_required);
821 818
822 if (!sn) { 819 if (IS_ERR(sn)) {
823 /* If it is failed, discard just allocated 820 /* If it is failed, discard just allocated
824 root, and then (in st_failure) stale node 821 root, and then (in st_failure) stale node
825 in main tree. 822 in main tree.
826 */ 823 */
827 node_free(sfn); 824 node_free(sfn);
825 err = PTR_ERR(sn);
828 goto st_failure; 826 goto st_failure;
829 } 827 }
830 828
@@ -839,10 +837,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
839 837
840 if (IS_ERR(sn)) { 838 if (IS_ERR(sn)) {
841 err = PTR_ERR(sn); 839 err = PTR_ERR(sn);
842 sn = NULL;
843 }
844 if (!sn)
845 goto st_failure; 840 goto st_failure;
841 }
846 } 842 }
847 843
848 if (!fn->leaf) { 844 if (!fn->leaf) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 9772fbd8a3f..90bbefb5794 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -22,6 +22,7 @@
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/export.h> 24#include <linux/export.h>
25#include <linux/pid_namespace.h>
25 26
26#include <net/net_namespace.h> 27#include <net/net_namespace.h>
27#include <net/sock.h> 28#include <net/sock.h>
@@ -91,6 +92,8 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
91static void fl_free(struct ip6_flowlabel *fl) 92static void fl_free(struct ip6_flowlabel *fl)
92{ 93{
93 if (fl) { 94 if (fl) {
95 if (fl->share == IPV6_FL_S_PROCESS)
96 put_pid(fl->owner.pid);
94 release_net(fl->fl_net); 97 release_net(fl->fl_net);
95 kfree(fl->opt); 98 kfree(fl->opt);
96 } 99 }
@@ -394,10 +397,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
394 case IPV6_FL_S_ANY: 397 case IPV6_FL_S_ANY:
395 break; 398 break;
396 case IPV6_FL_S_PROCESS: 399 case IPV6_FL_S_PROCESS:
397 fl->owner = current->pid; 400 fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
398 break; 401 break;
399 case IPV6_FL_S_USER: 402 case IPV6_FL_S_USER:
400 fl->owner = current_euid(); 403 fl->owner.uid = current_euid();
401 break; 404 break;
402 default: 405 default:
403 err = -EINVAL; 406 err = -EINVAL;
@@ -561,7 +564,10 @@ recheck:
561 err = -EPERM; 564 err = -EPERM;
562 if (fl1->share == IPV6_FL_S_EXCL || 565 if (fl1->share == IPV6_FL_S_EXCL ||
563 fl1->share != fl->share || 566 fl1->share != fl->share ||
564 fl1->owner != fl->owner) 567 ((fl1->share == IPV6_FL_S_PROCESS) &&
568 (fl1->owner.pid == fl->owner.pid)) ||
569 ((fl1->share == IPV6_FL_S_USER) &&
570 uid_eq(fl1->owner.uid, fl->owner.uid)))
565 goto release; 571 goto release;
566 572
567 err = -EINVAL; 573 err = -EINVAL;
@@ -621,6 +627,7 @@ done:
621 627
622struct ip6fl_iter_state { 628struct ip6fl_iter_state {
623 struct seq_net_private p; 629 struct seq_net_private p;
630 struct pid_namespace *pid_ns;
624 int bucket; 631 int bucket;
625}; 632};
626 633
@@ -699,6 +706,7 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v)
699 706
700static int ip6fl_seq_show(struct seq_file *seq, void *v) 707static int ip6fl_seq_show(struct seq_file *seq, void *v)
701{ 708{
709 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
702 if (v == SEQ_START_TOKEN) 710 if (v == SEQ_START_TOKEN)
703 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", 711 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
704 "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); 712 "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
@@ -708,7 +716,11 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v)
708 "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", 716 "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
709 (unsigned int)ntohl(fl->label), 717 (unsigned int)ntohl(fl->label),
710 fl->share, 718 fl->share,
711 (int)fl->owner, 719 ((fl->share == IPV6_FL_S_PROCESS) ?
720 pid_nr_ns(fl->owner.pid, state->pid_ns) :
721 ((fl->share == IPV6_FL_S_USER) ?
722 from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
723 0)),
712 atomic_read(&fl->users), 724 atomic_read(&fl->users),
713 fl->linger/HZ, 725 fl->linger/HZ,
714 (long)(fl->expires - jiffies)/HZ, 726 (long)(fl->expires - jiffies)/HZ,
@@ -727,8 +739,29 @@ static const struct seq_operations ip6fl_seq_ops = {
727 739
728static int ip6fl_seq_open(struct inode *inode, struct file *file) 740static int ip6fl_seq_open(struct inode *inode, struct file *file)
729{ 741{
730 return seq_open_net(inode, file, &ip6fl_seq_ops, 742 struct seq_file *seq;
731 sizeof(struct ip6fl_iter_state)); 743 struct ip6fl_iter_state *state;
744 int err;
745
746 err = seq_open_net(inode, file, &ip6fl_seq_ops,
747 sizeof(struct ip6fl_iter_state));
748
749 if (!err) {
750 seq = file->private_data;
751 state = ip6fl_seq_private(seq);
752 rcu_read_lock();
753 state->pid_ns = get_pid_ns(task_active_pid_ns(current));
754 rcu_read_unlock();
755 }
756 return err;
757}
758
759static int ip6fl_seq_release(struct inode *inode, struct file *file)
760{
761 struct seq_file *seq = file->private_data;
762 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
763 put_pid_ns(state->pid_ns);
764 return seq_release_net(inode, file);
732} 765}
733 766
734static const struct file_operations ip6fl_seq_fops = { 767static const struct file_operations ip6fl_seq_fops = {
@@ -736,7 +769,7 @@ static const struct file_operations ip6fl_seq_fops = {
736 .open = ip6fl_seq_open, 769 .open = ip6fl_seq_open,
737 .read = seq_read, 770 .read = seq_read,
738 .llseek = seq_lseek, 771 .llseek = seq_lseek,
739 .release = seq_release_net, 772 .release = ip6fl_seq_release,
740}; 773};
741 774
742static int __net_init ip6_flowlabel_proc_init(struct net *net) 775static int __net_init ip6_flowlabel_proc_init(struct net *net)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 00000000000..0185679c5f5
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1770 @@
1/*
2 * GRE over IPv6 protocol decoder.
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include <linux/capability.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/slab.h>
20#include <linux/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
33#include <linux/etherdevice.h>
34#include <linux/if_ether.h>
35#include <linux/hash.h>
36#include <linux/if_tunnel.h>
37#include <linux/ip6_tunnel.h>
38
39#include <net/sock.h>
40#include <net/ip.h>
41#include <net/icmp.h>
42#include <net/protocol.h>
43#include <net/addrconf.h>
44#include <net/arp.h>
45#include <net/checksum.h>
46#include <net/dsfield.h>
47#include <net/inet_ecn.h>
48#include <net/xfrm.h>
49#include <net/net_namespace.h>
50#include <net/netns/generic.h>
51#include <net/rtnetlink.h>
52
53#include <net/ipv6.h>
54#include <net/ip6_fib.h>
55#include <net/ip6_route.h>
56#include <net/ip6_tunnel.h>
57
58
59static bool log_ecn_error = true;
60module_param(log_ecn_error, bool, 0644);
61MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
62
63#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
64#define IPV6_TCLASS_SHIFT 20
65
66#define HASH_SIZE_SHIFT 5
67#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
68
69static int ip6gre_net_id __read_mostly;
70struct ip6gre_net {
71 struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
72
73 struct net_device *fb_tunnel_dev;
74};
75
76static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
77static int ip6gre_tunnel_init(struct net_device *dev);
78static void ip6gre_tunnel_setup(struct net_device *dev);
79static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
80static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
81
82/* Tunnel hash table */
83
84/*
85 4 hash tables:
86
87 3: (remote,local)
88 2: (remote,*)
89 1: (*,local)
90 0: (*,*)
91
92 We require exact key match i.e. if a key is present in packet
93 it will match only tunnel with the same key; if it is not present,
94 it will match only keyless tunnel.
95
96 All keyless packets that do not match a configured keyless tunnel
97 will match the fallback tunnel.
98 */
99
/*
 * Fold a GRE key into a bucket index in [0, HASH_SIZE).  The argument is
 * parenthesised so that an expression argument is cast and shifted as a
 * whole instead of being split by operator precedence.
 */
#define HASH_KEY(key) ((((__force u32)(key)) ^ (((__force u32)(key)) >> 4)) & (HASH_SIZE - 1))
101static u32 HASH_ADDR(const struct in6_addr *addr)
102{
103 u32 hash = ipv6_addr_hash(addr);
104
105 return hash_32(hash, HASH_SIZE_SHIFT);
106}
107
108#define tunnels_r_l tunnels[3]
109#define tunnels_r tunnels[2]
110#define tunnels_l tunnels[1]
111#define tunnels_wc tunnels[0]
112/*
113 * Locking : hash tables are protected by RCU and RTNL
114 */
115
116#define for_each_ip_tunnel_rcu(start) \
117 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
118
119/* often modified stats are per cpu, other are shared (netdev->stats) */
120struct pcpu_tstats {
121 u64 rx_packets;
122 u64 rx_bytes;
123 u64 tx_packets;
124 u64 tx_bytes;
125 struct u64_stats_sync syncp;
126};
127
128static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
129 struct rtnl_link_stats64 *tot)
130{
131 int i;
132
133 for_each_possible_cpu(i) {
134 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
135 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
136 unsigned int start;
137
138 do {
139 start = u64_stats_fetch_begin_bh(&tstats->syncp);
140 rx_packets = tstats->rx_packets;
141 tx_packets = tstats->tx_packets;
142 rx_bytes = tstats->rx_bytes;
143 tx_bytes = tstats->tx_bytes;
144 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
145
146 tot->rx_packets += rx_packets;
147 tot->tx_packets += tx_packets;
148 tot->rx_bytes += rx_bytes;
149 tot->tx_bytes += tx_bytes;
150 }
151
152 tot->multicast = dev->stats.multicast;
153 tot->rx_crc_errors = dev->stats.rx_crc_errors;
154 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
155 tot->rx_length_errors = dev->stats.rx_length_errors;
156 tot->rx_frame_errors = dev->stats.rx_frame_errors;
157 tot->rx_errors = dev->stats.rx_errors;
158
159 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
160 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
161 tot->tx_dropped = dev->stats.tx_dropped;
162 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
163 tot->tx_errors = dev->stats.tx_errors;
164
165 return tot;
166}
167
168/* Given src, dst and key, find the appropriate tunnel for input. */
169
170static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
171 const struct in6_addr *remote, const struct in6_addr *local,
172 __be32 key, __be16 gre_proto)
173{
174 struct net *net = dev_net(dev);
175 int link = dev->ifindex;
176 unsigned int h0 = HASH_ADDR(remote);
177 unsigned int h1 = HASH_KEY(key);
178 struct ip6_tnl *t, *cand = NULL;
179 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
181 ARPHRD_ETHER : ARPHRD_IP6GRE;
182 int score, cand_score = 4;
183
184 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
185 if (!ipv6_addr_equal(local, &t->parms.laddr) ||
186 !ipv6_addr_equal(remote, &t->parms.raddr) ||
187 key != t->parms.i_key ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (t->dev->type != ARPHRD_IP6GRE &&
192 t->dev->type != dev_type)
193 continue;
194
195 score = 0;
196 if (t->parms.link != link)
197 score |= 1;
198 if (t->dev->type != dev_type)
199 score |= 2;
200 if (score == 0)
201 return t;
202
203 if (score < cand_score) {
204 cand = t;
205 cand_score = score;
206 }
207 }
208
209 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
210 if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
211 key != t->parms.i_key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->dev->type != ARPHRD_IP6GRE &&
216 t->dev->type != dev_type)
217 continue;
218
219 score = 0;
220 if (t->parms.link != link)
221 score |= 1;
222 if (t->dev->type != dev_type)
223 score |= 2;
224 if (score == 0)
225 return t;
226
227 if (score < cand_score) {
228 cand = t;
229 cand_score = score;
230 }
231 }
232
233 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
234 if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
235 (!ipv6_addr_equal(local, &t->parms.raddr) ||
236 !ipv6_addr_is_multicast(local))) ||
237 key != t->parms.i_key ||
238 !(t->dev->flags & IFF_UP))
239 continue;
240
241 if (t->dev->type != ARPHRD_IP6GRE &&
242 t->dev->type != dev_type)
243 continue;
244
245 score = 0;
246 if (t->parms.link != link)
247 score |= 1;
248 if (t->dev->type != dev_type)
249 score |= 2;
250 if (score == 0)
251 return t;
252
253 if (score < cand_score) {
254 cand = t;
255 cand_score = score;
256 }
257 }
258
259 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
260 if (t->parms.i_key != key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IP6GRE &&
265 t->dev->type != dev_type)
266 continue;
267
268 score = 0;
269 if (t->parms.link != link)
270 score |= 1;
271 if (t->dev->type != dev_type)
272 score |= 2;
273 if (score == 0)
274 return t;
275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
280 }
281
282 if (cand != NULL)
283 return cand;
284
285 dev = ign->fb_tunnel_dev;
286 if (dev->flags & IFF_UP)
287 return netdev_priv(dev);
288
289 return NULL;
290}
291
292static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
293 const struct __ip6_tnl_parm *p)
294{
295 const struct in6_addr *remote = &p->raddr;
296 const struct in6_addr *local = &p->laddr;
297 unsigned int h = HASH_KEY(p->i_key);
298 int prio = 0;
299
300 if (!ipv6_addr_any(local))
301 prio |= 1;
302 if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
303 prio |= 2;
304 h ^= HASH_ADDR(remote);
305 }
306
307 return &ign->tunnels[prio][h];
308}
309
310static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
311 const struct ip6_tnl *t)
312{
313 return __ip6gre_bucket(ign, &t->parms);
314}
315
316static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
317{
318 struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
319
320 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
321 rcu_assign_pointer(*tp, t);
322}
323
324static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
325{
326 struct ip6_tnl __rcu **tp;
327 struct ip6_tnl *iter;
328
329 for (tp = ip6gre_bucket(ign, t);
330 (iter = rtnl_dereference(*tp)) != NULL;
331 tp = &iter->next) {
332 if (t == iter) {
333 rcu_assign_pointer(*tp, t->next);
334 break;
335 }
336 }
337}
338
339static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
340 const struct __ip6_tnl_parm *parms,
341 int type)
342{
343 const struct in6_addr *remote = &parms->raddr;
344 const struct in6_addr *local = &parms->laddr;
345 __be32 key = parms->i_key;
346 int link = parms->link;
347 struct ip6_tnl *t;
348 struct ip6_tnl __rcu **tp;
349 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
350
351 for (tp = __ip6gre_bucket(ign, parms);
352 (t = rtnl_dereference(*tp)) != NULL;
353 tp = &t->next)
354 if (ipv6_addr_equal(local, &t->parms.laddr) &&
355 ipv6_addr_equal(remote, &t->parms.raddr) &&
356 key == t->parms.i_key &&
357 link == t->parms.link &&
358 type == t->dev->type)
359 break;
360
361 return t;
362}
363
364static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
365 const struct __ip6_tnl_parm *parms, int create)
366{
367 struct ip6_tnl *t, *nt;
368 struct net_device *dev;
369 char name[IFNAMSIZ];
370 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
371
372 t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
373 if (t || !create)
374 return t;
375
376 if (parms->name[0])
377 strlcpy(name, parms->name, IFNAMSIZ);
378 else
379 strcpy(name, "ip6gre%d");
380
381 dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
382 if (!dev)
383 return NULL;
384
385 dev_net_set(dev, net);
386
387 nt = netdev_priv(dev);
388 nt->parms = *parms;
389 dev->rtnl_link_ops = &ip6gre_link_ops;
390
391 nt->dev = dev;
392 ip6gre_tnl_link_config(nt, 1);
393
394 if (register_netdevice(dev) < 0)
395 goto failed_free;
396
397 /* Can use a lockless transmit, unless we generate output sequences */
398 if (!(nt->parms.o_flags & GRE_SEQ))
399 dev->features |= NETIF_F_LLTX;
400
401 dev_hold(dev);
402 ip6gre_tunnel_link(ign, nt);
403 return nt;
404
405failed_free:
406 free_netdev(dev);
407 return NULL;
408}
409
410static void ip6gre_tunnel_uninit(struct net_device *dev)
411{
412 struct net *net = dev_net(dev);
413 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
414
415 ip6gre_tunnel_unlink(ign, netdev_priv(dev));
416 dev_put(dev);
417}
418
419
420static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
421 u8 type, u8 code, int offset, __be32 info)
422{
423 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
424 __be16 *p = (__be16 *)(skb->data + offset);
425 int grehlen = offset + 4;
426 struct ip6_tnl *t;
427 __be16 flags;
428
429 flags = p[0];
430 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
431 if (flags&(GRE_VERSION|GRE_ROUTING))
432 return;
433 if (flags&GRE_KEY) {
434 grehlen += 4;
435 if (flags&GRE_CSUM)
436 grehlen += 4;
437 }
438 }
439
440 /* If only 8 bytes returned, keyed message will be dropped here */
441 if (!pskb_may_pull(skb, grehlen))
442 return;
443 ipv6h = (const struct ipv6hdr *)skb->data;
444 p = (__be16 *)(skb->data + offset);
445
446 t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
447 flags & GRE_KEY ?
448 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
449 p[1]);
450 if (t == NULL)
451 return;
452
453 switch (type) {
454 __u32 teli;
455 struct ipv6_tlv_tnl_enc_lim *tel;
456 __u32 mtu;
457 case ICMPV6_DEST_UNREACH:
458 net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
459 t->parms.name);
460 break;
461 case ICMPV6_TIME_EXCEED:
462 if (code == ICMPV6_EXC_HOPLIMIT) {
463 net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
464 t->parms.name);
465 }
466 break;
467 case ICMPV6_PARAMPROB:
468 teli = 0;
469 if (code == ICMPV6_HDR_FIELD)
470 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
471
472 if (teli && teli == info - 2) {
473 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
474 if (tel->encap_limit == 0) {
475 net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
476 t->parms.name);
477 }
478 } else {
479 net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
480 t->parms.name);
481 }
482 break;
483 case ICMPV6_PKT_TOOBIG:
484 mtu = info - offset;
485 if (mtu < IPV6_MIN_MTU)
486 mtu = IPV6_MIN_MTU;
487 t->dev->mtu = mtu;
488 break;
489 }
490
491 if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
492 t->err_count++;
493 else
494 t->err_count = 1;
495 t->err_time = jiffies;
496}
497
498static int ip6gre_rcv(struct sk_buff *skb)
499{
500 const struct ipv6hdr *ipv6h;
501 u8 *h;
502 __be16 flags;
503 __sum16 csum = 0;
504 __be32 key = 0;
505 u32 seqno = 0;
506 struct ip6_tnl *tunnel;
507 int offset = 4;
508 __be16 gre_proto;
509 int err;
510
511 if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
512 goto drop;
513
514 ipv6h = ipv6_hdr(skb);
515 h = skb->data;
516 flags = *(__be16 *)h;
517
518 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
519 /* - Version must be 0.
520 - We do not support routing headers.
521 */
522 if (flags&(GRE_VERSION|GRE_ROUTING))
523 goto drop;
524
525 if (flags&GRE_CSUM) {
526 switch (skb->ip_summed) {
527 case CHECKSUM_COMPLETE:
528 csum = csum_fold(skb->csum);
529 if (!csum)
530 break;
531 /* fall through */
532 case CHECKSUM_NONE:
533 skb->csum = 0;
534 csum = __skb_checksum_complete(skb);
535 skb->ip_summed = CHECKSUM_COMPLETE;
536 }
537 offset += 4;
538 }
539 if (flags&GRE_KEY) {
540 key = *(__be32 *)(h + offset);
541 offset += 4;
542 }
543 if (flags&GRE_SEQ) {
544 seqno = ntohl(*(__be32 *)(h + offset));
545 offset += 4;
546 }
547 }
548
549 gre_proto = *(__be16 *)(h + 2);
550
551 tunnel = ip6gre_tunnel_lookup(skb->dev,
552 &ipv6h->saddr, &ipv6h->daddr, key,
553 gre_proto);
554 if (tunnel) {
555 struct pcpu_tstats *tstats;
556
557 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
558 goto drop;
559
560 if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
561 tunnel->dev->stats.rx_dropped++;
562 goto drop;
563 }
564
565 secpath_reset(skb);
566
567 skb->protocol = gre_proto;
568 /* WCCP version 1 and 2 protocol decoding.
569 * - Change protocol to IP
570 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
571 */
572 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
573 skb->protocol = htons(ETH_P_IP);
574 if ((*(h + offset) & 0xF0) != 0x40)
575 offset += 4;
576 }
577
578 skb->mac_header = skb->network_header;
579 __pskb_pull(skb, offset);
580 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
581 skb->pkt_type = PACKET_HOST;
582
583 if (((flags&GRE_CSUM) && csum) ||
584 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
585 tunnel->dev->stats.rx_crc_errors++;
586 tunnel->dev->stats.rx_errors++;
587 goto drop;
588 }
589 if (tunnel->parms.i_flags&GRE_SEQ) {
590 if (!(flags&GRE_SEQ) ||
591 (tunnel->i_seqno &&
592 (s32)(seqno - tunnel->i_seqno) < 0)) {
593 tunnel->dev->stats.rx_fifo_errors++;
594 tunnel->dev->stats.rx_errors++;
595 goto drop;
596 }
597 tunnel->i_seqno = seqno + 1;
598 }
599
600 /* Warning: All skb pointers will be invalidated! */
601 if (tunnel->dev->type == ARPHRD_ETHER) {
602 if (!pskb_may_pull(skb, ETH_HLEN)) {
603 tunnel->dev->stats.rx_length_errors++;
604 tunnel->dev->stats.rx_errors++;
605 goto drop;
606 }
607
608 ipv6h = ipv6_hdr(skb);
609 skb->protocol = eth_type_trans(skb, tunnel->dev);
610 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
611 }
612
613 __skb_tunnel_rx(skb, tunnel->dev);
614
615 skb_reset_network_header(skb);
616
617 err = IP6_ECN_decapsulate(ipv6h, skb);
618 if (unlikely(err)) {
619 if (log_ecn_error)
620 net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
621 &ipv6h->saddr,
622 ipv6_get_dsfield(ipv6h));
623 if (err > 1) {
624 ++tunnel->dev->stats.rx_frame_errors;
625 ++tunnel->dev->stats.rx_errors;
626 goto drop;
627 }
628 }
629
630 tstats = this_cpu_ptr(tunnel->dev->tstats);
631 u64_stats_update_begin(&tstats->syncp);
632 tstats->rx_packets++;
633 tstats->rx_bytes += skb->len;
634 u64_stats_update_end(&tstats->syncp);
635
636 netif_rx(skb);
637
638 return 0;
639 }
640 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
641
642drop:
643 kfree_skb(skb);
644 return 0;
645}
646
647struct ipv6_tel_txoption {
648 struct ipv6_txoptions ops;
649 __u8 dst_opt[8];
650};
651
652static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
653{
654 memset(opt, 0, sizeof(struct ipv6_tel_txoption));
655
656 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
657 opt->dst_opt[3] = 1;
658 opt->dst_opt[4] = encap_limit;
659 opt->dst_opt[5] = IPV6_TLV_PADN;
660 opt->dst_opt[6] = 1;
661
662 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
663 opt->ops.opt_nflen = 8;
664}
665
666static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
667 struct net_device *dev,
668 __u8 dsfield,
669 struct flowi6 *fl6,
670 int encap_limit,
671 __u32 *pmtu)
672{
673 struct net *net = dev_net(dev);
674 struct ip6_tnl *tunnel = netdev_priv(dev);
675 struct net_device *tdev; /* Device to other host */
676 struct ipv6hdr *ipv6h; /* Our new IP header */
677 unsigned int max_headroom; /* The extra header space needed */
678 int gre_hlen;
679 struct ipv6_tel_txoption opt;
680 int mtu;
681 struct dst_entry *dst = NULL, *ndst = NULL;
682 struct net_device_stats *stats = &tunnel->dev->stats;
683 int err = -1;
684 u8 proto;
685 int pkt_len;
686 struct sk_buff *new_skb;
687
688 if (dev->type == ARPHRD_ETHER)
689 IPCB(skb)->flags = 0;
690
691 if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
692 gre_hlen = 0;
693 ipv6h = (struct ipv6hdr *)skb->data;
694 fl6->daddr = ipv6h->daddr;
695 } else {
696 gre_hlen = tunnel->hlen;
697 fl6->daddr = tunnel->parms.raddr;
698 }
699
700 if (!fl6->flowi6_mark)
701 dst = ip6_tnl_dst_check(tunnel);
702
703 if (!dst) {
704 ndst = ip6_route_output(net, NULL, fl6);
705
706 if (ndst->error)
707 goto tx_err_link_failure;
708 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
709 if (IS_ERR(ndst)) {
710 err = PTR_ERR(ndst);
711 ndst = NULL;
712 goto tx_err_link_failure;
713 }
714 dst = ndst;
715 }
716
717 tdev = dst->dev;
718
719 if (tdev == dev) {
720 stats->collisions++;
721 net_warn_ratelimited("%s: Local routing loop detected!\n",
722 tunnel->parms.name);
723 goto tx_err_dst_release;
724 }
725
726 mtu = dst_mtu(dst) - sizeof(*ipv6h);
727 if (encap_limit >= 0) {
728 max_headroom += 8;
729 mtu -= 8;
730 }
731 if (mtu < IPV6_MIN_MTU)
732 mtu = IPV6_MIN_MTU;
733 if (skb_dst(skb))
734 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
735 if (skb->len > mtu) {
736 *pmtu = mtu;
737 err = -EMSGSIZE;
738 goto tx_err_dst_release;
739 }
740
741 if (tunnel->err_count > 0) {
742 if (time_before(jiffies,
743 tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
744 tunnel->err_count--;
745
746 dst_link_failure(skb);
747 } else
748 tunnel->err_count = 0;
749 }
750
751 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
752
753 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
754 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
755 new_skb = skb_realloc_headroom(skb, max_headroom);
756 if (max_headroom > dev->needed_headroom)
757 dev->needed_headroom = max_headroom;
758 if (!new_skb)
759 goto tx_err_dst_release;
760
761 if (skb->sk)
762 skb_set_owner_w(new_skb, skb->sk);
763 consume_skb(skb);
764 skb = new_skb;
765 }
766
767 skb_dst_drop(skb);
768
769 if (fl6->flowi6_mark) {
770 skb_dst_set(skb, dst);
771 ndst = NULL;
772 } else {
773 skb_dst_set_noref(skb, dst);
774 }
775
776 skb->transport_header = skb->network_header;
777
778 proto = NEXTHDR_GRE;
779 if (encap_limit >= 0) {
780 init_tel_txopt(&opt, encap_limit);
781 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
782 }
783
784 skb_push(skb, gre_hlen);
785 skb_reset_network_header(skb);
786
787 /*
788 * Push down and install the IP header.
789 */
790 ipv6h = ipv6_hdr(skb);
791 *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
792 dsfield = INET_ECN_encapsulate(0, dsfield);
793 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
794 ipv6h->hop_limit = tunnel->parms.hop_limit;
795 ipv6h->nexthdr = proto;
796 ipv6h->saddr = fl6->saddr;
797 ipv6h->daddr = fl6->daddr;
798
799 ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
800 ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
801 htons(ETH_P_TEB) : skb->protocol;
802
803 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
804 __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
805
806 if (tunnel->parms.o_flags&GRE_SEQ) {
807 ++tunnel->o_seqno;
808 *ptr = htonl(tunnel->o_seqno);
809 ptr--;
810 }
811 if (tunnel->parms.o_flags&GRE_KEY) {
812 *ptr = tunnel->parms.o_key;
813 ptr--;
814 }
815 if (tunnel->parms.o_flags&GRE_CSUM) {
816 *ptr = 0;
817 *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
818 skb->len - sizeof(struct ipv6hdr));
819 }
820 }
821
822 nf_reset(skb);
823 pkt_len = skb->len;
824 err = ip6_local_out(skb);
825
826 if (net_xmit_eval(err) == 0) {
827 struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
828
829 tstats->tx_bytes += pkt_len;
830 tstats->tx_packets++;
831 } else {
832 stats->tx_errors++;
833 stats->tx_aborted_errors++;
834 }
835
836 if (ndst)
837 ip6_tnl_dst_store(tunnel, ndst);
838
839 return 0;
840tx_err_link_failure:
841 stats->tx_carrier_errors++;
842 dst_link_failure(skb);
843tx_err_dst_release:
844 dst_release(ndst);
845 return err;
846}
847
848static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
849{
850 struct ip6_tnl *t = netdev_priv(dev);
851 const struct iphdr *iph = ip_hdr(skb);
852 int encap_limit = -1;
853 struct flowi6 fl6;
854 __u8 dsfield;
855 __u32 mtu;
856 int err;
857
858 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
859 encap_limit = t->parms.encap_limit;
860
861 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
862 fl6.flowi6_proto = IPPROTO_IPIP;
863
864 dsfield = ipv4_get_dsfield(iph);
865
866 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
867 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
868 & IPV6_TCLASS_MASK;
869 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
870 fl6.flowi6_mark = skb->mark;
871
872 err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
873 if (err != 0) {
874 /* XXX: send ICMP error even if DF is not set. */
875 if (err == -EMSGSIZE)
876 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
877 htonl(mtu));
878 return -1;
879 }
880
881 return 0;
882}
883
884static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
885{
886 struct ip6_tnl *t = netdev_priv(dev);
887 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
888 int encap_limit = -1;
889 __u16 offset;
890 struct flowi6 fl6;
891 __u8 dsfield;
892 __u32 mtu;
893 int err;
894
895 if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
896 return -1;
897
898 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
899 if (offset > 0) {
900 struct ipv6_tlv_tnl_enc_lim *tel;
901 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
902 if (tel->encap_limit == 0) {
903 icmpv6_send(skb, ICMPV6_PARAMPROB,
904 ICMPV6_HDR_FIELD, offset + 2);
905 return -1;
906 }
907 encap_limit = tel->encap_limit - 1;
908 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
909 encap_limit = t->parms.encap_limit;
910
911 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
912 fl6.flowi6_proto = IPPROTO_IPV6;
913
914 dsfield = ipv6_get_dsfield(ipv6h);
915 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
916 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
917 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
918 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
919 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
920 fl6.flowi6_mark = skb->mark;
921
922 err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
923 if (err != 0) {
924 if (err == -EMSGSIZE)
925 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
926 return -1;
927 }
928
929 return 0;
930}
931
932/**
933 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
934 * @t: the outgoing tunnel device
935 * @hdr: IPv6 header from the incoming packet
936 *
937 * Description:
938 * Avoid trivial tunneling loop by checking that tunnel exit-point
939 * doesn't match source of incoming packet.
940 *
941 * Return:
942 * 1 if conflict,
943 * 0 else
944 **/
945
946static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
947 const struct ipv6hdr *hdr)
948{
949 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
950}
951
952static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
953{
954 struct ip6_tnl *t = netdev_priv(dev);
955 int encap_limit = -1;
956 struct flowi6 fl6;
957 __u32 mtu;
958 int err;
959
960 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
961 encap_limit = t->parms.encap_limit;
962
963 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
964 fl6.flowi6_proto = skb->protocol;
965
966 err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
967
968 return err;
969}
970
971static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
972 struct net_device *dev)
973{
974 struct ip6_tnl *t = netdev_priv(dev);
975 struct net_device_stats *stats = &t->dev->stats;
976 int ret;
977
978 if (!ip6_tnl_xmit_ctl(t))
979 return -1;
980
981 switch (skb->protocol) {
982 case htons(ETH_P_IP):
983 ret = ip6gre_xmit_ipv4(skb, dev);
984 break;
985 case htons(ETH_P_IPV6):
986 ret = ip6gre_xmit_ipv6(skb, dev);
987 break;
988 default:
989 ret = ip6gre_xmit_other(skb, dev);
990 break;
991 }
992
993 if (ret < 0)
994 goto tx_err;
995
996 return NETDEV_TX_OK;
997
998tx_err:
999 stats->tx_errors++;
1000 stats->tx_dropped++;
1001 kfree_skb(skb);
1002 return NETDEV_TX_OK;
1003}
1004
1005static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
1006{
1007 struct net_device *dev = t->dev;
1008 struct __ip6_tnl_parm *p = &t->parms;
1009 struct flowi6 *fl6 = &t->fl.u.ip6;
1010 int addend = sizeof(struct ipv6hdr) + 4;
1011
1012 if (dev->type != ARPHRD_ETHER) {
1013 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1014 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1015 }
1016
1017 /* Set up flowi template */
1018 fl6->saddr = p->laddr;
1019 fl6->daddr = p->raddr;
1020 fl6->flowi6_oif = p->link;
1021 fl6->flowlabel = 0;
1022
1023 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1024 fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1025 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1026 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1027
1028 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
1029 p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1030
1031 if (p->flags&IP6_TNL_F_CAP_XMIT &&
1032 p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
1033 dev->flags |= IFF_POINTOPOINT;
1034 else
1035 dev->flags &= ~IFF_POINTOPOINT;
1036
1037 dev->iflink = p->link;
1038
1039 /* Precalculate GRE options length */
1040 if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1041 if (t->parms.o_flags&GRE_CSUM)
1042 addend += 4;
1043 if (t->parms.o_flags&GRE_KEY)
1044 addend += 4;
1045 if (t->parms.o_flags&GRE_SEQ)
1046 addend += 4;
1047 }
1048
1049 if (p->flags & IP6_TNL_F_CAP_XMIT) {
1050 int strict = (ipv6_addr_type(&p->raddr) &
1051 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1052
1053 struct rt6_info *rt = rt6_lookup(dev_net(dev),
1054 &p->raddr, &p->laddr,
1055 p->link, strict);
1056
1057 if (rt == NULL)
1058 return;
1059
1060 if (rt->dst.dev) {
1061 dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
1062
1063 if (set_mtu) {
1064 dev->mtu = rt->dst.dev->mtu - addend;
1065 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1066 dev->mtu -= 8;
1067
1068 if (dev->mtu < IPV6_MIN_MTU)
1069 dev->mtu = IPV6_MIN_MTU;
1070 }
1071 }
1072 dst_release(&rt->dst);
1073 }
1074
1075 t->hlen = addend;
1076}
1077
1078static int ip6gre_tnl_change(struct ip6_tnl *t,
1079 const struct __ip6_tnl_parm *p, int set_mtu)
1080{
1081 t->parms.laddr = p->laddr;
1082 t->parms.raddr = p->raddr;
1083 t->parms.flags = p->flags;
1084 t->parms.hop_limit = p->hop_limit;
1085 t->parms.encap_limit = p->encap_limit;
1086 t->parms.flowinfo = p->flowinfo;
1087 t->parms.link = p->link;
1088 t->parms.proto = p->proto;
1089 t->parms.i_key = p->i_key;
1090 t->parms.o_key = p->o_key;
1091 t->parms.i_flags = p->i_flags;
1092 t->parms.o_flags = p->o_flags;
1093 ip6_tnl_dst_reset(t);
1094 ip6gre_tnl_link_config(t, set_mtu);
1095 return 0;
1096}
1097
1098static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
1099 const struct ip6_tnl_parm2 *u)
1100{
1101 p->laddr = u->laddr;
1102 p->raddr = u->raddr;
1103 p->flags = u->flags;
1104 p->hop_limit = u->hop_limit;
1105 p->encap_limit = u->encap_limit;
1106 p->flowinfo = u->flowinfo;
1107 p->link = u->link;
1108 p->i_key = u->i_key;
1109 p->o_key = u->o_key;
1110 p->i_flags = u->i_flags;
1111 p->o_flags = u->o_flags;
1112 memcpy(p->name, u->name, sizeof(u->name));
1113}
1114
1115static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
1116 const struct __ip6_tnl_parm *p)
1117{
1118 u->proto = IPPROTO_GRE;
1119 u->laddr = p->laddr;
1120 u->raddr = p->raddr;
1121 u->flags = p->flags;
1122 u->hop_limit = p->hop_limit;
1123 u->encap_limit = p->encap_limit;
1124 u->flowinfo = p->flowinfo;
1125 u->link = p->link;
1126 u->i_key = p->i_key;
1127 u->o_key = p->o_key;
1128 u->i_flags = p->i_flags;
1129 u->o_flags = p->o_flags;
1130 memcpy(u->name, p->name, sizeof(u->name));
1131}
1132
1133static int ip6gre_tunnel_ioctl(struct net_device *dev,
1134 struct ifreq *ifr, int cmd)
1135{
1136 int err = 0;
1137 struct ip6_tnl_parm2 p;
1138 struct __ip6_tnl_parm p1;
1139 struct ip6_tnl *t;
1140 struct net *net = dev_net(dev);
1141 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1142
1143 switch (cmd) {
1144 case SIOCGETTUNNEL:
1145 t = NULL;
1146 if (dev == ign->fb_tunnel_dev) {
1147 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1148 err = -EFAULT;
1149 break;
1150 }
1151 ip6gre_tnl_parm_from_user(&p1, &p);
1152 t = ip6gre_tunnel_locate(net, &p1, 0);
1153 }
1154 if (t == NULL)
1155 t = netdev_priv(dev);
1156 ip6gre_tnl_parm_to_user(&p, &t->parms);
1157 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1158 err = -EFAULT;
1159 break;
1160
1161 case SIOCADDTUNNEL:
1162 case SIOCCHGTUNNEL:
1163 err = -EPERM;
1164 if (!capable(CAP_NET_ADMIN))
1165 goto done;
1166
1167 err = -EFAULT;
1168 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1169 goto done;
1170
1171 err = -EINVAL;
1172 if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
1173 goto done;
1174
1175 if (!(p.i_flags&GRE_KEY))
1176 p.i_key = 0;
1177 if (!(p.o_flags&GRE_KEY))
1178 p.o_key = 0;
1179
1180 ip6gre_tnl_parm_from_user(&p1, &p);
1181 t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
1182
1183 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1184 if (t != NULL) {
1185 if (t->dev != dev) {
1186 err = -EEXIST;
1187 break;
1188 }
1189 } else {
1190 t = netdev_priv(dev);
1191
1192 ip6gre_tunnel_unlink(ign, t);
1193 synchronize_net();
1194 ip6gre_tnl_change(t, &p1, 1);
1195 ip6gre_tunnel_link(ign, t);
1196 netdev_state_change(dev);
1197 }
1198 }
1199
1200 if (t) {
1201 err = 0;
1202
1203 ip6gre_tnl_parm_to_user(&p, &t->parms);
1204 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1205 err = -EFAULT;
1206 } else
1207 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1208 break;
1209
1210 case SIOCDELTUNNEL:
1211 err = -EPERM;
1212 if (!capable(CAP_NET_ADMIN))
1213 goto done;
1214
1215 if (dev == ign->fb_tunnel_dev) {
1216 err = -EFAULT;
1217 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1218 goto done;
1219 err = -ENOENT;
1220 ip6gre_tnl_parm_from_user(&p1, &p);
1221 t = ip6gre_tunnel_locate(net, &p1, 0);
1222 if (t == NULL)
1223 goto done;
1224 err = -EPERM;
1225 if (t == netdev_priv(ign->fb_tunnel_dev))
1226 goto done;
1227 dev = t->dev;
1228 }
1229 unregister_netdevice(dev);
1230 err = 0;
1231 break;
1232
1233 default:
1234 err = -EINVAL;
1235 }
1236
1237done:
1238 return err;
1239}
1240
1241static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1242{
1243 struct ip6_tnl *tunnel = netdev_priv(dev);
1244 if (new_mtu < 68 ||
1245 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1246 return -EINVAL;
1247 dev->mtu = new_mtu;
1248 return 0;
1249}
1250
1251static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
1252 unsigned short type,
1253 const void *daddr, const void *saddr, unsigned int len)
1254{
1255 struct ip6_tnl *t = netdev_priv(dev);
1256 struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
1257 __be16 *p = (__be16 *)(ipv6h+1);
1258
1259 *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
1260 ipv6h->hop_limit = t->parms.hop_limit;
1261 ipv6h->nexthdr = NEXTHDR_GRE;
1262 ipv6h->saddr = t->parms.laddr;
1263 ipv6h->daddr = t->parms.raddr;
1264
1265 p[0] = t->parms.o_flags;
1266 p[1] = htons(type);
1267
1268 /*
1269 * Set the source hardware address.
1270 */
1271
1272 if (saddr)
1273 memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
1274 if (daddr)
1275 memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
1276 if (!ipv6_addr_any(&ipv6h->daddr))
1277 return t->hlen;
1278
1279 return -t->hlen;
1280}
1281
1282static const struct header_ops ip6gre_header_ops = {
1283 .create = ip6gre_header,
1284};
1285
1286static const struct net_device_ops ip6gre_netdev_ops = {
1287 .ndo_init = ip6gre_tunnel_init,
1288 .ndo_uninit = ip6gre_tunnel_uninit,
1289 .ndo_start_xmit = ip6gre_tunnel_xmit,
1290 .ndo_do_ioctl = ip6gre_tunnel_ioctl,
1291 .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1292 .ndo_get_stats64 = ip6gre_get_stats64,
1293};
1294
1295static void ip6gre_dev_free(struct net_device *dev)
1296{
1297 free_percpu(dev->tstats);
1298 free_netdev(dev);
1299}
1300
1301static void ip6gre_tunnel_setup(struct net_device *dev)
1302{
1303 struct ip6_tnl *t;
1304
1305 dev->netdev_ops = &ip6gre_netdev_ops;
1306 dev->destructor = ip6gre_dev_free;
1307
1308 dev->type = ARPHRD_IP6GRE;
1309 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
1310 dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
1311 t = netdev_priv(dev);
1312 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1313 dev->mtu -= 8;
1314 dev->flags |= IFF_NOARP;
1315 dev->iflink = 0;
1316 dev->addr_len = sizeof(struct in6_addr);
1317 dev->features |= NETIF_F_NETNS_LOCAL;
1318 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1319}
1320
1321static int ip6gre_tunnel_init(struct net_device *dev)
1322{
1323 struct ip6_tnl *tunnel;
1324
1325 tunnel = netdev_priv(dev);
1326
1327 tunnel->dev = dev;
1328 strcpy(tunnel->parms.name, dev->name);
1329
1330 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
1331 memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
1332
1333 if (ipv6_addr_any(&tunnel->parms.raddr))
1334 dev->header_ops = &ip6gre_header_ops;
1335
1336 dev->tstats = alloc_percpu(struct pcpu_tstats);
1337 if (!dev->tstats)
1338 return -ENOMEM;
1339
1340 return 0;
1341}
1342
1343static void ip6gre_fb_tunnel_init(struct net_device *dev)
1344{
1345 struct ip6_tnl *tunnel = netdev_priv(dev);
1346
1347 tunnel->dev = dev;
1348 strcpy(tunnel->parms.name, dev->name);
1349
1350 tunnel->hlen = sizeof(struct ipv6hdr) + 4;
1351
1352 dev_hold(dev);
1353}
1354
1355
1356static struct inet6_protocol ip6gre_protocol __read_mostly = {
1357 .handler = ip6gre_rcv,
1358 .err_handler = ip6gre_err,
1359 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1360};
1361
1362static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
1363 struct list_head *head)
1364{
1365 int prio;
1366
1367 for (prio = 0; prio < 4; prio++) {
1368 int h;
1369 for (h = 0; h < HASH_SIZE; h++) {
1370 struct ip6_tnl *t;
1371
1372 t = rtnl_dereference(ign->tunnels[prio][h]);
1373
1374 while (t != NULL) {
1375 unregister_netdevice_queue(t->dev, head);
1376 t = rtnl_dereference(t->next);
1377 }
1378 }
1379 }
1380}
1381
1382static int __net_init ip6gre_init_net(struct net *net)
1383{
1384 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1385 int err;
1386
1387 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
1388 ip6gre_tunnel_setup);
1389 if (!ign->fb_tunnel_dev) {
1390 err = -ENOMEM;
1391 goto err_alloc_dev;
1392 }
1393 dev_net_set(ign->fb_tunnel_dev, net);
1394
1395 ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
1396 ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
1397
1398 err = register_netdev(ign->fb_tunnel_dev);
1399 if (err)
1400 goto err_reg_dev;
1401
1402 rcu_assign_pointer(ign->tunnels_wc[0],
1403 netdev_priv(ign->fb_tunnel_dev));
1404 return 0;
1405
1406err_reg_dev:
1407 ip6gre_dev_free(ign->fb_tunnel_dev);
1408err_alloc_dev:
1409 return err;
1410}
1411
1412static void __net_exit ip6gre_exit_net(struct net *net)
1413{
1414 struct ip6gre_net *ign;
1415 LIST_HEAD(list);
1416
1417 ign = net_generic(net, ip6gre_net_id);
1418 rtnl_lock();
1419 ip6gre_destroy_tunnels(ign, &list);
1420 unregister_netdevice_many(&list);
1421 rtnl_unlock();
1422}
1423
1424static struct pernet_operations ip6gre_net_ops = {
1425 .init = ip6gre_init_net,
1426 .exit = ip6gre_exit_net,
1427 .id = &ip6gre_net_id,
1428 .size = sizeof(struct ip6gre_net),
1429};
1430
1431static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1432{
1433 __be16 flags;
1434
1435 if (!data)
1436 return 0;
1437
1438 flags = 0;
1439 if (data[IFLA_GRE_IFLAGS])
1440 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1441 if (data[IFLA_GRE_OFLAGS])
1442 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1443 if (flags & (GRE_VERSION|GRE_ROUTING))
1444 return -EINVAL;
1445
1446 return 0;
1447}
1448
1449static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1450{
1451 struct in6_addr daddr;
1452
1453 if (tb[IFLA_ADDRESS]) {
1454 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1455 return -EINVAL;
1456 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1457 return -EADDRNOTAVAIL;
1458 }
1459
1460 if (!data)
1461 goto out;
1462
1463 if (data[IFLA_GRE_REMOTE]) {
1464 nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
1465 if (ipv6_addr_any(&daddr))
1466 return -EINVAL;
1467 }
1468
1469out:
1470 return ip6gre_tunnel_validate(tb, data);
1471}
1472
1473
1474static void ip6gre_netlink_parms(struct nlattr *data[],
1475 struct __ip6_tnl_parm *parms)
1476{
1477 memset(parms, 0, sizeof(*parms));
1478
1479 if (!data)
1480 return;
1481
1482 if (data[IFLA_GRE_LINK])
1483 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1484
1485 if (data[IFLA_GRE_IFLAGS])
1486 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1487
1488 if (data[IFLA_GRE_OFLAGS])
1489 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1490
1491 if (data[IFLA_GRE_IKEY])
1492 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1493
1494 if (data[IFLA_GRE_OKEY])
1495 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1496
1497 if (data[IFLA_GRE_LOCAL])
1498 nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
1499
1500 if (data[IFLA_GRE_REMOTE])
1501 nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
1502
1503 if (data[IFLA_GRE_TTL])
1504 parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
1505
1506 if (data[IFLA_GRE_ENCAP_LIMIT])
1507 parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
1508
1509 if (data[IFLA_GRE_FLOWINFO])
1510 parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
1511
1512 if (data[IFLA_GRE_FLAGS])
1513 parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
1514}
1515
1516static int ip6gre_tap_init(struct net_device *dev)
1517{
1518 struct ip6_tnl *tunnel;
1519
1520 tunnel = netdev_priv(dev);
1521
1522 tunnel->dev = dev;
1523 strcpy(tunnel->parms.name, dev->name);
1524
1525 ip6gre_tnl_link_config(tunnel, 1);
1526
1527 dev->tstats = alloc_percpu(struct pcpu_tstats);
1528 if (!dev->tstats)
1529 return -ENOMEM;
1530
1531 return 0;
1532}
1533
1534static const struct net_device_ops ip6gre_tap_netdev_ops = {
1535 .ndo_init = ip6gre_tap_init,
1536 .ndo_uninit = ip6gre_tunnel_uninit,
1537 .ndo_start_xmit = ip6gre_tunnel_xmit,
1538 .ndo_set_mac_address = eth_mac_addr,
1539 .ndo_validate_addr = eth_validate_addr,
1540 .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1541 .ndo_get_stats64 = ip6gre_get_stats64,
1542};
1543
1544static void ip6gre_tap_setup(struct net_device *dev)
1545{
1546
1547 ether_setup(dev);
1548
1549 dev->netdev_ops = &ip6gre_tap_netdev_ops;
1550 dev->destructor = ip6gre_dev_free;
1551
1552 dev->iflink = 0;
1553 dev->features |= NETIF_F_NETNS_LOCAL;
1554}
1555
1556static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1557 struct nlattr *tb[], struct nlattr *data[])
1558{
1559 struct ip6_tnl *nt;
1560 struct net *net = dev_net(dev);
1561 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1562 int err;
1563
1564 nt = netdev_priv(dev);
1565 ip6gre_netlink_parms(data, &nt->parms);
1566
1567 if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
1568 return -EEXIST;
1569
1570 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1571 eth_hw_addr_random(dev);
1572
1573 nt->dev = dev;
1574 ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1575
1576 /* Can use a lockless transmit, unless we generate output sequences */
1577 if (!(nt->parms.o_flags & GRE_SEQ))
1578 dev->features |= NETIF_F_LLTX;
1579
1580 err = register_netdevice(dev);
1581 if (err)
1582 goto out;
1583
1584 dev_hold(dev);
1585 ip6gre_tunnel_link(ign, nt);
1586
1587out:
1588 return err;
1589}
1590
1591static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
1592 struct nlattr *data[])
1593{
1594 struct ip6_tnl *t, *nt;
1595 struct net *net = dev_net(dev);
1596 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1597 struct __ip6_tnl_parm p;
1598
1599 if (dev == ign->fb_tunnel_dev)
1600 return -EINVAL;
1601
1602 nt = netdev_priv(dev);
1603 ip6gre_netlink_parms(data, &p);
1604
1605 t = ip6gre_tunnel_locate(net, &p, 0);
1606
1607 if (t) {
1608 if (t->dev != dev)
1609 return -EEXIST;
1610 } else {
1611 t = nt;
1612
1613 ip6gre_tunnel_unlink(ign, t);
1614 ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
1615 ip6gre_tunnel_link(ign, t);
1616 netdev_state_change(dev);
1617 }
1618
1619 return 0;
1620}
1621
1622static size_t ip6gre_get_size(const struct net_device *dev)
1623{
1624 return
1625 /* IFLA_GRE_LINK */
1626 nla_total_size(4) +
1627 /* IFLA_GRE_IFLAGS */
1628 nla_total_size(2) +
1629 /* IFLA_GRE_OFLAGS */
1630 nla_total_size(2) +
1631 /* IFLA_GRE_IKEY */
1632 nla_total_size(4) +
1633 /* IFLA_GRE_OKEY */
1634 nla_total_size(4) +
1635 /* IFLA_GRE_LOCAL */
1636 nla_total_size(4) +
1637 /* IFLA_GRE_REMOTE */
1638 nla_total_size(4) +
1639 /* IFLA_GRE_TTL */
1640 nla_total_size(1) +
1641 /* IFLA_GRE_TOS */
1642 nla_total_size(1) +
1643 /* IFLA_GRE_ENCAP_LIMIT */
1644 nla_total_size(1) +
1645 /* IFLA_GRE_FLOWINFO */
1646 nla_total_size(4) +
1647 /* IFLA_GRE_FLAGS */
1648 nla_total_size(4) +
1649 0;
1650}
1651
1652static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1653{
1654 struct ip6_tnl *t = netdev_priv(dev);
1655 struct __ip6_tnl_parm *p = &t->parms;
1656
1657 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1658 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1659 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1660 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1661 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1662 nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) ||
1663 nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) ||
1664 nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
1665 /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
1666 nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
1667 nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
1668 nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
1669 goto nla_put_failure;
1670 return 0;
1671
1672nla_put_failure:
1673 return -EMSGSIZE;
1674}
1675
1676static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
1677 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1678 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1679 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1680 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1681 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1682 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
1683 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
1684 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1685 [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
1686 [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
1687 [IFLA_GRE_FLAGS] = { .type = NLA_U32 },
1688};
1689
1690static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
1691 .kind = "ip6gre",
1692 .maxtype = IFLA_GRE_MAX,
1693 .policy = ip6gre_policy,
1694 .priv_size = sizeof(struct ip6_tnl),
1695 .setup = ip6gre_tunnel_setup,
1696 .validate = ip6gre_tunnel_validate,
1697 .newlink = ip6gre_newlink,
1698 .changelink = ip6gre_changelink,
1699 .get_size = ip6gre_get_size,
1700 .fill_info = ip6gre_fill_info,
1701};
1702
1703static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
1704 .kind = "ip6gretap",
1705 .maxtype = IFLA_GRE_MAX,
1706 .policy = ip6gre_policy,
1707 .priv_size = sizeof(struct ip6_tnl),
1708 .setup = ip6gre_tap_setup,
1709 .validate = ip6gre_tap_validate,
1710 .newlink = ip6gre_newlink,
1711 .changelink = ip6gre_changelink,
1712 .get_size = ip6gre_get_size,
1713 .fill_info = ip6gre_fill_info,
1714};
1715
1716/*
1717 * And now the modules code and kernel interface.
1718 */
1719
1720static int __init ip6gre_init(void)
1721{
1722 int err;
1723
1724 pr_info("GRE over IPv6 tunneling driver\n");
1725
1726 err = register_pernet_device(&ip6gre_net_ops);
1727 if (err < 0)
1728 return err;
1729
1730 err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
1731 if (err < 0) {
1732 pr_info("%s: can't add protocol\n", __func__);
1733 goto add_proto_failed;
1734 }
1735
1736 err = rtnl_link_register(&ip6gre_link_ops);
1737 if (err < 0)
1738 goto rtnl_link_failed;
1739
1740 err = rtnl_link_register(&ip6gre_tap_ops);
1741 if (err < 0)
1742 goto tap_ops_failed;
1743
1744out:
1745 return err;
1746
1747tap_ops_failed:
1748 rtnl_link_unregister(&ip6gre_link_ops);
1749rtnl_link_failed:
1750 inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1751add_proto_failed:
1752 unregister_pernet_device(&ip6gre_net_ops);
1753 goto out;
1754}
1755
1756static void __exit ip6gre_fini(void)
1757{
1758 rtnl_link_unregister(&ip6gre_tap_ops);
1759 rtnl_link_unregister(&ip6gre_link_ops);
1760 inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1761 unregister_pernet_device(&ip6gre_net_ops);
1762}
1763
1764module_init(ip6gre_init);
1765module_exit(ip6gre_fini);
1766MODULE_LICENSE("GPL");
1767MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
1768MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
1769MODULE_ALIAS_RTNL_LINK("ip6gre");
1770MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5b2d63ed793..aece3e792f8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -123,16 +123,11 @@ static int ip6_finish_output2(struct sk_buff *skb)
123 skb->len); 123 skb->len);
124 } 124 }
125 125
126 rcu_read_lock();
127 rt = (struct rt6_info *) dst; 126 rt = (struct rt6_info *) dst;
128 neigh = rt->n; 127 neigh = rt->n;
129 if (neigh) { 128 if (neigh)
130 int res = dst_neigh_output(dst, neigh, skb); 129 return dst_neigh_output(dst, neigh, skb);
131 130
132 rcu_read_unlock();
133 return res;
134 }
135 rcu_read_unlock();
136 IP6_INC_STATS_BH(dev_net(dst->dev), 131 IP6_INC_STATS_BH(dev_net(dst->dev),
137 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 132 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
138 kfree_skb(skb); 133 kfree_skb(skb);
@@ -493,7 +488,8 @@ int ip6_forward(struct sk_buff *skb)
493 if (mtu < IPV6_MIN_MTU) 488 if (mtu < IPV6_MIN_MTU)
494 mtu = IPV6_MIN_MTU; 489 mtu = IPV6_MIN_MTU;
495 490
496 if (skb->len > mtu && !skb_is_gso(skb)) { 491 if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
492 (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
497 /* Again, force OUTPUT device used as source address */ 493 /* Again, force OUTPUT device used as source address */
498 skb->dev = dst->dev; 494 skb->dev = dst->dev;
499 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 495 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -636,7 +632,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
636 /* We must not fragment if the socket is set to force MTU discovery 632 /* We must not fragment if the socket is set to force MTU discovery
637 * or if the skb it not generated by a local socket. 633 * or if the skb it not generated by a local socket.
638 */ 634 */
639 if (unlikely(!skb->local_df && skb->len > mtu)) { 635 if (unlikely(!skb->local_df && skb->len > mtu) ||
636 (IP6CB(skb)->frag_max_size &&
637 IP6CB(skb)->frag_max_size > mtu)) {
640 if (skb->sk && dst_allfrag(skb_dst(skb))) 638 if (skb->sk && dst_allfrag(skb_dst(skb)))
641 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 639 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
642 640
@@ -980,7 +978,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
980 * dst entry and replace it instead with the 978 * dst entry and replace it instead with the
981 * dst entry of the nexthop router 979 * dst entry of the nexthop router
982 */ 980 */
983 rcu_read_lock();
984 rt = (struct rt6_info *) *dst; 981 rt = (struct rt6_info *) *dst;
985 n = rt->n; 982 n = rt->n;
986 if (n && !(n->nud_state & NUD_VALID)) { 983 if (n && !(n->nud_state & NUD_VALID)) {
@@ -988,7 +985,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
988 struct flowi6 fl_gw6; 985 struct flowi6 fl_gw6;
989 int redirect; 986 int redirect;
990 987
991 rcu_read_unlock();
992 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 988 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
993 (*dst)->dev, 1); 989 (*dst)->dev, 1);
994 990
@@ -1008,8 +1004,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
1008 if ((err = (*dst)->error)) 1004 if ((err = (*dst)->error))
1009 goto out_err_release; 1005 goto out_err_release;
1010 } 1006 }
1011 } else {
1012 rcu_read_unlock();
1013 } 1007 }
1014#endif 1008#endif
1015 1009
@@ -1285,8 +1279,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1285 if (dst_allfrag(rt->dst.path)) 1279 if (dst_allfrag(rt->dst.path))
1286 cork->flags |= IPCORK_ALLFRAG; 1280 cork->flags |= IPCORK_ALLFRAG;
1287 cork->length = 0; 1281 cork->length = 0;
1288 sk->sk_sndmsg_page = NULL;
1289 sk->sk_sndmsg_off = 0;
1290 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; 1282 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1291 length += exthdrlen; 1283 length += exthdrlen;
1292 transhdrlen += exthdrlen; 1284 transhdrlen += exthdrlen;
@@ -1510,48 +1502,31 @@ alloc_new_skb:
1510 } 1502 }
1511 } else { 1503 } else {
1512 int i = skb_shinfo(skb)->nr_frags; 1504 int i = skb_shinfo(skb)->nr_frags;
1513 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1505 struct page_frag *pfrag = sk_page_frag(sk);
1514 struct page *page = sk->sk_sndmsg_page;
1515 int off = sk->sk_sndmsg_off;
1516 unsigned int left;
1517
1518 if (page && (left = PAGE_SIZE - off) > 0) {
1519 if (copy >= left)
1520 copy = left;
1521 if (page != skb_frag_page(frag)) {
1522 if (i == MAX_SKB_FRAGS) {
1523 err = -EMSGSIZE;
1524 goto error;
1525 }
1526 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1527 skb_frag_ref(skb, i);
1528 frag = &skb_shinfo(skb)->frags[i];
1529 }
1530 } else if(i < MAX_SKB_FRAGS) {
1531 if (copy > PAGE_SIZE)
1532 copy = PAGE_SIZE;
1533 page = alloc_pages(sk->sk_allocation, 0);
1534 if (page == NULL) {
1535 err = -ENOMEM;
1536 goto error;
1537 }
1538 sk->sk_sndmsg_page = page;
1539 sk->sk_sndmsg_off = 0;
1540 1506
1541 skb_fill_page_desc(skb, i, page, 0, 0); 1507 err = -ENOMEM;
1542 frag = &skb_shinfo(skb)->frags[i]; 1508 if (!sk_page_frag_refill(sk, pfrag))
1543 } else {
1544 err = -EMSGSIZE;
1545 goto error; 1509 goto error;
1510
1511 if (!skb_can_coalesce(skb, i, pfrag->page,
1512 pfrag->offset)) {
1513 err = -EMSGSIZE;
1514 if (i == MAX_SKB_FRAGS)
1515 goto error;
1516
1517 __skb_fill_page_desc(skb, i, pfrag->page,
1518 pfrag->offset, 0);
1519 skb_shinfo(skb)->nr_frags = ++i;
1520 get_page(pfrag->page);
1546 } 1521 }
1522 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1547 if (getfrag(from, 1523 if (getfrag(from,
1548 skb_frag_address(frag) + skb_frag_size(frag), 1524 page_address(pfrag->page) + pfrag->offset,
1549 offset, copy, skb->len, skb) < 0) { 1525 offset, copy, skb->len, skb) < 0)
1550 err = -EFAULT; 1526 goto error_efault;
1551 goto error; 1527
1552 } 1528 pfrag->offset += copy;
1553 sk->sk_sndmsg_off += copy; 1529 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1554 skb_frag_size_add(frag, copy);
1555 skb->len += copy; 1530 skb->len += copy;
1556 skb->data_len += copy; 1531 skb->data_len += copy;
1557 skb->truesize += copy; 1532 skb->truesize += copy;
@@ -1560,7 +1535,11 @@ alloc_new_skb:
1560 offset += copy; 1535 offset += copy;
1561 length -= copy; 1536 length -= copy;
1562 } 1537 }
1538
1563 return 0; 1539 return 0;
1540
1541error_efault:
1542 err = -EFAULT;
1564error: 1543error:
1565 cork->length -= length; 1544 cork->length -= length;
1566 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1545 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9a1d5fe6aef..cb7e2ded6f0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
126 * Locking : hash tables are protected by RCU and RTNL 126 * Locking : hash tables are protected by RCU and RTNL
127 */ 127 */
128 128
129static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 129struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
130{ 130{
131 struct dst_entry *dst = t->dst_cache; 131 struct dst_entry *dst = t->dst_cache;
132 132
@@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
139 139
140 return dst; 140 return dst;
141} 141}
142EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
142 143
143static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) 144void ip6_tnl_dst_reset(struct ip6_tnl *t)
144{ 145{
145 dst_release(t->dst_cache); 146 dst_release(t->dst_cache);
146 t->dst_cache = NULL; 147 t->dst_cache = NULL;
147} 148}
149EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
148 150
149static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) 151void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
150{ 152{
151 struct rt6_info *rt = (struct rt6_info *) dst; 153 struct rt6_info *rt = (struct rt6_info *) dst;
152 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 154 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
153 dst_release(t->dst_cache); 155 dst_release(t->dst_cache);
154 t->dst_cache = dst; 156 t->dst_cache = dst;
155} 157}
158EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
156 159
157/** 160/**
158 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 161 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
200 **/ 203 **/
201 204
202static struct ip6_tnl __rcu ** 205static struct ip6_tnl __rcu **
203ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) 206ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
204{ 207{
205 const struct in6_addr *remote = &p->raddr; 208 const struct in6_addr *remote = &p->raddr;
206 const struct in6_addr *local = &p->laddr; 209 const struct in6_addr *local = &p->laddr;
@@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev)
267 * created tunnel or NULL 270 * created tunnel or NULL
268 **/ 271 **/
269 272
270static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) 273static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
271{ 274{
272 struct net_device *dev; 275 struct net_device *dev;
273 struct ip6_tnl *t; 276 struct ip6_tnl *t;
@@ -322,7 +325,7 @@ failed:
322 **/ 325 **/
323 326
324static struct ip6_tnl *ip6_tnl_locate(struct net *net, 327static struct ip6_tnl *ip6_tnl_locate(struct net *net,
325 struct ip6_tnl_parm *p, int create) 328 struct __ip6_tnl_parm *p, int create)
326{ 329{
327 const struct in6_addr *remote = &p->raddr; 330 const struct in6_addr *remote = &p->raddr;
328 const struct in6_addr *local = &p->laddr; 331 const struct in6_addr *local = &p->laddr;
@@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
374 * else index to encapsulation limit 377 * else index to encapsulation limit
375 **/ 378 **/
376 379
377static __u16 380__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
378parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
379{ 381{
380 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; 382 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
381 __u8 nexthdr = ipv6h->nexthdr; 383 __u8 nexthdr = ipv6h->nexthdr;
@@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
425 } 427 }
426 return 0; 428 return 0;
427} 429}
430EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
428 431
429/** 432/**
430 * ip6_tnl_err - tunnel error handler 433 * ip6_tnl_err - tunnel error handler
@@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
480 case ICMPV6_PARAMPROB: 483 case ICMPV6_PARAMPROB:
481 teli = 0; 484 teli = 0;
482 if ((*code) == ICMPV6_HDR_FIELD) 485 if ((*code) == ICMPV6_HDR_FIELD)
483 teli = parse_tlv_tnl_enc_lim(skb, skb->data); 486 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
484 487
485 if (teli && teli == *info - 2) { 488 if (teli && teli == *info - 2) {
486 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 489 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
@@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
693 IP6_ECN_set_ce(ipv6_hdr(skb)); 696 IP6_ECN_set_ce(ipv6_hdr(skb));
694} 697}
695 698
696static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, 699__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
697 const struct in6_addr *laddr, 700 const struct in6_addr *laddr,
698 const struct in6_addr *raddr) 701 const struct in6_addr *raddr)
699{ 702{
700 struct ip6_tnl_parm *p = &t->parms; 703 struct __ip6_tnl_parm *p = &t->parms;
701 int ltype = ipv6_addr_type(laddr); 704 int ltype = ipv6_addr_type(laddr);
702 int rtype = ipv6_addr_type(raddr); 705 int rtype = ipv6_addr_type(raddr);
703 __u32 flags = 0; 706 __u32 flags = 0;
@@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
715 } 718 }
716 return flags; 719 return flags;
717} 720}
721EXPORT_SYMBOL(ip6_tnl_get_cap);
718 722
719/* called with rcu_read_lock() */ 723/* called with rcu_read_lock() */
720static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, 724int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
721 const struct in6_addr *laddr, 725 const struct in6_addr *laddr,
722 const struct in6_addr *raddr) 726 const struct in6_addr *raddr)
723{ 727{
724 struct ip6_tnl_parm *p = &t->parms; 728 struct __ip6_tnl_parm *p = &t->parms;
725 int ret = 0; 729 int ret = 0;
726 struct net *net = dev_net(t->dev); 730 struct net *net = dev_net(t->dev);
727 731
@@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
740 } 744 }
741 return ret; 745 return ret;
742} 746}
747EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
743 748
744/** 749/**
745 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally 750 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
@@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
859 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); 864 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
860} 865}
861 866
862static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) 867int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
863{ 868{
864 struct ip6_tnl_parm *p = &t->parms; 869 struct __ip6_tnl_parm *p = &t->parms;
865 int ret = 0; 870 int ret = 0;
866 struct net *net = dev_net(t->dev); 871 struct net *net = dev_net(t->dev);
867 872
@@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
885 } 890 }
886 return ret; 891 return ret;
887} 892}
893EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
894
888/** 895/**
889 * ip6_tnl_xmit2 - encapsulate packet and send 896 * ip6_tnl_xmit2 - encapsulate packet and send
890 * @skb: the outgoing socket buffer 897 * @skb: the outgoing socket buffer
@@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1085 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) 1092 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
1086 return -1; 1093 return -1;
1087 1094
1088 offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); 1095 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
1089 if (offset > 0) { 1096 if (offset > 0) {
1090 struct ipv6_tlv_tnl_enc_lim *tel; 1097 struct ipv6_tlv_tnl_enc_lim *tel;
1091 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; 1098 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -1152,7 +1159,7 @@ tx_err:
1152static void ip6_tnl_link_config(struct ip6_tnl *t) 1159static void ip6_tnl_link_config(struct ip6_tnl *t)
1153{ 1160{
1154 struct net_device *dev = t->dev; 1161 struct net_device *dev = t->dev;
1155 struct ip6_tnl_parm *p = &t->parms; 1162 struct __ip6_tnl_parm *p = &t->parms;
1156 struct flowi6 *fl6 = &t->fl.u.ip6; 1163 struct flowi6 *fl6 = &t->fl.u.ip6;
1157 1164
1158 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 1165 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1215 **/ 1222 **/
1216 1223
1217static int 1224static int
1218ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) 1225ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1219{ 1226{
1220 t->parms.laddr = p->laddr; 1227 t->parms.laddr = p->laddr;
1221 t->parms.raddr = p->raddr; 1228 t->parms.raddr = p->raddr;
@@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
1230 return 0; 1237 return 0;
1231} 1238}
1232 1239
1240static void
1241ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1242{
1243 p->laddr = u->laddr;
1244 p->raddr = u->raddr;
1245 p->flags = u->flags;
1246 p->hop_limit = u->hop_limit;
1247 p->encap_limit = u->encap_limit;
1248 p->flowinfo = u->flowinfo;
1249 p->link = u->link;
1250 p->proto = u->proto;
1251 memcpy(p->name, u->name, sizeof(u->name));
1252}
1253
1254static void
1255ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1256{
1257 u->laddr = p->laddr;
1258 u->raddr = p->raddr;
1259 u->flags = p->flags;
1260 u->hop_limit = p->hop_limit;
1261 u->encap_limit = p->encap_limit;
1262 u->flowinfo = p->flowinfo;
1263 u->link = p->link;
1264 u->proto = p->proto;
1265 memcpy(u->name, p->name, sizeof(u->name));
1266}
1267
1233/** 1268/**
1234 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace 1269 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1235 * @dev: virtual device associated with tunnel 1270 * @dev: virtual device associated with tunnel
@@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1263{ 1298{
1264 int err = 0; 1299 int err = 0;
1265 struct ip6_tnl_parm p; 1300 struct ip6_tnl_parm p;
1301 struct __ip6_tnl_parm p1;
1266 struct ip6_tnl *t = NULL; 1302 struct ip6_tnl *t = NULL;
1267 struct net *net = dev_net(dev); 1303 struct net *net = dev_net(dev);
1268 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1304 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -1274,11 +1310,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1274 err = -EFAULT; 1310 err = -EFAULT;
1275 break; 1311 break;
1276 } 1312 }
1277 t = ip6_tnl_locate(net, &p, 0); 1313 ip6_tnl_parm_from_user(&p1, &p);
1314 t = ip6_tnl_locate(net, &p1, 0);
1315 } else {
1316 memset(&p, 0, sizeof(p));
1278 } 1317 }
1279 if (t == NULL) 1318 if (t == NULL)
1280 t = netdev_priv(dev); 1319 t = netdev_priv(dev);
1281 memcpy(&p, &t->parms, sizeof (p)); 1320 ip6_tnl_parm_to_user(&p, &t->parms);
1282 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { 1321 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
1283 err = -EFAULT; 1322 err = -EFAULT;
1284 } 1323 }
@@ -1295,7 +1334,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1295 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && 1334 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1296 p.proto != 0) 1335 p.proto != 0)
1297 break; 1336 break;
1298 t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); 1337 ip6_tnl_parm_from_user(&p1, &p);
1338 t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1299 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { 1339 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
1300 if (t != NULL) { 1340 if (t != NULL) {
1301 if (t->dev != dev) { 1341 if (t->dev != dev) {
@@ -1307,13 +1347,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1307 1347
1308 ip6_tnl_unlink(ip6n, t); 1348 ip6_tnl_unlink(ip6n, t);
1309 synchronize_net(); 1349 synchronize_net();
1310 err = ip6_tnl_change(t, &p); 1350 err = ip6_tnl_change(t, &p1);
1311 ip6_tnl_link(ip6n, t); 1351 ip6_tnl_link(ip6n, t);
1312 netdev_state_change(dev); 1352 netdev_state_change(dev);
1313 } 1353 }
1314 if (t) { 1354 if (t) {
1315 err = 0; 1355 err = 0;
1316 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) 1356 ip6_tnl_parm_to_user(&p, &t->parms);
1357 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1317 err = -EFAULT; 1358 err = -EFAULT;
1318 1359
1319 } else 1360 } else
@@ -1329,7 +1370,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1329 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1370 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1330 break; 1371 break;
1331 err = -ENOENT; 1372 err = -ENOENT;
1332 if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) 1373 ip6_tnl_parm_from_user(&p1, &p);
1374 t = ip6_tnl_locate(net, &p1, 0);
1375 if (t == NULL)
1333 break; 1376 break;
1334 err = -EPERM; 1377 err = -EPERM;
1335 if (t->dev == ip6n->fb_tnl_dev) 1378 if (t->dev == ip6n->fb_tnl_dev)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 4532973f0dd..f7c7c631972 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -205,7 +205,7 @@ static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
205 return 0; 205 return 0;
206} 206}
207 207
208static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = { 208static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
209 .family = RTNL_FAMILY_IP6MR, 209 .family = RTNL_FAMILY_IP6MR,
210 .rule_size = sizeof(struct ip6mr_rule), 210 .rule_size = sizeof(struct ip6mr_rule),
211 .addr_size = sizeof(struct in6_addr), 211 .addr_size = sizeof(struct in6_addr),
@@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
838 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 838 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
839 skb_trim(skb, nlh->nlmsg_len); 839 skb_trim(skb, nlh->nlmsg_len);
840 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; 840 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
841 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 841 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
842 } else 842 } else
843 kfree_skb(skb); 843 kfree_skb(skb);
844 } 844 }
@@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1052 skb_trim(skb, nlh->nlmsg_len); 1052 skb_trim(skb, nlh->nlmsg_len);
1053 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; 1053 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1054 } 1054 }
1055 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 1055 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1056 } else 1056 } else
1057 ip6_mr_forward(net, mrt, skb, c); 1057 ip6_mr_forward(net, mrt, skb, c);
1058 } 1058 }
@@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net,
2202} 2202}
2203 2203
2204static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 2204static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2205 u32 pid, u32 seq, struct mfc6_cache *c) 2205 u32 portid, u32 seq, struct mfc6_cache *c)
2206{ 2206{
2207 struct nlmsghdr *nlh; 2207 struct nlmsghdr *nlh;
2208 struct rtmsg *rtm; 2208 struct rtmsg *rtm;
2209 2209
2210 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); 2210 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2211 if (nlh == NULL) 2211 if (nlh == NULL)
2212 return -EMSGSIZE; 2212 return -EMSGSIZE;
2213 2213
@@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2260 if (e < s_e) 2260 if (e < s_e)
2261 goto next_entry; 2261 goto next_entry;
2262 if (ip6mr_fill_mroute(mrt, skb, 2262 if (ip6mr_fill_mroute(mrt, skb,
2263 NETLINK_CB(cb->skb).pid, 2263 NETLINK_CB(cb->skb).portid,
2264 cb->nlh->nlmsg_seq, 2264 cb->nlh->nlmsg_seq,
2265 mfc) < 0) 2265 mfc) < 0)
2266 goto done; 2266 goto done;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 5b087c31d87..0f9bdc5ee9f 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -86,28 +86,30 @@ static int mip6_mh_len(int type)
86 86
87static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) 87static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
88{ 88{
89 struct ip6_mh *mh; 89 struct ip6_mh _hdr;
90 const struct ip6_mh *mh;
90 91
91 if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || 92 mh = skb_header_pointer(skb, skb_transport_offset(skb),
92 !pskb_may_pull(skb, (skb_transport_offset(skb) + 93 sizeof(_hdr), &_hdr);
93 ((skb_transport_header(skb)[1] + 1) << 3)))) 94 if (!mh)
94 return -1; 95 return -1;
95 96
96 mh = (struct ip6_mh *)skb_transport_header(skb); 97 if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len)
98 return -1;
97 99
98 if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { 100 if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
99 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", 101 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
100 mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); 102 mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
101 mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - 103 mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
102 skb_network_header(skb))); 104 skb_network_header_len(skb));
103 return -1; 105 return -1;
104 } 106 }
105 107
106 if (mh->ip6mh_proto != IPPROTO_NONE) { 108 if (mh->ip6mh_proto != IPPROTO_NONE) {
107 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", 109 LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
108 mh->ip6mh_proto); 110 mh->ip6mh_proto);
109 mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - 111 mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
110 skb_network_header(skb))); 112 skb_network_header_len(skb));
111 return -1; 113 return -1;
112 } 114 }
113 115
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index db31561cc8d..429089cb073 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -15,6 +15,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
15{ 15{
16 struct net *net = dev_net(skb_dst(skb)->dev); 16 struct net *net = dev_net(skb_dst(skb)->dev);
17 const struct ipv6hdr *iph = ipv6_hdr(skb); 17 const struct ipv6hdr *iph = ipv6_hdr(skb);
18 unsigned int hh_len;
18 struct dst_entry *dst; 19 struct dst_entry *dst;
19 struct flowi6 fl6 = { 20 struct flowi6 fl6 = {
20 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 21 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -47,6 +48,13 @@ int ip6_route_me_harder(struct sk_buff *skb)
47 } 48 }
48#endif 49#endif
49 50
51 /* Change in oif may mean change in hh_len. */
52 hh_len = skb_dst(skb)->dev->hard_header_len;
53 if (skb_headroom(skb) < hh_len &&
54 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
55 0, GFP_ATOMIC))
56 return -1;
57
50 return 0; 58 return 0;
51} 59}
52EXPORT_SYMBOL(ip6_route_me_harder); 60EXPORT_SYMBOL(ip6_route_me_harder);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 10135342799..c72532a60d8 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -181,9 +181,44 @@ config IP6_NF_SECURITY
181 help 181 help
182 This option adds a `security' table to iptables, for use 182 This option adds a `security' table to iptables, for use
183 with Mandatory Access Control (MAC) policy. 183 with Mandatory Access Control (MAC) policy.
184 184
185 If unsure, say N. 185 If unsure, say N.
186 186
187config NF_NAT_IPV6
188 tristate "IPv6 NAT"
189 depends on NF_CONNTRACK_IPV6
190 depends on NETFILTER_ADVANCED
191 select NF_NAT
192 help
193 The IPv6 NAT option allows masquerading, port forwarding and other
194 forms of full Network Address Port Translation. It is controlled by
195 the `nat' table in ip6tables, see the man page for ip6tables(8).
196
197 To compile it as a module, choose M here. If unsure, say N.
198
199if NF_NAT_IPV6
200
201config IP6_NF_TARGET_MASQUERADE
202 tristate "MASQUERADE target support"
203 help
204 Masquerading is a special case of NAT: all outgoing connections are
205 changed to seem to come from a particular interface's address, and
206 if the interface goes down, those connections are lost. This is
207 only useful for dialup accounts with dynamic IP address (ie. your IP
208 address will be different on next dialup).
209
210 To compile it as a module, choose M here. If unsure, say N.
211
212config IP6_NF_TARGET_NPT
213 tristate "NPT (Network Prefix translation) target support"
214 help
215 This option adds the `SNPT' and `DNPT' target, which perform
216 stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
217
218 To compile it as a module, choose M here. If unsure, say N.
219
220endif # NF_NAT_IPV6
221
187endif # IP6_NF_IPTABLES 222endif # IP6_NF_IPTABLES
188 223
189endmenu 224endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 534d3f216f7..2d11fcc2cf3 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
8obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o 8obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
9obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o 9obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
10obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 10obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
11obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
11 12
12# objects for l3 independent conntrack 13# objects for l3 independent conntrack
13nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15# l3 independent conntrack 16# l3 independent conntrack
16obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
17 18
19nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
20obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
21
18# defrag 22# defrag
19nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o 23nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
20obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o 24obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
@@ -30,4 +34,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
30obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o 34obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
31 35
32# targets 36# targets
37obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
38obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
33obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 39obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
new file mode 100644
index 00000000000..60e9053bab0
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -0,0 +1,135 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
9 * NAT funded by Astaro.
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/netdevice.h>
15#include <linux/ipv6.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter_ipv6.h>
18#include <linux/netfilter/x_tables.h>
19#include <net/netfilter/nf_nat.h>
20#include <net/addrconf.h>
21#include <net/ipv6.h>
22
23static unsigned int
24masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
25{
26 const struct nf_nat_range *range = par->targinfo;
27 enum ip_conntrack_info ctinfo;
28 struct in6_addr src;
29 struct nf_conn *ct;
30 struct nf_nat_range newrange;
31
32 ct = nf_ct_get(skb, &ctinfo);
33 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
34 ctinfo == IP_CT_RELATED_REPLY));
35
36 if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
37 &ipv6_hdr(skb)->daddr, 0, &src) < 0)
38 return NF_DROP;
39
40 nfct_nat(ct)->masq_index = par->out->ifindex;
41
42 newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
43 newrange.min_addr.in6 = src;
44 newrange.max_addr.in6 = src;
45 newrange.min_proto = range->min_proto;
46 newrange.max_proto = range->max_proto;
47
48 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
49}
50
51static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
52{
53 const struct nf_nat_range *range = par->targinfo;
54
55 if (range->flags & NF_NAT_RANGE_MAP_IPS)
56 return -EINVAL;
57 return 0;
58}
59
60static int device_cmp(struct nf_conn *ct, void *ifindex)
61{
62 const struct nf_conn_nat *nat = nfct_nat(ct);
63
64 if (!nat)
65 return 0;
66 if (nf_ct_l3num(ct) != NFPROTO_IPV6)
67 return 0;
68 return nat->masq_index == (int)(long)ifindex;
69}
70
71static int masq_device_event(struct notifier_block *this,
72 unsigned long event, void *ptr)
73{
74 const struct net_device *dev = ptr;
75 struct net *net = dev_net(dev);
76
77 if (event == NETDEV_DOWN)
78 nf_ct_iterate_cleanup(net, device_cmp,
79 (void *)(long)dev->ifindex);
80
81 return NOTIFY_DONE;
82}
83
84static struct notifier_block masq_dev_notifier = {
85 .notifier_call = masq_device_event,
86};
87
88static int masq_inet_event(struct notifier_block *this,
89 unsigned long event, void *ptr)
90{
91 struct inet6_ifaddr *ifa = ptr;
92
93 return masq_device_event(this, event, ifa->idev->dev);
94}
95
96static struct notifier_block masq_inet_notifier = {
97 .notifier_call = masq_inet_event,
98};
99
100static struct xt_target masquerade_tg6_reg __read_mostly = {
101 .name = "MASQUERADE",
102 .family = NFPROTO_IPV6,
103 .checkentry = masquerade_tg6_checkentry,
104 .target = masquerade_tg6,
105 .targetsize = sizeof(struct nf_nat_range),
106 .table = "nat",
107 .hooks = 1 << NF_INET_POST_ROUTING,
108 .me = THIS_MODULE,
109};
110
111static int __init masquerade_tg6_init(void)
112{
113 int err;
114
115 err = xt_register_target(&masquerade_tg6_reg);
116 if (err == 0) {
117 register_netdevice_notifier(&masq_dev_notifier);
118 register_inet6addr_notifier(&masq_inet_notifier);
119 }
120
121 return err;
122}
123static void __exit masquerade_tg6_exit(void)
124{
125 unregister_inet6addr_notifier(&masq_inet_notifier);
126 unregister_netdevice_notifier(&masq_dev_notifier);
127 xt_unregister_target(&masquerade_tg6_reg);
128}
129
130module_init(masquerade_tg6_init);
131module_exit(masquerade_tg6_exit);
132
133MODULE_LICENSE("GPL");
134MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
135MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
new file mode 100644
index 00000000000..e9486915eff
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -0,0 +1,165 @@
1/*
2 * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/ipv6.h>
12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv6.h>
14#include <linux/netfilter_ipv6/ip6t_NPT.h>
15#include <linux/netfilter/x_tables.h>
16
17static __sum16 csum16_complement(__sum16 a)
18{
19 return (__force __sum16)(0xffff - (__force u16)a);
20}
21
22static __sum16 csum16_add(__sum16 a, __sum16 b)
23{
24 u16 sum;
25
26 sum = (__force u16)a + (__force u16)b;
27 sum += (__force u16)a < (__force u16)b;
28 return (__force __sum16)sum;
29}
30
31static __sum16 csum16_sub(__sum16 a, __sum16 b)
32{
33 return csum16_add(a, csum16_complement(b));
34}
35
36static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
37{
38 struct ip6t_npt_tginfo *npt = par->targinfo;
39 __sum16 src_sum = 0, dst_sum = 0;
40 unsigned int i;
41
42 if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
43 return -EINVAL;
44
45 for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
46 src_sum = csum16_add(src_sum,
47 (__force __sum16)npt->src_pfx.in6.s6_addr16[i]);
48 dst_sum = csum16_add(dst_sum,
49 (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]);
50 }
51
52 npt->adjustment = csum16_sub(src_sum, dst_sum);
53 return 0;
54}
55
56static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
57 struct in6_addr *addr)
58{
59 unsigned int pfx_len;
60 unsigned int i, idx;
61 __be32 mask;
62 __sum16 sum;
63
64 pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
65 for (i = 0; i < pfx_len; i += 32) {
66 if (pfx_len - i >= 32)
67 mask = 0;
68 else
69 mask = htonl(~((1 << (pfx_len - i)) - 1));
70
71 idx = i / 32;
72 addr->s6_addr32[idx] &= mask;
73 addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
74 }
75
76 if (pfx_len <= 48)
77 idx = 3;
78 else {
79 for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
80 if ((__force __sum16)addr->s6_addr16[idx] !=
81 CSUM_MANGLED_0)
82 break;
83 }
84 if (idx == ARRAY_SIZE(addr->s6_addr16))
85 return false;
86 }
87
88 sum = csum16_add((__force __sum16)addr->s6_addr16[idx],
89 npt->adjustment);
90 if (sum == CSUM_MANGLED_0)
91 sum = 0;
92 *(__force __sum16 *)&addr->s6_addr16[idx] = sum;
93
94 return true;
95}
96
97static unsigned int
98ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
99{
100 const struct ip6t_npt_tginfo *npt = par->targinfo;
101
102 if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
103 icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
104 offsetof(struct ipv6hdr, saddr));
105 return NF_DROP;
106 }
107 return XT_CONTINUE;
108}
109
110static unsigned int
111ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
112{
113 const struct ip6t_npt_tginfo *npt = par->targinfo;
114
115 if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
116 icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
117 offsetof(struct ipv6hdr, daddr));
118 return NF_DROP;
119 }
120 return XT_CONTINUE;
121}
122
123static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
124 {
125 .name = "SNPT",
126 .target = ip6t_snpt_tg,
127 .targetsize = sizeof(struct ip6t_npt_tginfo),
128 .checkentry = ip6t_npt_checkentry,
129 .family = NFPROTO_IPV6,
130 .hooks = (1 << NF_INET_LOCAL_IN) |
131 (1 << NF_INET_POST_ROUTING),
132 .me = THIS_MODULE,
133 },
134 {
135 .name = "DNPT",
136 .target = ip6t_dnpt_tg,
137 .targetsize = sizeof(struct ip6t_npt_tginfo),
138 .checkentry = ip6t_npt_checkentry,
139 .family = NFPROTO_IPV6,
140 .hooks = (1 << NF_INET_PRE_ROUTING) |
141 (1 << NF_INET_LOCAL_OUT),
142 .me = THIS_MODULE,
143 },
144};
145
146static int __init ip6t_npt_init(void)
147{
148 return xt_register_targets(ip6t_npt_target_reg,
149 ARRAY_SIZE(ip6t_npt_target_reg));
150}
151
152static void __exit ip6t_npt_exit(void)
153{
154 xt_unregister_targets(ip6t_npt_target_reg,
155 ARRAY_SIZE(ip6t_npt_target_reg));
156}
157
158module_init(ip6t_npt_init);
159module_exit(ip6t_npt_exit);
160
161MODULE_LICENSE("GPL");
162MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
163MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
164MODULE_ALIAS("ip6t_SNPT");
165MODULE_ALIAS("ip6t_DNPT");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 325e59a0224..beb5777d204 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
61 net->ipv6.ip6table_filter = 61 net->ipv6.ip6table_filter =
62 ip6t_register_table(net, &packet_filter, repl); 62 ip6t_register_table(net, &packet_filter, repl);
63 kfree(repl); 63 kfree(repl);
64 if (IS_ERR(net->ipv6.ip6table_filter)) 64 return PTR_RET(net->ipv6.ip6table_filter);
65 return PTR_ERR(net->ipv6.ip6table_filter);
66 return 0;
67} 65}
68 66
69static void __net_exit ip6table_filter_net_exit(struct net *net) 67static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 4d782405f12..7431121b87d 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
97 net->ipv6.ip6table_mangle = 97 net->ipv6.ip6table_mangle =
98 ip6t_register_table(net, &packet_mangler, repl); 98 ip6t_register_table(net, &packet_mangler, repl);
99 kfree(repl); 99 kfree(repl);
100 if (IS_ERR(net->ipv6.ip6table_mangle)) 100 return PTR_RET(net->ipv6.ip6table_mangle);
101 return PTR_ERR(net->ipv6.ip6table_mangle);
102 return 0;
103} 101}
104 102
105static void __net_exit ip6table_mangle_net_exit(struct net *net) 103static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
new file mode 100644
index 00000000000..e418bd6350a
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -0,0 +1,321 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
9 * funded by Astaro.
10 */
11
12#include <linux/module.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter_ipv6.h>
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <linux/ipv6.h>
17#include <net/ipv6.h>
18
19#include <net/netfilter/nf_nat.h>
20#include <net/netfilter/nf_nat_core.h>
21#include <net/netfilter/nf_nat_l3proto.h>
22
23static const struct xt_table nf_nat_ipv6_table = {
24 .name = "nat",
25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
26 (1 << NF_INET_POST_ROUTING) |
27 (1 << NF_INET_LOCAL_OUT) |
28 (1 << NF_INET_LOCAL_IN),
29 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6,
31};
32
33static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
34{
35 /* Force range to this IP; let proto decide mapping for
36 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
37 */
38 struct nf_nat_range range;
39
40 range.flags = 0;
41 pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
42 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
43 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
44 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
45
46 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
47}
48
49static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
50 const struct net_device *in,
51 const struct net_device *out,
52 struct nf_conn *ct)
53{
54 struct net *net = nf_ct_net(ct);
55 unsigned int ret;
56
57 ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
58 if (ret == NF_ACCEPT) {
59 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
60 ret = alloc_null_binding(ct, hooknum);
61 }
62 return ret;
63}
64
65static unsigned int
66nf_nat_ipv6_fn(unsigned int hooknum,
67 struct sk_buff *skb,
68 const struct net_device *in,
69 const struct net_device *out,
70 int (*okfn)(struct sk_buff *))
71{
72 struct nf_conn *ct;
73 enum ip_conntrack_info ctinfo;
74 struct nf_conn_nat *nat;
75 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
76 __be16 frag_off;
77 int hdrlen;
78 u8 nexthdr;
79
80 ct = nf_ct_get(skb, &ctinfo);
81 /* Can't track? It's not due to stress, or conntrack would
82 * have dropped it. Hence it's the user's responsibilty to
83 * packet filter it out, or implement conntrack/NAT for that
84 * protocol. 8) --RR
85 */
86 if (!ct)
87 return NF_ACCEPT;
88
89 /* Don't try to NAT if this packet is not conntracked */
90 if (nf_ct_is_untracked(ct))
91 return NF_ACCEPT;
92
93 nat = nfct_nat(ct);
94 if (!nat) {
95 /* NAT module was loaded late. */
96 if (nf_ct_is_confirmed(ct))
97 return NF_ACCEPT;
98 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
99 if (nat == NULL) {
100 pr_debug("failed to add NAT extension\n");
101 return NF_ACCEPT;
102 }
103 }
104
105 switch (ctinfo) {
106 case IP_CT_RELATED:
107 case IP_CT_RELATED_REPLY:
108 nexthdr = ipv6_hdr(skb)->nexthdr;
109 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
110 &nexthdr, &frag_off);
111
112 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
113 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
114 hooknum, hdrlen))
115 return NF_DROP;
116 else
117 return NF_ACCEPT;
118 }
119 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
120 case IP_CT_NEW:
121 /* Seen it before? This can happen for loopback, retrans,
122 * or local packets.
123 */
124 if (!nf_nat_initialized(ct, maniptype)) {
125 unsigned int ret;
126
127 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
128 if (ret != NF_ACCEPT)
129 return ret;
130 } else
131 pr_debug("Already setup manip %s for ct %p\n",
132 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
133 ct);
134 break;
135
136 default:
137 /* ESTABLISHED */
138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139 ctinfo == IP_CT_ESTABLISHED_REPLY);
140 }
141
142 return nf_nat_packet(ct, ctinfo, hooknum, skb);
143}
144
145static unsigned int
146nf_nat_ipv6_in(unsigned int hooknum,
147 struct sk_buff *skb,
148 const struct net_device *in,
149 const struct net_device *out,
150 int (*okfn)(struct sk_buff *))
151{
152 unsigned int ret;
153 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
154
155 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
156 if (ret != NF_DROP && ret != NF_STOLEN &&
157 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
158 skb_dst_drop(skb);
159
160 return ret;
161}
162
163static unsigned int
164nf_nat_ipv6_out(unsigned int hooknum,
165 struct sk_buff *skb,
166 const struct net_device *in,
167 const struct net_device *out,
168 int (*okfn)(struct sk_buff *))
169{
170#ifdef CONFIG_XFRM
171 const struct nf_conn *ct;
172 enum ip_conntrack_info ctinfo;
173#endif
174 unsigned int ret;
175
176 /* root is playing with raw sockets. */
177 if (skb->len < sizeof(struct ipv6hdr))
178 return NF_ACCEPT;
179
180 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
181#ifdef CONFIG_XFRM
182 if (ret != NF_DROP && ret != NF_STOLEN &&
183 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
184 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
185 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
186
187 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
188 &ct->tuplehash[!dir].tuple.dst.u3) ||
189 (ct->tuplehash[dir].tuple.src.u.all !=
190 ct->tuplehash[!dir].tuple.dst.u.all))
191 if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
192 ret = NF_DROP;
193 }
194#endif
195 return ret;
196}
197
198static unsigned int
199nf_nat_ipv6_local_fn(unsigned int hooknum,
200 struct sk_buff *skb,
201 const struct net_device *in,
202 const struct net_device *out,
203 int (*okfn)(struct sk_buff *))
204{
205 const struct nf_conn *ct;
206 enum ip_conntrack_info ctinfo;
207 unsigned int ret;
208
209 /* root is playing with raw sockets. */
210 if (skb->len < sizeof(struct ipv6hdr))
211 return NF_ACCEPT;
212
213 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
214 if (ret != NF_DROP && ret != NF_STOLEN &&
215 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
216 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
217
218 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
219 &ct->tuplehash[!dir].tuple.src.u3)) {
220 if (ip6_route_me_harder(skb))
221 ret = NF_DROP;
222 }
223#ifdef CONFIG_XFRM
224 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
225 ct->tuplehash[dir].tuple.dst.u.all !=
226 ct->tuplehash[!dir].tuple.src.u.all)
227 if (nf_xfrm_me_harder(skb, AF_INET6))
228 ret = NF_DROP;
229#endif
230 }
231 return ret;
232}
233
234static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
235 /* Before packet filtering, change destination */
236 {
237 .hook = nf_nat_ipv6_in,
238 .owner = THIS_MODULE,
239 .pf = NFPROTO_IPV6,
240 .hooknum = NF_INET_PRE_ROUTING,
241 .priority = NF_IP6_PRI_NAT_DST,
242 },
243 /* After packet filtering, change source */
244 {
245 .hook = nf_nat_ipv6_out,
246 .owner = THIS_MODULE,
247 .pf = NFPROTO_IPV6,
248 .hooknum = NF_INET_POST_ROUTING,
249 .priority = NF_IP6_PRI_NAT_SRC,
250 },
251 /* Before packet filtering, change destination */
252 {
253 .hook = nf_nat_ipv6_local_fn,
254 .owner = THIS_MODULE,
255 .pf = NFPROTO_IPV6,
256 .hooknum = NF_INET_LOCAL_OUT,
257 .priority = NF_IP6_PRI_NAT_DST,
258 },
259 /* After packet filtering, change source */
260 {
261 .hook = nf_nat_ipv6_fn,
262 .owner = THIS_MODULE,
263 .pf = NFPROTO_IPV6,
264 .hooknum = NF_INET_LOCAL_IN,
265 .priority = NF_IP6_PRI_NAT_SRC,
266 },
267};
268
269static int __net_init ip6table_nat_net_init(struct net *net)
270{
271 struct ip6t_replace *repl;
272
273 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
274 if (repl == NULL)
275 return -ENOMEM;
276 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
277 kfree(repl);
278 if (IS_ERR(net->ipv6.ip6table_nat))
279 return PTR_ERR(net->ipv6.ip6table_nat);
280 return 0;
281}
282
283static void __net_exit ip6table_nat_net_exit(struct net *net)
284{
285 ip6t_unregister_table(net, net->ipv6.ip6table_nat);
286}
287
288static struct pernet_operations ip6table_nat_net_ops = {
289 .init = ip6table_nat_net_init,
290 .exit = ip6table_nat_net_exit,
291};
292
293static int __init ip6table_nat_init(void)
294{
295 int err;
296
297 err = register_pernet_subsys(&ip6table_nat_net_ops);
298 if (err < 0)
299 goto err1;
300
301 err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
302 if (err < 0)
303 goto err2;
304 return 0;
305
306err2:
307 unregister_pernet_subsys(&ip6table_nat_net_ops);
308err1:
309 return err;
310}
311
312static void __exit ip6table_nat_exit(void)
313{
314 nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
315 unregister_pernet_subsys(&ip6table_nat_net_ops);
316}
317
318module_init(ip6table_nat_init);
319module_exit(ip6table_nat_exit);
320
321MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5b9926a011b..60d1bddff7a 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
40 net->ipv6.ip6table_raw = 40 net->ipv6.ip6table_raw =
41 ip6t_register_table(net, &packet_raw, repl); 41 ip6t_register_table(net, &packet_raw, repl);
42 kfree(repl); 42 kfree(repl);
43 if (IS_ERR(net->ipv6.ip6table_raw)) 43 return PTR_RET(net->ipv6.ip6table_raw);
44 return PTR_ERR(net->ipv6.ip6table_raw);
45 return 0;
46} 44}
47 45
48static void __net_exit ip6table_raw_net_exit(struct net *net) 46static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 91aa2b4d83c..db155351339 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
58 net->ipv6.ip6table_security = 58 net->ipv6.ip6table_security =
59 ip6t_register_table(net, &security_table, repl); 59 ip6t_register_table(net, &security_table, repl);
60 kfree(repl); 60 kfree(repl);
61 if (IS_ERR(net->ipv6.ip6table_security)) 61 return PTR_RET(net->ipv6.ip6table_security);
62 return PTR_ERR(net->ipv6.ip6table_security);
63
64 return 0;
65} 62}
66 63
67static void __net_exit ip6table_security_net_exit(struct net *net) 64static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4794f96cf2e..8860d23e61c 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
28#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
29#include <net/netfilter/nf_conntrack_zones.h> 29#include <net/netfilter/nf_conntrack_zones.h>
30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31#include <net/netfilter/nf_nat_helper.h>
31#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 32#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
32#include <net/netfilter/nf_log.h> 33#include <net/netfilter/nf_log.h>
33 34
@@ -64,82 +65,31 @@ static int ipv6_print_tuple(struct seq_file *s,
64 tuple->src.u3.ip6, tuple->dst.u3.ip6); 65 tuple->src.u3.ip6, tuple->dst.u3.ip6);
65} 66}
66 67
67/*
68 * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
69 *
70 * This function parses (probably truncated) exthdr set "hdr"
71 * of length "len". "nexthdrp" initially points to some place,
72 * where type of the first header can be found.
73 *
74 * It skips all well-known exthdrs, and returns pointer to the start
75 * of unparsable area i.e. the first header with unknown type.
76 * if success, *nexthdr is updated by type/protocol of this header.
77 *
78 * NOTES: - it may return pointer pointing beyond end of packet,
79 * if the last recognized header is truncated in the middle.
80 * - if packet is truncated, so that all parsed headers are skipped,
81 * it returns -1.
82 * - if packet is fragmented, return pointer of the fragment header.
83 * - ESP is unparsable for now and considered like
84 * normal payload protocol.
85 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
86 */
87
88static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
89 u8 *nexthdrp, int len)
90{
91 u8 nexthdr = *nexthdrp;
92
93 while (ipv6_ext_hdr(nexthdr)) {
94 struct ipv6_opt_hdr hdr;
95 int hdrlen;
96
97 if (len < (int)sizeof(struct ipv6_opt_hdr))
98 return -1;
99 if (nexthdr == NEXTHDR_NONE)
100 break;
101 if (nexthdr == NEXTHDR_FRAGMENT)
102 break;
103 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
104 BUG();
105 if (nexthdr == NEXTHDR_AUTH)
106 hdrlen = (hdr.hdrlen+2)<<2;
107 else
108 hdrlen = ipv6_optlen(&hdr);
109
110 nexthdr = hdr.nexthdr;
111 len -= hdrlen;
112 start += hdrlen;
113 }
114
115 *nexthdrp = nexthdr;
116 return start;
117}
118
119static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 68static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
120 unsigned int *dataoff, u_int8_t *protonum) 69 unsigned int *dataoff, u_int8_t *protonum)
121{ 70{
122 unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 71 unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
123 unsigned char pnum; 72 __be16 frag_off;
124 int protoff; 73 int protoff;
74 u8 nexthdr;
125 75
126 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 76 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
127 &pnum, sizeof(pnum)) != 0) { 77 &nexthdr, sizeof(nexthdr)) != 0) {
128 pr_debug("ip6_conntrack_core: can't get nexthdr\n"); 78 pr_debug("ip6_conntrack_core: can't get nexthdr\n");
129 return -NF_ACCEPT; 79 return -NF_ACCEPT;
130 } 80 }
131 protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); 81 protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
132 /* 82 /*
133 * (protoff == skb->len) mean that the packet doesn't have no data 83 * (protoff == skb->len) mean that the packet doesn't have no data
134 * except of IPv6 & ext headers. but it's tracked anyway. - YK 84 * except of IPv6 & ext headers. but it's tracked anyway. - YK
135 */ 85 */
136 if ((protoff < 0) || (protoff > skb->len)) { 86 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
137 pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); 87 pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
138 return -NF_ACCEPT; 88 return -NF_ACCEPT;
139 } 89 }
140 90
141 *dataoff = protoff; 91 *dataoff = protoff;
142 *protonum = pnum; 92 *protonum = nexthdr;
143 return NF_ACCEPT; 93 return NF_ACCEPT;
144} 94}
145 95
@@ -153,10 +103,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
153 const struct nf_conn_help *help; 103 const struct nf_conn_help *help;
154 const struct nf_conntrack_helper *helper; 104 const struct nf_conntrack_helper *helper;
155 enum ip_conntrack_info ctinfo; 105 enum ip_conntrack_info ctinfo;
156 unsigned int ret, protoff; 106 unsigned int ret;
157 unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; 107 __be16 frag_off;
158 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 108 int protoff;
159 109 u8 nexthdr;
160 110
161 /* This is where we call the helper: as the packet goes out. */ 111 /* This is where we call the helper: as the packet goes out. */
162 ct = nf_ct_get(skb, &ctinfo); 112 ct = nf_ct_get(skb, &ctinfo);
@@ -171,9 +121,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
171 if (!helper) 121 if (!helper)
172 return NF_ACCEPT; 122 return NF_ACCEPT;
173 123
174 protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, 124 nexthdr = ipv6_hdr(skb)->nexthdr;
175 skb->len - extoff); 125 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
176 if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { 126 &frag_off);
127 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
177 pr_debug("proto header not found\n"); 128 pr_debug("proto header not found\n");
178 return NF_ACCEPT; 129 return NF_ACCEPT;
179 } 130 }
@@ -192,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
192 const struct net_device *out, 143 const struct net_device *out,
193 int (*okfn)(struct sk_buff *)) 144 int (*okfn)(struct sk_buff *))
194{ 145{
146 struct nf_conn *ct;
147 enum ip_conntrack_info ctinfo;
148 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
149 int protoff;
150 __be16 frag_off;
151
152 ct = nf_ct_get(skb, &ctinfo);
153 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
154 goto out;
155
156 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
157 &frag_off);
158 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
159 pr_debug("proto header not found\n");
160 goto out;
161 }
162
163 /* adjust seqs for loopback traffic only in outgoing direction */
164 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
165 !nf_is_loopback_packet(skb)) {
166 typeof(nf_nat_seq_adjust_hook) seq_adjust;
167
168 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
169 if (!seq_adjust ||
170 !seq_adjust(skb, ct, ctinfo, protoff)) {
171 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
172 return NF_DROP;
173 }
174 }
175out:
195 /* We've seen it coming out the other side: confirm it */ 176 /* We've seen it coming out the other side: confirm it */
196 return nf_conntrack_confirm(skb); 177 return nf_conntrack_confirm(skb);
197} 178}
@@ -199,9 +180,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
199static unsigned int __ipv6_conntrack_in(struct net *net, 180static unsigned int __ipv6_conntrack_in(struct net *net,
200 unsigned int hooknum, 181 unsigned int hooknum,
201 struct sk_buff *skb, 182 struct sk_buff *skb,
183 const struct net_device *in,
184 const struct net_device *out,
202 int (*okfn)(struct sk_buff *)) 185 int (*okfn)(struct sk_buff *))
203{ 186{
204 struct sk_buff *reasm = skb->nfct_reasm; 187 struct sk_buff *reasm = skb->nfct_reasm;
188 const struct nf_conn_help *help;
189 struct nf_conn *ct;
190 enum ip_conntrack_info ctinfo;
205 191
206 /* This packet is fragmented and has reassembled packet. */ 192 /* This packet is fragmented and has reassembled packet. */
207 if (reasm) { 193 if (reasm) {
@@ -213,6 +199,25 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
213 if (ret != NF_ACCEPT) 199 if (ret != NF_ACCEPT)
214 return ret; 200 return ret;
215 } 201 }
202
203 /* Conntrack helpers need the entire reassembled packet in the
204 * POST_ROUTING hook. In case of unconfirmed connections NAT
205 * might reassign a helper, so the entire packet is also
206 * required.
207 */
208 ct = nf_ct_get(reasm, &ctinfo);
209 if (ct != NULL && !nf_ct_is_untracked(ct)) {
210 help = nfct_help(ct);
211 if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
212 nf_conntrack_get_reasm(skb);
213 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
214 (struct net_device *)in,
215 (struct net_device *)out,
216 okfn, NF_IP6_PRI_CONNTRACK + 1);
217 return NF_DROP_ERR(-ECANCELED);
218 }
219 }
220
216 nf_conntrack_get(reasm->nfct); 221 nf_conntrack_get(reasm->nfct);
217 skb->nfct = reasm->nfct; 222 skb->nfct = reasm->nfct;
218 skb->nfctinfo = reasm->nfctinfo; 223 skb->nfctinfo = reasm->nfctinfo;
@@ -228,7 +233,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
228 const struct net_device *out, 233 const struct net_device *out,
229 int (*okfn)(struct sk_buff *)) 234 int (*okfn)(struct sk_buff *))
230{ 235{
231 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); 236 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
232} 237}
233 238
234static unsigned int ipv6_conntrack_local(unsigned int hooknum, 239static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +247,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
242 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); 247 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
243 return NF_ACCEPT; 248 return NF_ACCEPT;
244 } 249 }
245 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); 250 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
246} 251}
247 252
248static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 253static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c9c78c2e666..18bd9bbbd1c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -57,41 +57,27 @@ struct nf_ct_frag6_skb_cb
57 57
58#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) 58#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
59 59
60struct nf_ct_frag6_queue
61{
62 struct inet_frag_queue q;
63
64 __be32 id; /* fragment id */
65 u32 user;
66 struct in6_addr saddr;
67 struct in6_addr daddr;
68
69 unsigned int csum;
70 __u16 nhoffset;
71};
72
73static struct inet_frags nf_frags; 60static struct inet_frags nf_frags;
74static struct netns_frags nf_init_frags;
75 61
76#ifdef CONFIG_SYSCTL 62#ifdef CONFIG_SYSCTL
77static struct ctl_table nf_ct_frag6_sysctl_table[] = { 63static struct ctl_table nf_ct_frag6_sysctl_table[] = {
78 { 64 {
79 .procname = "nf_conntrack_frag6_timeout", 65 .procname = "nf_conntrack_frag6_timeout",
80 .data = &nf_init_frags.timeout, 66 .data = &init_net.nf_frag.frags.timeout,
81 .maxlen = sizeof(unsigned int), 67 .maxlen = sizeof(unsigned int),
82 .mode = 0644, 68 .mode = 0644,
83 .proc_handler = proc_dointvec_jiffies, 69 .proc_handler = proc_dointvec_jiffies,
84 }, 70 },
85 { 71 {
86 .procname = "nf_conntrack_frag6_low_thresh", 72 .procname = "nf_conntrack_frag6_low_thresh",
87 .data = &nf_init_frags.low_thresh, 73 .data = &init_net.nf_frag.frags.low_thresh,
88 .maxlen = sizeof(unsigned int), 74 .maxlen = sizeof(unsigned int),
89 .mode = 0644, 75 .mode = 0644,
90 .proc_handler = proc_dointvec, 76 .proc_handler = proc_dointvec,
91 }, 77 },
92 { 78 {
93 .procname = "nf_conntrack_frag6_high_thresh", 79 .procname = "nf_conntrack_frag6_high_thresh",
94 .data = &nf_init_frags.high_thresh, 80 .data = &init_net.nf_frag.frags.high_thresh,
95 .maxlen = sizeof(unsigned int), 81 .maxlen = sizeof(unsigned int),
96 .mode = 0644, 82 .mode = 0644,
97 .proc_handler = proc_dointvec, 83 .proc_handler = proc_dointvec,
@@ -99,68 +85,86 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
99 { } 85 { }
100}; 86};
101 87
102static struct ctl_table_header *nf_ct_frag6_sysctl_header; 88static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
103#endif
104
105static unsigned int nf_hashfn(struct inet_frag_queue *q)
106{ 89{
107 const struct nf_ct_frag6_queue *nq; 90 struct ctl_table *table;
91 struct ctl_table_header *hdr;
92
93 table = nf_ct_frag6_sysctl_table;
94 if (!net_eq(net, &init_net)) {
95 table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
96 GFP_KERNEL);
97 if (table == NULL)
98 goto err_alloc;
99
100 table[0].data = &net->ipv6.frags.high_thresh;
101 table[1].data = &net->ipv6.frags.low_thresh;
102 table[2].data = &net->ipv6.frags.timeout;
103 }
108 104
109 nq = container_of(q, struct nf_ct_frag6_queue, q); 105 hdr = register_net_sysctl(net, "net/netfilter", table);
110 return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); 106 if (hdr == NULL)
107 goto err_reg;
108
109 net->nf_frag.sysctl.frags_hdr = hdr;
110 return 0;
111
112err_reg:
113 if (!net_eq(net, &init_net))
114 kfree(table);
115err_alloc:
116 return -ENOMEM;
111} 117}
112 118
113static void nf_skb_free(struct sk_buff *skb) 119static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
114{ 120{
115 if (NFCT_FRAG6_CB(skb)->orig) 121 struct ctl_table *table;
116 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
117}
118 122
119/* Destruction primitives. */ 123 table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
124 unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
125 if (!net_eq(net, &init_net))
126 kfree(table);
127}
120 128
121static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) 129#else
130static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
122{ 131{
123 inet_frag_put(&fq->q, &nf_frags); 132 return 0;
124} 133}
134static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
135{
136}
137#endif
125 138
126/* Kill fq entry. It is not destroyed immediately, 139static unsigned int nf_hashfn(struct inet_frag_queue *q)
127 * because caller (and someone more) holds reference count.
128 */
129static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
130{ 140{
131 inet_frag_kill(&fq->q, &nf_frags); 141 const struct frag_queue *nq;
142
143 nq = container_of(q, struct frag_queue, q);
144 return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
132} 145}
133 146
134static void nf_ct_frag6_evictor(void) 147static void nf_skb_free(struct sk_buff *skb)
135{ 148{
136 local_bh_disable(); 149 if (NFCT_FRAG6_CB(skb)->orig)
137 inet_frag_evictor(&nf_init_frags, &nf_frags); 150 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
138 local_bh_enable();
139} 151}
140 152
141static void nf_ct_frag6_expire(unsigned long data) 153static void nf_ct_frag6_expire(unsigned long data)
142{ 154{
143 struct nf_ct_frag6_queue *fq; 155 struct frag_queue *fq;
144 156 struct net *net;
145 fq = container_of((struct inet_frag_queue *)data,
146 struct nf_ct_frag6_queue, q);
147
148 spin_lock(&fq->q.lock);
149 157
150 if (fq->q.last_in & INET_FRAG_COMPLETE) 158 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
151 goto out; 159 net = container_of(fq->q.net, struct net, nf_frag.frags);
152 160
153 fq_kill(fq); 161 ip6_expire_frag_queue(net, fq, &nf_frags);
154
155out:
156 spin_unlock(&fq->q.lock);
157 fq_put(fq);
158} 162}
159 163
160/* Creation primitives. */ 164/* Creation primitives. */
161 165static inline struct frag_queue *fq_find(struct net *net, __be32 id,
162static __inline__ struct nf_ct_frag6_queue * 166 u32 user, struct in6_addr *src,
163fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) 167 struct in6_addr *dst)
164{ 168{
165 struct inet_frag_queue *q; 169 struct inet_frag_queue *q;
166 struct ip6_create_arg arg; 170 struct ip6_create_arg arg;
@@ -174,22 +178,23 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
174 read_lock_bh(&nf_frags.lock); 178 read_lock_bh(&nf_frags.lock);
175 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); 179 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
176 180
177 q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); 181 q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
178 local_bh_enable(); 182 local_bh_enable();
179 if (q == NULL) 183 if (q == NULL)
180 goto oom; 184 goto oom;
181 185
182 return container_of(q, struct nf_ct_frag6_queue, q); 186 return container_of(q, struct frag_queue, q);
183 187
184oom: 188oom:
185 return NULL; 189 return NULL;
186} 190}
187 191
188 192
189static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, 193static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
190 const struct frag_hdr *fhdr, int nhoff) 194 const struct frag_hdr *fhdr, int nhoff)
191{ 195{
192 struct sk_buff *prev, *next; 196 struct sk_buff *prev, *next;
197 unsigned int payload_len;
193 int offset, end; 198 int offset, end;
194 199
195 if (fq->q.last_in & INET_FRAG_COMPLETE) { 200 if (fq->q.last_in & INET_FRAG_COMPLETE) {
@@ -197,8 +202,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
197 goto err; 202 goto err;
198 } 203 }
199 204
205 payload_len = ntohs(ipv6_hdr(skb)->payload_len);
206
200 offset = ntohs(fhdr->frag_off) & ~0x7; 207 offset = ntohs(fhdr->frag_off) & ~0x7;
201 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - 208 end = offset + (payload_len -
202 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); 209 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
203 210
204 if ((unsigned int)end > IPV6_MAXPLEN) { 211 if ((unsigned int)end > IPV6_MAXPLEN) {
@@ -307,7 +314,9 @@ found:
307 skb->dev = NULL; 314 skb->dev = NULL;
308 fq->q.stamp = skb->tstamp; 315 fq->q.stamp = skb->tstamp;
309 fq->q.meat += skb->len; 316 fq->q.meat += skb->len;
310 atomic_add(skb->truesize, &nf_init_frags.mem); 317 if (payload_len > fq->q.max_size)
318 fq->q.max_size = payload_len;
319 atomic_add(skb->truesize, &fq->q.net->mem);
311 320
312 /* The first fragment. 321 /* The first fragment.
313 * nhoffset is obtained from the first fragment, of course. 322 * nhoffset is obtained from the first fragment, of course.
@@ -317,12 +326,12 @@ found:
317 fq->q.last_in |= INET_FRAG_FIRST_IN; 326 fq->q.last_in |= INET_FRAG_FIRST_IN;
318 } 327 }
319 write_lock(&nf_frags.lock); 328 write_lock(&nf_frags.lock);
320 list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); 329 list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
321 write_unlock(&nf_frags.lock); 330 write_unlock(&nf_frags.lock);
322 return 0; 331 return 0;
323 332
324discard_fq: 333discard_fq:
325 fq_kill(fq); 334 inet_frag_kill(&fq->q, &nf_frags);
326err: 335err:
327 return -1; 336 return -1;
328} 337}
@@ -337,12 +346,12 @@ err:
337 * the last and the first frames arrived and all the bits are here. 346 * the last and the first frames arrived and all the bits are here.
338 */ 347 */
339static struct sk_buff * 348static struct sk_buff *
340nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) 349nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
341{ 350{
342 struct sk_buff *fp, *op, *head = fq->q.fragments; 351 struct sk_buff *fp, *op, *head = fq->q.fragments;
343 int payload_len; 352 int payload_len;
344 353
345 fq_kill(fq); 354 inet_frag_kill(&fq->q, &nf_frags);
346 355
347 WARN_ON(head == NULL); 356 WARN_ON(head == NULL);
348 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); 357 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -386,7 +395,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
386 clone->ip_summed = head->ip_summed; 395 clone->ip_summed = head->ip_summed;
387 396
388 NFCT_FRAG6_CB(clone)->orig = NULL; 397 NFCT_FRAG6_CB(clone)->orig = NULL;
389 atomic_add(clone->truesize, &nf_init_frags.mem); 398 atomic_add(clone->truesize, &fq->q.net->mem);
390 } 399 }
391 400
392 /* We have to remove fragment header from datagram and to relocate 401 /* We have to remove fragment header from datagram and to relocate
@@ -410,12 +419,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
410 head->csum = csum_add(head->csum, fp->csum); 419 head->csum = csum_add(head->csum, fp->csum);
411 head->truesize += fp->truesize; 420 head->truesize += fp->truesize;
412 } 421 }
413 atomic_sub(head->truesize, &nf_init_frags.mem); 422 atomic_sub(head->truesize, &fq->q.net->mem);
414 423
424 head->local_df = 1;
415 head->next = NULL; 425 head->next = NULL;
416 head->dev = dev; 426 head->dev = dev;
417 head->tstamp = fq->q.stamp; 427 head->tstamp = fq->q.stamp;
418 ipv6_hdr(head)->payload_len = htons(payload_len); 428 ipv6_hdr(head)->payload_len = htons(payload_len);
429 IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
419 430
420 /* Yes, and fold redundant checksum back. 8) */ 431 /* Yes, and fold redundant checksum back. 8) */
421 if (head->ip_summed == CHECKSUM_COMPLETE) 432 if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -520,8 +531,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
520{ 531{
521 struct sk_buff *clone; 532 struct sk_buff *clone;
522 struct net_device *dev = skb->dev; 533 struct net_device *dev = skb->dev;
534 struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
535 : dev_net(skb->dev);
523 struct frag_hdr *fhdr; 536 struct frag_hdr *fhdr;
524 struct nf_ct_frag6_queue *fq; 537 struct frag_queue *fq;
525 struct ipv6hdr *hdr; 538 struct ipv6hdr *hdr;
526 int fhoff, nhoff; 539 int fhoff, nhoff;
527 u8 prevhdr; 540 u8 prevhdr;
@@ -553,10 +566,11 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
553 hdr = ipv6_hdr(clone); 566 hdr = ipv6_hdr(clone);
554 fhdr = (struct frag_hdr *)skb_transport_header(clone); 567 fhdr = (struct frag_hdr *)skb_transport_header(clone);
555 568
556 if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) 569 local_bh_disable();
557 nf_ct_frag6_evictor(); 570 inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
571 local_bh_enable();
558 572
559 fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); 573 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
560 if (fq == NULL) { 574 if (fq == NULL) {
561 pr_debug("Can't find and can't create new queue\n"); 575 pr_debug("Can't find and can't create new queue\n");
562 goto ret_orig; 576 goto ret_orig;
@@ -567,7 +581,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
567 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { 581 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
568 spin_unlock_bh(&fq->q.lock); 582 spin_unlock_bh(&fq->q.lock);
569 pr_debug("Can't insert skb to queue\n"); 583 pr_debug("Can't insert skb to queue\n");
570 fq_put(fq); 584 inet_frag_put(&fq->q, &nf_frags);
571 goto ret_orig; 585 goto ret_orig;
572 } 586 }
573 587
@@ -579,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
579 } 593 }
580 spin_unlock_bh(&fq->q.lock); 594 spin_unlock_bh(&fq->q.lock);
581 595
582 fq_put(fq); 596 inet_frag_put(&fq->q, &nf_frags);
583 return ret_skb; 597 return ret_skb;
584 598
585ret_orig: 599ret_orig:
@@ -592,6 +606,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
592 int (*okfn)(struct sk_buff *)) 606 int (*okfn)(struct sk_buff *))
593{ 607{
594 struct sk_buff *s, *s2; 608 struct sk_buff *s, *s2;
609 unsigned int ret = 0;
595 610
596 for (s = NFCT_FRAG6_CB(skb)->orig; s;) { 611 for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
597 nf_conntrack_put_reasm(s->nfct_reasm); 612 nf_conntrack_put_reasm(s->nfct_reasm);
@@ -601,49 +616,62 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
601 s2 = s->next; 616 s2 = s->next;
602 s->next = NULL; 617 s->next = NULL;
603 618
604 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, 619 if (ret != -ECANCELED)
605 NF_IP6_PRI_CONNTRACK_DEFRAG + 1); 620 ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
621 in, out, okfn,
622 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
623 else
624 kfree_skb(s);
625
606 s = s2; 626 s = s2;
607 } 627 }
608 nf_conntrack_put_reasm(skb); 628 nf_conntrack_put_reasm(skb);
609} 629}
610 630
631static int nf_ct_net_init(struct net *net)
632{
633 net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
634 net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
635 net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
636 inet_frags_init_net(&net->nf_frag.frags);
637
638 return nf_ct_frag6_sysctl_register(net);
639}
640
641static void nf_ct_net_exit(struct net *net)
642{
643 nf_ct_frags6_sysctl_unregister(net);
644 inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
645}
646
647static struct pernet_operations nf_ct_net_ops = {
648 .init = nf_ct_net_init,
649 .exit = nf_ct_net_exit,
650};
651
611int nf_ct_frag6_init(void) 652int nf_ct_frag6_init(void)
612{ 653{
654 int ret = 0;
655
613 nf_frags.hashfn = nf_hashfn; 656 nf_frags.hashfn = nf_hashfn;
614 nf_frags.constructor = ip6_frag_init; 657 nf_frags.constructor = ip6_frag_init;
615 nf_frags.destructor = NULL; 658 nf_frags.destructor = NULL;
616 nf_frags.skb_free = nf_skb_free; 659 nf_frags.skb_free = nf_skb_free;
617 nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); 660 nf_frags.qsize = sizeof(struct frag_queue);
618 nf_frags.match = ip6_frag_match; 661 nf_frags.match = ip6_frag_match;
619 nf_frags.frag_expire = nf_ct_frag6_expire; 662 nf_frags.frag_expire = nf_ct_frag6_expire;
620 nf_frags.secret_interval = 10 * 60 * HZ; 663 nf_frags.secret_interval = 10 * 60 * HZ;
621 nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
622 nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
623 nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
624 inet_frags_init_net(&nf_init_frags);
625 inet_frags_init(&nf_frags); 664 inet_frags_init(&nf_frags);
626 665
627#ifdef CONFIG_SYSCTL 666 ret = register_pernet_subsys(&nf_ct_net_ops);
628 nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", 667 if (ret)
629 nf_ct_frag6_sysctl_table);
630 if (!nf_ct_frag6_sysctl_header) {
631 inet_frags_fini(&nf_frags); 668 inet_frags_fini(&nf_frags);
632 return -ENOMEM;
633 }
634#endif
635 669
636 return 0; 670 return ret;
637} 671}
638 672
639void nf_ct_frag6_cleanup(void) 673void nf_ct_frag6_cleanup(void)
640{ 674{
641#ifdef CONFIG_SYSCTL 675 unregister_pernet_subsys(&nf_ct_net_ops);
642 unregister_net_sysctl_table(nf_ct_frag6_sysctl_header);
643 nf_ct_frag6_sysctl_header = NULL;
644#endif
645 inet_frags_fini(&nf_frags); 676 inet_frags_fini(&nf_frags);
646
647 nf_init_frags.low_thresh = 0;
648 nf_ct_frag6_evictor();
649} 677}
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
new file mode 100644
index 00000000000..abfe75a2e31
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -0,0 +1,288 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of IPv6 NAT funded by Astaro.
9 */
10#include <linux/types.h>
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/ipv6.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv6.h>
16#include <net/secure_seq.h>
17#include <net/checksum.h>
18#include <net/ip6_checksum.h>
19#include <net/ip6_route.h>
20#include <net/ipv6.h>
21
22#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_nat_core.h>
25#include <net/netfilter/nf_nat_l3proto.h>
26#include <net/netfilter/nf_nat_l4proto.h>
27
28static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
29
30#ifdef CONFIG_XFRM
31static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
32 const struct nf_conn *ct,
33 enum ip_conntrack_dir dir,
34 unsigned long statusbit,
35 struct flowi *fl)
36{
37 const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
38 struct flowi6 *fl6 = &fl->u.ip6;
39
40 if (ct->status & statusbit) {
41 fl6->daddr = t->dst.u3.in6;
42 if (t->dst.protonum == IPPROTO_TCP ||
43 t->dst.protonum == IPPROTO_UDP ||
44 t->dst.protonum == IPPROTO_UDPLITE ||
45 t->dst.protonum == IPPROTO_DCCP ||
46 t->dst.protonum == IPPROTO_SCTP)
47 fl6->fl6_dport = t->dst.u.all;
48 }
49
50 statusbit ^= IPS_NAT_MASK;
51
52 if (ct->status & statusbit) {
53 fl6->saddr = t->src.u3.in6;
54 if (t->dst.protonum == IPPROTO_TCP ||
55 t->dst.protonum == IPPROTO_UDP ||
56 t->dst.protonum == IPPROTO_UDPLITE ||
57 t->dst.protonum == IPPROTO_DCCP ||
58 t->dst.protonum == IPPROTO_SCTP)
59 fl6->fl6_sport = t->src.u.all;
60 }
61}
62#endif
63
64static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
65 const struct nf_nat_range *range)
66{
67 return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
68 ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
69}
70
71static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
72 __be16 dport)
73{
74 return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
75}
76
77static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
78 unsigned int iphdroff,
79 const struct nf_nat_l4proto *l4proto,
80 const struct nf_conntrack_tuple *target,
81 enum nf_nat_manip_type maniptype)
82{
83 struct ipv6hdr *ipv6h;
84 __be16 frag_off;
85 int hdroff;
86 u8 nexthdr;
87
88 if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
89 return false;
90
91 ipv6h = (void *)skb->data + iphdroff;
92 nexthdr = ipv6h->nexthdr;
93 hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
94 &nexthdr, &frag_off);
95 if (hdroff < 0)
96 goto manip_addr;
97
98 if ((frag_off & htons(~0x7)) == 0 &&
99 !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
100 target, maniptype))
101 return false;
102manip_addr:
103 if (maniptype == NF_NAT_MANIP_SRC)
104 ipv6h->saddr = target->src.u3.in6;
105 else
106 ipv6h->daddr = target->dst.u3.in6;
107
108 return true;
109}
110
111static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
112 unsigned int iphdroff, __sum16 *check,
113 const struct nf_conntrack_tuple *t,
114 enum nf_nat_manip_type maniptype)
115{
116 const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
117 const struct in6_addr *oldip, *newip;
118
119 if (maniptype == NF_NAT_MANIP_SRC) {
120 oldip = &ipv6h->saddr;
121 newip = &t->src.u3.in6;
122 } else {
123 oldip = &ipv6h->daddr;
124 newip = &t->dst.u3.in6;
125 }
126 inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
127 newip->s6_addr32, 1);
128}
129
130static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
131 u8 proto, void *data, __sum16 *check,
132 int datalen, int oldlen)
133{
134 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
135 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
136
137 if (skb->ip_summed != CHECKSUM_PARTIAL) {
138 if (!(rt->rt6i_flags & RTF_LOCAL) &&
139 (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
140 skb->ip_summed = CHECKSUM_PARTIAL;
141 skb->csum_start = skb_headroom(skb) +
142 skb_network_offset(skb) +
143 (data - (void *)skb->data);
144 skb->csum_offset = (void *)check - data;
145 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
146 datalen, proto, 0);
147 } else {
148 *check = 0;
149 *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
150 datalen, proto,
151 csum_partial(data, datalen,
152 0));
153 if (proto == IPPROTO_UDP && !*check)
154 *check = CSUM_MANGLED_0;
155 }
156 } else
157 inet_proto_csum_replace2(check, skb,
158 htons(oldlen), htons(datalen), 1);
159}
160
161static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
162 struct nf_nat_range *range)
163{
164 if (tb[CTA_NAT_V6_MINIP]) {
165 nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
166 sizeof(struct in6_addr));
167 range->flags |= NF_NAT_RANGE_MAP_IPS;
168 }
169
170 if (tb[CTA_NAT_V6_MAXIP])
171 nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
172 sizeof(struct in6_addr));
173 else
174 range->max_addr = range->min_addr;
175
176 return 0;
177}
178
179static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
180 .l3proto = NFPROTO_IPV6,
181 .secure_port = nf_nat_ipv6_secure_port,
182 .in_range = nf_nat_ipv6_in_range,
183 .manip_pkt = nf_nat_ipv6_manip_pkt,
184 .csum_update = nf_nat_ipv6_csum_update,
185 .csum_recalc = nf_nat_ipv6_csum_recalc,
186 .nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
187#ifdef CONFIG_XFRM
188 .decode_session = nf_nat_ipv6_decode_session,
189#endif
190};
191
192int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
193 struct nf_conn *ct,
194 enum ip_conntrack_info ctinfo,
195 unsigned int hooknum,
196 unsigned int hdrlen)
197{
198 struct {
199 struct icmp6hdr icmp6;
200 struct ipv6hdr ip6;
201 } *inside;
202 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
203 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
204 const struct nf_nat_l4proto *l4proto;
205 struct nf_conntrack_tuple target;
206 unsigned long statusbit;
207
208 NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
209
210 if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
211 return 0;
212 if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
213 return 0;
214
215 inside = (void *)skb->data + hdrlen;
216 if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
217 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
218 return 0;
219 if (ct->status & IPS_NAT_MASK)
220 return 0;
221 }
222
223 if (manip == NF_NAT_MANIP_SRC)
224 statusbit = IPS_SRC_NAT;
225 else
226 statusbit = IPS_DST_NAT;
227
228 /* Invert if this is reply direction */
229 if (dir == IP_CT_DIR_REPLY)
230 statusbit ^= IPS_NAT_MASK;
231
232 if (!(ct->status & statusbit))
233 return 1;
234
235 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
236 if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
237 l4proto, &ct->tuplehash[!dir].tuple, !manip))
238 return 0;
239
240 if (skb->ip_summed != CHECKSUM_PARTIAL) {
241 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
242 inside = (void *)skb->data + hdrlen;
243 inside->icmp6.icmp6_cksum = 0;
244 inside->icmp6.icmp6_cksum =
245 csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
246 skb->len - hdrlen, IPPROTO_ICMPV6,
247 csum_partial(&inside->icmp6,
248 skb->len - hdrlen, 0));
249 }
250
251 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
252 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
253 if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
254 return 0;
255
256 return 1;
257}
258EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
259
260static int __init nf_nat_l3proto_ipv6_init(void)
261{
262 int err;
263
264 err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
265 if (err < 0)
266 goto err1;
267 err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
268 if (err < 0)
269 goto err2;
270 return err;
271
272err2:
273 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
274err1:
275 return err;
276}
277
278static void __exit nf_nat_l3proto_ipv6_exit(void)
279{
280 nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
281 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
282}
283
284MODULE_LICENSE("GPL");
285MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
286
287module_init(nf_nat_l3proto_ipv6_init);
288module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
new file mode 100644
index 00000000000..5d6da784305
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -0,0 +1,90 @@
1/*
2 * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
9 * NAT funded by Astaro.
10 */
11
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/icmpv6.h>
15
16#include <linux/netfilter.h>
17#include <net/netfilter/nf_nat.h>
18#include <net/netfilter/nf_nat_core.h>
19#include <net/netfilter/nf_nat_l3proto.h>
20#include <net/netfilter/nf_nat_l4proto.h>
21
22static bool
23icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
24 enum nf_nat_manip_type maniptype,
25 const union nf_conntrack_man_proto *min,
26 const union nf_conntrack_man_proto *max)
27{
28 return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
29 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
30}
31
32static void
33icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
34 struct nf_conntrack_tuple *tuple,
35 const struct nf_nat_range *range,
36 enum nf_nat_manip_type maniptype,
37 const struct nf_conn *ct)
38{
39 static u16 id;
40 unsigned int range_size;
41 unsigned int i;
42
43 range_size = ntohs(range->max_proto.icmp.id) -
44 ntohs(range->min_proto.icmp.id) + 1;
45
46 if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
47 range_size = 0xffff;
48
49 for (i = 0; ; ++id) {
50 tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
51 (id % range_size));
52 if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
53 return;
54 }
55}
56
57static bool
58icmpv6_manip_pkt(struct sk_buff *skb,
59 const struct nf_nat_l3proto *l3proto,
60 unsigned int iphdroff, unsigned int hdroff,
61 const struct nf_conntrack_tuple *tuple,
62 enum nf_nat_manip_type maniptype)
63{
64 struct icmp6hdr *hdr;
65
66 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
67 return false;
68
69 hdr = (struct icmp6hdr *)(skb->data + hdroff);
70 l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
71 tuple, maniptype);
72 if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
73 hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
74 inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
75 hdr->icmp6_identifier,
76 tuple->src.u.icmp.id, 0);
77 hdr->icmp6_identifier = tuple->src.u.icmp.id;
78 }
79 return true;
80}
81
82const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
83 .l4proto = IPPROTO_ICMPV6,
84 .manip_pkt = icmpv6_manip_pkt,
85 .in_range = icmpv6_in_range,
86 .unique_tuple = icmpv6_unique_tuple,
87#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
88 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
89#endif
90};
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index da2e92d05c1..745a3204295 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net)
307 goto proc_dev_snmp6_fail; 307 goto proc_dev_snmp6_fail;
308 return 0; 308 return 0;
309 309
310proc_dev_snmp6_fail:
311 proc_net_remove(net, "snmp6");
310proc_snmp6_fail: 312proc_snmp6_fail:
311 proc_net_remove(net, "sockstat6"); 313 proc_net_remove(net, "sockstat6");
312proc_dev_snmp6_fail:
313 proc_net_remove(net, "dev_snmp6");
314 return -ENOMEM; 314 return -ENOMEM;
315} 315}
316 316
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ef0579d5bca..d8e95c77db9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -107,21 +107,20 @@ found:
107 * 0 - deliver 107 * 0 - deliver
108 * 1 - block 108 * 1 - block
109 */ 109 */
110static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) 110static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
111{ 111{
112 struct icmp6hdr *icmph; 112 struct icmp6hdr *_hdr;
113 struct raw6_sock *rp = raw6_sk(sk); 113 const struct icmp6hdr *hdr;
114
115 if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
116 __u32 *data = &rp->filter.data[0];
117 int bit_nr;
118 114
119 icmph = (struct icmp6hdr *) skb->data; 115 hdr = skb_header_pointer(skb, skb_transport_offset(skb),
120 bit_nr = icmph->icmp6_type; 116 sizeof(_hdr), &_hdr);
117 if (hdr) {
118 const __u32 *data = &raw6_sk(sk)->filter.data[0];
119 unsigned int type = hdr->icmp6_type;
121 120
122 return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; 121 return (data[type >> 5] & (1U << (type & 31))) != 0;
123 } 122 }
124 return 0; 123 return 1;
125} 124}
126 125
127#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 126#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -1251,7 +1250,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
1251 sk_wmem_alloc_get(sp), 1250 sk_wmem_alloc_get(sp),
1252 sk_rmem_alloc_get(sp), 1251 sk_rmem_alloc_get(sp),
1253 0, 0L, 0, 1252 0, 0L, 0,
1254 sock_i_uid(sp), 0, 1253 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1254 0,
1255 sock_i_ino(sp), 1255 sock_i_ino(sp),
1256 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); 1256 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
1257} 1257}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4ff9af628e7..da8a4e301b1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -65,36 +65,8 @@ struct ip6frag_skb_cb
65#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) 65#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
66 66
67 67
68/*
69 * Equivalent of ipv4 struct ipq
70 */
71
72struct frag_queue
73{
74 struct inet_frag_queue q;
75
76 __be32 id; /* fragment id */
77 u32 user;
78 struct in6_addr saddr;
79 struct in6_addr daddr;
80
81 int iif;
82 unsigned int csum;
83 __u16 nhoffset;
84};
85
86static struct inet_frags ip6_frags; 68static struct inet_frags ip6_frags;
87 69
88int ip6_frag_nqueues(struct net *net)
89{
90 return net->ipv6.frags.nqueues;
91}
92
93int ip6_frag_mem(struct net *net)
94{
95 return atomic_read(&net->ipv6.frags.mem);
96}
97
98static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, 70static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
99 struct net_device *dev); 71 struct net_device *dev);
100 72
@@ -159,46 +131,18 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
159} 131}
160EXPORT_SYMBOL(ip6_frag_init); 132EXPORT_SYMBOL(ip6_frag_init);
161 133
162/* Destruction primitives. */ 134void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
163 135 struct inet_frags *frags)
164static __inline__ void fq_put(struct frag_queue *fq)
165{
166 inet_frag_put(&fq->q, &ip6_frags);
167}
168
169/* Kill fq entry. It is not destroyed immediately,
170 * because caller (and someone more) holds reference count.
171 */
172static __inline__ void fq_kill(struct frag_queue *fq)
173{
174 inet_frag_kill(&fq->q, &ip6_frags);
175}
176
177static void ip6_evictor(struct net *net, struct inet6_dev *idev)
178{ 136{
179 int evicted;
180
181 evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
182 if (evicted)
183 IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
184}
185
186static void ip6_frag_expire(unsigned long data)
187{
188 struct frag_queue *fq;
189 struct net_device *dev = NULL; 137 struct net_device *dev = NULL;
190 struct net *net;
191
192 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
193 138
194 spin_lock(&fq->q.lock); 139 spin_lock(&fq->q.lock);
195 140
196 if (fq->q.last_in & INET_FRAG_COMPLETE) 141 if (fq->q.last_in & INET_FRAG_COMPLETE)
197 goto out; 142 goto out;
198 143
199 fq_kill(fq); 144 inet_frag_kill(&fq->q, frags);
200 145
201 net = container_of(fq->q.net, struct net, ipv6.frags);
202 rcu_read_lock(); 146 rcu_read_lock();
203 dev = dev_get_by_index_rcu(net, fq->iif); 147 dev = dev_get_by_index_rcu(net, fq->iif);
204 if (!dev) 148 if (!dev)
@@ -222,7 +166,19 @@ out_rcu_unlock:
222 rcu_read_unlock(); 166 rcu_read_unlock();
223out: 167out:
224 spin_unlock(&fq->q.lock); 168 spin_unlock(&fq->q.lock);
225 fq_put(fq); 169 inet_frag_put(&fq->q, frags);
170}
171EXPORT_SYMBOL(ip6_expire_frag_queue);
172
173static void ip6_frag_expire(unsigned long data)
174{
175 struct frag_queue *fq;
176 struct net *net;
177
178 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
179 net = container_of(fq->q.net, struct net, ipv6.frags);
180
181 ip6_expire_frag_queue(net, fq, &ip6_frags);
226} 182}
227 183
228static __inline__ struct frag_queue * 184static __inline__ struct frag_queue *
@@ -391,7 +347,7 @@ found:
391 return -1; 347 return -1;
392 348
393discard_fq: 349discard_fq:
394 fq_kill(fq); 350 inet_frag_kill(&fq->q, &ip6_frags);
395err: 351err:
396 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 352 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
397 IPSTATS_MIB_REASMFAILS); 353 IPSTATS_MIB_REASMFAILS);
@@ -417,7 +373,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
417 unsigned int nhoff; 373 unsigned int nhoff;
418 int sum_truesize; 374 int sum_truesize;
419 375
420 fq_kill(fq); 376 inet_frag_kill(&fq->q, &ip6_frags);
421 377
422 /* Make the one we just received the head. */ 378 /* Make the one we just received the head. */
423 if (prev) { 379 if (prev) {
@@ -550,6 +506,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
550 struct frag_queue *fq; 506 struct frag_queue *fq;
551 const struct ipv6hdr *hdr = ipv6_hdr(skb); 507 const struct ipv6hdr *hdr = ipv6_hdr(skb);
552 struct net *net = dev_net(skb_dst(skb)->dev); 508 struct net *net = dev_net(skb_dst(skb)->dev);
509 int evicted;
553 510
554 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); 511 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
555 512
@@ -574,8 +531,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
574 return 1; 531 return 1;
575 } 532 }
576 533
577 if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) 534 evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
578 ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); 535 if (evicted)
536 IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
537 IPSTATS_MIB_REASMFAILS, evicted);
579 538
580 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); 539 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
581 if (fq != NULL) { 540 if (fq != NULL) {
@@ -586,7 +545,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
586 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); 545 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
587 546
588 spin_unlock(&fq->q.lock); 547 spin_unlock(&fq->q.lock);
589 fq_put(fq); 548 inet_frag_put(&fq->q, &ip6_frags);
590 return ret; 549 return ret;
591 } 550 }
592 551
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8e80fd27910..7c7e963260e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -222,11 +222,11 @@ static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255, 222 [RTAX_HOPLIMIT - 1] = 255,
223}; 223};
224 224
225static struct rt6_info ip6_null_entry_template = { 225static const struct rt6_info ip6_null_entry_template = {
226 .dst = { 226 .dst = {
227 .__refcnt = ATOMIC_INIT(1), 227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1, 228 .__use = 1,
229 .obsolete = -1, 229 .obsolete = DST_OBSOLETE_FORCE_CHK,
230 .error = -ENETUNREACH, 230 .error = -ENETUNREACH,
231 .input = ip6_pkt_discard, 231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out, 232 .output = ip6_pkt_discard_out,
@@ -242,11 +242,11 @@ static struct rt6_info ip6_null_entry_template = {
242static int ip6_pkt_prohibit(struct sk_buff *skb); 242static int ip6_pkt_prohibit(struct sk_buff *skb);
243static int ip6_pkt_prohibit_out(struct sk_buff *skb); 243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244 244
245static struct rt6_info ip6_prohibit_entry_template = { 245static const struct rt6_info ip6_prohibit_entry_template = {
246 .dst = { 246 .dst = {
247 .__refcnt = ATOMIC_INIT(1), 247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1, 248 .__use = 1,
249 .obsolete = -1, 249 .obsolete = DST_OBSOLETE_FORCE_CHK,
250 .error = -EACCES, 250 .error = -EACCES,
251 .input = ip6_pkt_prohibit, 251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out, 252 .output = ip6_pkt_prohibit_out,
@@ -257,11 +257,11 @@ static struct rt6_info ip6_prohibit_entry_template = {
257 .rt6i_ref = ATOMIC_INIT(1), 257 .rt6i_ref = ATOMIC_INIT(1),
258}; 258};
259 259
260static struct rt6_info ip6_blk_hole_entry_template = { 260static const struct rt6_info ip6_blk_hole_entry_template = {
261 .dst = { 261 .dst = {
262 .__refcnt = ATOMIC_INIT(1), 262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1, 263 .__use = 1,
264 .obsolete = -1, 264 .obsolete = DST_OBSOLETE_FORCE_CHK,
265 .error = -EINVAL, 265 .error = -EINVAL,
266 .input = dst_discard, 266 .input = dst_discard,
267 .output = dst_discard, 267 .output = dst_discard,
@@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
281 struct fib6_table *table) 281 struct fib6_table *table)
282{ 282{
283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 0, DST_OBSOLETE_NONE, flags); 284 0, DST_OBSOLETE_FORCE_CHK, flags);
285 285
286 if (rt) { 286 if (rt) {
287 struct dst_entry *dst = &rt->dst; 287 struct dst_entry *dst = &rt->dst;
288 288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); 290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 rt->rt6i_genid = rt_genid(net);
291 } 292 }
292 return rt; 293 return rt;
293} 294}
@@ -369,15 +370,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
369 370
370static bool rt6_check_expired(const struct rt6_info *rt) 371static bool rt6_check_expired(const struct rt6_info *rt)
371{ 372{
372 struct rt6_info *ort = NULL;
373
374 if (rt->rt6i_flags & RTF_EXPIRES) { 373 if (rt->rt6i_flags & RTF_EXPIRES) {
375 if (time_after(jiffies, rt->dst.expires)) 374 if (time_after(jiffies, rt->dst.expires))
376 return true; 375 return true;
377 } else if (rt->dst.from) { 376 } else if (rt->dst.from) {
378 ort = (struct rt6_info *) rt->dst.from; 377 return rt6_check_expired((struct rt6_info *) rt->dst.from);
379 return (ort->rt6i_flags & RTF_EXPIRES) &&
380 time_after(jiffies, ort->dst.expires);
381 } 378 }
382 return false; 379 return false;
383} 380}
@@ -451,10 +448,9 @@ static void rt6_probe(struct rt6_info *rt)
451 * Router Reachability Probe MUST be rate-limited 448 * Router Reachability Probe MUST be rate-limited
452 * to no more than one per minute. 449 * to no more than one per minute.
453 */ 450 */
454 rcu_read_lock();
455 neigh = rt ? rt->n : NULL; 451 neigh = rt ? rt->n : NULL;
456 if (!neigh || (neigh->nud_state & NUD_VALID)) 452 if (!neigh || (neigh->nud_state & NUD_VALID))
457 goto out; 453 return;
458 read_lock_bh(&neigh->lock); 454 read_lock_bh(&neigh->lock);
459 if (!(neigh->nud_state & NUD_VALID) && 455 if (!(neigh->nud_state & NUD_VALID) &&
460 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 456 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
@@ -470,8 +466,6 @@ static void rt6_probe(struct rt6_info *rt)
470 } else { 466 } else {
471 read_unlock_bh(&neigh->lock); 467 read_unlock_bh(&neigh->lock);
472 } 468 }
473out:
474 rcu_read_unlock();
475} 469}
476#else 470#else
477static inline void rt6_probe(struct rt6_info *rt) 471static inline void rt6_probe(struct rt6_info *rt)
@@ -498,7 +492,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
498 struct neighbour *neigh; 492 struct neighbour *neigh;
499 int m; 493 int m;
500 494
501 rcu_read_lock();
502 neigh = rt->n; 495 neigh = rt->n;
503 if (rt->rt6i_flags & RTF_NONEXTHOP || 496 if (rt->rt6i_flags & RTF_NONEXTHOP ||
504 !(rt->rt6i_flags & RTF_GATEWAY)) 497 !(rt->rt6i_flags & RTF_GATEWAY))
@@ -516,7 +509,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
516 read_unlock_bh(&neigh->lock); 509 read_unlock_bh(&neigh->lock);
517 } else 510 } else
518 m = 0; 511 m = 0;
519 rcu_read_unlock();
520 return m; 512 return m;
521} 513}
522 514
@@ -965,7 +957,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
965{ 957{
966 int flags = 0; 958 int flags = 0;
967 959
968 fl6->flowi6_iif = net->loopback_dev->ifindex; 960 fl6->flowi6_iif = LOOPBACK_IFINDEX;
969 961
970 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 962 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
971 flags |= RT6_LOOKUP_F_IFACE; 963 flags |= RT6_LOOKUP_F_IFACE;
@@ -1031,6 +1023,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1031 1023
1032 rt = (struct rt6_info *) dst; 1024 rt = (struct rt6_info *) dst;
1033 1025
1026 /* All IPV6 dsts are created with ->obsolete set to the value
1027 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1028 * into this function always.
1029 */
1030 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031 return NULL;
1032
1034 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { 1033 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1035 if (rt->rt6i_peer_genid != rt6_peer_genid()) { 1034 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1036 if (!rt6_has_peer(rt)) 1035 if (!rt6_has_peer(rt))
@@ -1397,8 +1396,6 @@ int ip6_route_add(struct fib6_config *cfg)
1397 goto out; 1396 goto out;
1398 } 1397 }
1399 1398
1400 rt->dst.obsolete = -1;
1401
1402 if (cfg->fc_flags & RTF_EXPIRES) 1399 if (cfg->fc_flags & RTF_EXPIRES)
1403 rt6_set_expires(rt, jiffies + 1400 rt6_set_expires(rt, jiffies +
1404 clock_t_to_jiffies(cfg->fc_expires)); 1401 clock_t_to_jiffies(cfg->fc_expires));
@@ -1463,8 +1460,21 @@ int ip6_route_add(struct fib6_config *cfg)
1463 } 1460 }
1464 rt->dst.output = ip6_pkt_discard_out; 1461 rt->dst.output = ip6_pkt_discard_out;
1465 rt->dst.input = ip6_pkt_discard; 1462 rt->dst.input = ip6_pkt_discard;
1466 rt->dst.error = -ENETUNREACH;
1467 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1463 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1464 switch (cfg->fc_type) {
1465 case RTN_BLACKHOLE:
1466 rt->dst.error = -EINVAL;
1467 break;
1468 case RTN_PROHIBIT:
1469 rt->dst.error = -EACCES;
1470 break;
1471 case RTN_THROW:
1472 rt->dst.error = -EAGAIN;
1473 break;
1474 default:
1475 rt->dst.error = -ENETUNREACH;
1476 break;
1477 }
1468 goto install_route; 1478 goto install_route;
1469 } 1479 }
1470 1480
@@ -1583,17 +1593,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1583 struct fib6_table *table; 1593 struct fib6_table *table;
1584 struct net *net = dev_net(rt->dst.dev); 1594 struct net *net = dev_net(rt->dst.dev);
1585 1595
1586 if (rt == net->ipv6.ip6_null_entry) 1596 if (rt == net->ipv6.ip6_null_entry) {
1587 return -ENOENT; 1597 err = -ENOENT;
1598 goto out;
1599 }
1588 1600
1589 table = rt->rt6i_table; 1601 table = rt->rt6i_table;
1590 write_lock_bh(&table->tb6_lock); 1602 write_lock_bh(&table->tb6_lock);
1591
1592 err = fib6_del(rt, info); 1603 err = fib6_del(rt, info);
1593 dst_release(&rt->dst);
1594
1595 write_unlock_bh(&table->tb6_lock); 1604 write_unlock_bh(&table->tb6_lock);
1596 1605
1606out:
1607 dst_release(&rt->dst);
1597 return err; 1608 return err;
1598} 1609}
1599 1610
@@ -1829,7 +1840,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
1829 if (!table) 1840 if (!table)
1830 return NULL; 1841 return NULL;
1831 1842
1832 write_lock_bh(&table->tb6_lock); 1843 read_lock_bh(&table->tb6_lock);
1833 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1844 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1834 if (!fn) 1845 if (!fn)
1835 goto out; 1846 goto out;
@@ -1845,7 +1856,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
1845 break; 1856 break;
1846 } 1857 }
1847out: 1858out:
1848 write_unlock_bh(&table->tb6_lock); 1859 read_unlock_bh(&table->tb6_lock);
1849 return rt; 1860 return rt;
1850} 1861}
1851 1862
@@ -1861,7 +1872,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
1861 .fc_dst_len = prefixlen, 1872 .fc_dst_len = prefixlen,
1862 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1873 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1863 RTF_UP | RTF_PREF(pref), 1874 RTF_UP | RTF_PREF(pref),
1864 .fc_nlinfo.pid = 0, 1875 .fc_nlinfo.portid = 0,
1865 .fc_nlinfo.nlh = NULL, 1876 .fc_nlinfo.nlh = NULL,
1866 .fc_nlinfo.nl_net = net, 1877 .fc_nlinfo.nl_net = net,
1867 }; 1878 };
@@ -1888,7 +1899,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
1888 if (!table) 1899 if (!table)
1889 return NULL; 1900 return NULL;
1890 1901
1891 write_lock_bh(&table->tb6_lock); 1902 read_lock_bh(&table->tb6_lock);
1892 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1903 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1893 if (dev == rt->dst.dev && 1904 if (dev == rt->dst.dev &&
1894 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1905 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -1897,7 +1908,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
1897 } 1908 }
1898 if (rt) 1909 if (rt)
1899 dst_hold(&rt->dst); 1910 dst_hold(&rt->dst);
1900 write_unlock_bh(&table->tb6_lock); 1911 read_unlock_bh(&table->tb6_lock);
1901 return rt; 1912 return rt;
1902} 1913}
1903 1914
@@ -1911,7 +1922,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1911 .fc_ifindex = dev->ifindex, 1922 .fc_ifindex = dev->ifindex,
1912 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1923 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1913 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1924 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1914 .fc_nlinfo.pid = 0, 1925 .fc_nlinfo.portid = 0,
1915 .fc_nlinfo.nlh = NULL, 1926 .fc_nlinfo.nlh = NULL,
1916 .fc_nlinfo.nl_net = dev_net(dev), 1927 .fc_nlinfo.nl_net = dev_net(dev),
1917 }; 1928 };
@@ -2080,7 +2091,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2080 rt->dst.input = ip6_input; 2091 rt->dst.input = ip6_input;
2081 rt->dst.output = ip6_output; 2092 rt->dst.output = ip6_output;
2082 rt->rt6i_idev = idev; 2093 rt->rt6i_idev = idev;
2083 rt->dst.obsolete = -1;
2084 2094
2085 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2095 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2086 if (anycast) 2096 if (anycast)
@@ -2261,14 +2271,18 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2261 cfg->fc_src_len = rtm->rtm_src_len; 2271 cfg->fc_src_len = rtm->rtm_src_len;
2262 cfg->fc_flags = RTF_UP; 2272 cfg->fc_flags = RTF_UP;
2263 cfg->fc_protocol = rtm->rtm_protocol; 2273 cfg->fc_protocol = rtm->rtm_protocol;
2274 cfg->fc_type = rtm->rtm_type;
2264 2275
2265 if (rtm->rtm_type == RTN_UNREACHABLE) 2276 if (rtm->rtm_type == RTN_UNREACHABLE ||
2277 rtm->rtm_type == RTN_BLACKHOLE ||
2278 rtm->rtm_type == RTN_PROHIBIT ||
2279 rtm->rtm_type == RTN_THROW)
2266 cfg->fc_flags |= RTF_REJECT; 2280 cfg->fc_flags |= RTF_REJECT;
2267 2281
2268 if (rtm->rtm_type == RTN_LOCAL) 2282 if (rtm->rtm_type == RTN_LOCAL)
2269 cfg->fc_flags |= RTF_LOCAL; 2283 cfg->fc_flags |= RTF_LOCAL;
2270 2284
2271 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2285 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2272 cfg->fc_nlinfo.nlh = nlh; 2286 cfg->fc_nlinfo.nlh = nlh;
2273 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2287 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2274 2288
@@ -2359,7 +2373,7 @@ static inline size_t rt6_nlmsg_size(void)
2359static int rt6_fill_node(struct net *net, 2373static int rt6_fill_node(struct net *net,
2360 struct sk_buff *skb, struct rt6_info *rt, 2374 struct sk_buff *skb, struct rt6_info *rt,
2361 struct in6_addr *dst, struct in6_addr *src, 2375 struct in6_addr *dst, struct in6_addr *src,
2362 int iif, int type, u32 pid, u32 seq, 2376 int iif, int type, u32 portid, u32 seq,
2363 int prefix, int nowait, unsigned int flags) 2377 int prefix, int nowait, unsigned int flags)
2364{ 2378{
2365 struct rtmsg *rtm; 2379 struct rtmsg *rtm;
@@ -2375,7 +2389,7 @@ static int rt6_fill_node(struct net *net,
2375 } 2389 }
2376 } 2390 }
2377 2391
2378 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2392 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2379 if (!nlh) 2393 if (!nlh)
2380 return -EMSGSIZE; 2394 return -EMSGSIZE;
2381 2395
@@ -2391,8 +2405,22 @@ static int rt6_fill_node(struct net *net,
2391 rtm->rtm_table = table; 2405 rtm->rtm_table = table;
2392 if (nla_put_u32(skb, RTA_TABLE, table)) 2406 if (nla_put_u32(skb, RTA_TABLE, table))
2393 goto nla_put_failure; 2407 goto nla_put_failure;
2394 if (rt->rt6i_flags & RTF_REJECT) 2408 if (rt->rt6i_flags & RTF_REJECT) {
2395 rtm->rtm_type = RTN_UNREACHABLE; 2409 switch (rt->dst.error) {
2410 case -EINVAL:
2411 rtm->rtm_type = RTN_BLACKHOLE;
2412 break;
2413 case -EACCES:
2414 rtm->rtm_type = RTN_PROHIBIT;
2415 break;
2416 case -EAGAIN:
2417 rtm->rtm_type = RTN_THROW;
2418 break;
2419 default:
2420 rtm->rtm_type = RTN_UNREACHABLE;
2421 break;
2422 }
2423 }
2396 else if (rt->rt6i_flags & RTF_LOCAL) 2424 else if (rt->rt6i_flags & RTF_LOCAL)
2397 rtm->rtm_type = RTN_LOCAL; 2425 rtm->rtm_type = RTN_LOCAL;
2398 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 2426 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
@@ -2465,15 +2493,11 @@ static int rt6_fill_node(struct net *net,
2465 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2493 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2466 goto nla_put_failure; 2494 goto nla_put_failure;
2467 2495
2468 rcu_read_lock();
2469 n = rt->n; 2496 n = rt->n;
2470 if (n) { 2497 if (n) {
2471 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { 2498 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2472 rcu_read_unlock();
2473 goto nla_put_failure; 2499 goto nla_put_failure;
2474 }
2475 } 2500 }
2476 rcu_read_unlock();
2477 2501
2478 if (rt->dst.dev && 2502 if (rt->dst.dev &&
2479 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2503 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@ -2506,7 +2530,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2506 2530
2507 return rt6_fill_node(arg->net, 2531 return rt6_fill_node(arg->net,
2508 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2532 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2509 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2533 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2510 prefix, 0, NLM_F_MULTI); 2534 prefix, 0, NLM_F_MULTI);
2511} 2535}
2512 2536
@@ -2586,14 +2610,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2586 skb_dst_set(skb, &rt->dst); 2610 skb_dst_set(skb, &rt->dst);
2587 2611
2588 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2612 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2589 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2613 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2590 nlh->nlmsg_seq, 0, 0, 0); 2614 nlh->nlmsg_seq, 0, 0, 0);
2591 if (err < 0) { 2615 if (err < 0) {
2592 kfree_skb(skb); 2616 kfree_skb(skb);
2593 goto errout; 2617 goto errout;
2594 } 2618 }
2595 2619
2596 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2620 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2597errout: 2621errout:
2598 return err; 2622 return err;
2599} 2623}
@@ -2613,14 +2637,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2613 goto errout; 2637 goto errout;
2614 2638
2615 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2639 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2616 event, info->pid, seq, 0, 0, 0); 2640 event, info->portid, seq, 0, 0, 0);
2617 if (err < 0) { 2641 if (err < 0) {
2618 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2642 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2619 WARN_ON(err == -EMSGSIZE); 2643 WARN_ON(err == -EMSGSIZE);
2620 kfree_skb(skb); 2644 kfree_skb(skb);
2621 goto errout; 2645 goto errout;
2622 } 2646 }
2623 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2647 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2624 info->nlh, gfp_any()); 2648 info->nlh, gfp_any());
2625 return; 2649 return;
2626errout: 2650errout:
@@ -2675,14 +2699,12 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2675#else 2699#else
2676 seq_puts(m, "00000000000000000000000000000000 00 "); 2700 seq_puts(m, "00000000000000000000000000000000 00 ");
2677#endif 2701#endif
2678 rcu_read_lock();
2679 n = rt->n; 2702 n = rt->n;
2680 if (n) { 2703 if (n) {
2681 seq_printf(m, "%pi6", n->primary_key); 2704 seq_printf(m, "%pi6", n->primary_key);
2682 } else { 2705 } else {
2683 seq_puts(m, "00000000000000000000000000000000"); 2706 seq_puts(m, "00000000000000000000000000000000");
2684 } 2707 }
2685 rcu_read_unlock();
2686 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2708 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2687 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2709 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2688 rt->dst.__use, rt->rt6i_flags, 2710 rt->dst.__use, rt->rt6i_flags,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3bd1bfc01f8..3ed54ffd8d5 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -545,7 +545,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
545 545
546 err = -ENOENT; 546 err = -ENOENT;
547 547
548 rcu_read_lock();
549 t = ipip6_tunnel_lookup(dev_net(skb->dev), 548 t = ipip6_tunnel_lookup(dev_net(skb->dev),
550 skb->dev, 549 skb->dev,
551 iph->daddr, 550 iph->daddr,
@@ -579,7 +578,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
579 t->err_count = 1; 578 t->err_count = 1;
580 t->err_time = jiffies; 579 t->err_time = jiffies;
581out: 580out:
582 rcu_read_unlock();
583 return err; 581 return err;
584} 582}
585 583
@@ -599,7 +597,6 @@ static int ipip6_rcv(struct sk_buff *skb)
599 597
600 iph = ip_hdr(skb); 598 iph = ip_hdr(skb);
601 599
602 rcu_read_lock();
603 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 600 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
604 iph->saddr, iph->daddr); 601 iph->saddr, iph->daddr);
605 if (tunnel != NULL) { 602 if (tunnel != NULL) {
@@ -615,7 +612,6 @@ static int ipip6_rcv(struct sk_buff *skb)
615 if ((tunnel->dev->priv_flags & IFF_ISATAP) && 612 if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
616 !isatap_chksrc(skb, iph, tunnel)) { 613 !isatap_chksrc(skb, iph, tunnel)) {
617 tunnel->dev->stats.rx_errors++; 614 tunnel->dev->stats.rx_errors++;
618 rcu_read_unlock();
619 kfree_skb(skb); 615 kfree_skb(skb);
620 return 0; 616 return 0;
621 } 617 }
@@ -630,12 +626,10 @@ static int ipip6_rcv(struct sk_buff *skb)
630 626
631 netif_rx(skb); 627 netif_rx(skb);
632 628
633 rcu_read_unlock();
634 return 0; 629 return 0;
635 } 630 }
636 631
637 /* no tunnel matched, let upstream know, ipsec may handle it */ 632 /* no tunnel matched, let upstream know, ipsec may handle it */
638 rcu_read_unlock();
639 return 1; 633 return 1;
640out: 634out:
641 kfree_skb(skb); 635 kfree_skb(skb);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb46061c813..182ab9a85d6 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
190 ireq = inet_rsk(req); 190 ireq = inet_rsk(req);
191 ireq6 = inet6_rsk(req); 191 ireq6 = inet6_rsk(req);
192 treq = tcp_rsk(req); 192 treq = tcp_rsk(req);
193 treq->listener = NULL;
193 194
194 if (security_inet_conn_request(sk, skb, req)) 195 if (security_inet_conn_request(sk, skb, req))
195 goto out_free; 196 goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bb9ce2b2f37..49c890386ce 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
94} 94}
95#endif 95#endif
96 96
97static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
98{
99 struct dst_entry *dst = skb_dst(skb);
100 const struct rt6_info *rt = (const struct rt6_info *)dst;
101
102 dst_hold(dst);
103 sk->sk_rx_dst = dst;
104 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
105 if (rt->rt6i_node)
106 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
107}
108
97static void tcp_v6_hash(struct sock *sk) 109static void tcp_v6_hash(struct sock *sk)
98{ 110{
99 if (sk->sk_state != TCP_CLOSE) { 111 if (sk->sk_state != TCP_CLOSE) {
@@ -391,8 +403,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
391 tp->mtu_info = ntohl(info); 403 tp->mtu_info = ntohl(info);
392 if (!sock_owned_by_user(sk)) 404 if (!sock_owned_by_user(sk))
393 tcp_v6_mtu_reduced(sk); 405 tcp_v6_mtu_reduced(sk);
394 else 406 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
395 set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); 407 &tp->tsq_flags))
408 sock_hold(sk);
396 goto out; 409 goto out;
397 } 410 }
398 411
@@ -463,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
463 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) 476 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
464 goto done; 477 goto done;
465 478
466 skb = tcp_make_synack(sk, dst, req, rvp); 479 skb = tcp_make_synack(sk, dst, req, rvp, NULL);
467 480
468 if (skb) { 481 if (skb) {
469 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); 482 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -750,6 +763,8 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
750 struct sk_buff *skb) 763 struct sk_buff *skb)
751{ 764{
752 const struct ipv6hdr *iph = skb_gro_network_header(skb); 765 const struct ipv6hdr *iph = skb_gro_network_header(skb);
766 __wsum wsum;
767 __sum16 sum;
753 768
754 switch (skb->ip_summed) { 769 switch (skb->ip_summed) {
755 case CHECKSUM_COMPLETE: 770 case CHECKSUM_COMPLETE:
@@ -758,11 +773,23 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
758 skb->ip_summed = CHECKSUM_UNNECESSARY; 773 skb->ip_summed = CHECKSUM_UNNECESSARY;
759 break; 774 break;
760 } 775 }
761 776flush:
762 /* fall through */
763 case CHECKSUM_NONE:
764 NAPI_GRO_CB(skb)->flush = 1; 777 NAPI_GRO_CB(skb)->flush = 1;
765 return NULL; 778 return NULL;
779
780 case CHECKSUM_NONE:
781 wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
782 skb_gro_len(skb),
783 IPPROTO_TCP, 0));
784 sum = csum_fold(skb_checksum(skb,
785 skb_gro_offset(skb),
786 skb_gro_len(skb),
787 wsum));
788 if (sum)
789 goto flush;
790
791 skb->ip_summed = CHECKSUM_UNNECESSARY;
792 break;
766 } 793 }
767 794
768 return tcp_gro_receive(head, skb); 795 return tcp_gro_receive(head, skb);
@@ -975,7 +1002,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
975 &ipv6_hdr(skb)->saddr, 1002 &ipv6_hdr(skb)->saddr,
976 &ipv6_hdr(skb)->daddr, inet6_iif(skb)); 1003 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
977 if (req) 1004 if (req)
978 return tcp_check_req(sk, skb, req, prev); 1005 return tcp_check_req(sk, skb, req, prev, false);
979 1006
980 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 1007 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
981 &ipv6_hdr(skb)->saddr, th->source, 1008 &ipv6_hdr(skb)->saddr, th->source,
@@ -1156,7 +1183,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1156 } 1183 }
1157have_isn: 1184have_isn:
1158 tcp_rsk(req)->snt_isn = isn; 1185 tcp_rsk(req)->snt_isn = isn;
1159 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1160 1186
1161 if (security_inet_conn_request(sk, skb, req)) 1187 if (security_inet_conn_request(sk, skb, req))
1162 goto drop_and_release; 1188 goto drop_and_release;
@@ -1167,6 +1193,8 @@ have_isn:
1167 want_cookie) 1193 want_cookie)
1168 goto drop_and_free; 1194 goto drop_and_free;
1169 1195
1196 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1197 tcp_rsk(req)->listener = NULL;
1170 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 1198 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1171 return 0; 1199 return 0;
1172 1200
@@ -1270,6 +1298,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1270 1298
1271 newsk->sk_gso_type = SKB_GSO_TCPV6; 1299 newsk->sk_gso_type = SKB_GSO_TCPV6;
1272 __ip6_dst_store(newsk, dst, NULL, NULL); 1300 __ip6_dst_store(newsk, dst, NULL, NULL);
1301 inet6_sk_rx_dst_set(newsk, skb);
1273 1302
1274 newtcp6sk = (struct tcp6_sock *)newsk; 1303 newtcp6sk = (struct tcp6_sock *)newsk;
1275 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; 1304 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1333,9 +1362,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1333 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1362 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1334 1363
1335 tcp_initialize_rcv_mss(newsk); 1364 tcp_initialize_rcv_mss(newsk);
1336 if (tcp_rsk(req)->snt_synack) 1365 tcp_synack_rtt_meas(newsk, req);
1337 tcp_valid_rtt_meas(newsk,
1338 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1339 newtp->total_retrans = req->retrans; 1366 newtp->total_retrans = req->retrans;
1340 1367
1341 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1368 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
@@ -1729,18 +1756,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1729 .twsk_destructor= tcp_twsk_destructor, 1756 .twsk_destructor= tcp_twsk_destructor,
1730}; 1757};
1731 1758
1732static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1733{
1734 struct dst_entry *dst = skb_dst(skb);
1735 const struct rt6_info *rt = (const struct rt6_info *)dst;
1736
1737 dst_hold(dst);
1738 sk->sk_rx_dst = dst;
1739 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1740 if (rt->rt6i_node)
1741 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
1742}
1743
1744static const struct inet_connection_sock_af_ops ipv6_specific = { 1759static const struct inet_connection_sock_af_ops ipv6_specific = {
1745 .queue_xmit = inet6_csk_xmit, 1760 .queue_xmit = inet6_csk_xmit,
1746 .send_check = tcp_v6_send_check, 1761 .send_check = tcp_v6_send_check,
@@ -1827,7 +1842,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
1827#ifdef CONFIG_PROC_FS 1842#ifdef CONFIG_PROC_FS
1828/* Proc filesystem TCPv6 sock list dumping. */ 1843/* Proc filesystem TCPv6 sock list dumping. */
1829static void get_openreq6(struct seq_file *seq, 1844static void get_openreq6(struct seq_file *seq,
1830 const struct sock *sk, struct request_sock *req, int i, int uid) 1845 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
1831{ 1846{
1832 int ttd = req->expires - jiffies; 1847 int ttd = req->expires - jiffies;
1833 const struct in6_addr *src = &inet6_rsk(req)->loc_addr; 1848 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
@@ -1851,7 +1866,7 @@ static void get_openreq6(struct seq_file *seq,
1851 1, /* timers active (only the expire timer) */ 1866 1, /* timers active (only the expire timer) */
1852 jiffies_to_clock_t(ttd), 1867 jiffies_to_clock_t(ttd),
1853 req->retrans, 1868 req->retrans,
1854 uid, 1869 from_kuid_munged(seq_user_ns(seq), uid),
1855 0, /* non standard timer */ 1870 0, /* non standard timer */
1856 0, /* open_requests have no inode */ 1871 0, /* open_requests have no inode */
1857 0, req); 1872 0, req);
@@ -1899,9 +1914,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1899 tp->write_seq-tp->snd_una, 1914 tp->write_seq-tp->snd_una,
1900 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 1915 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1901 timer_active, 1916 timer_active,
1902 jiffies_to_clock_t(timer_expires - jiffies), 1917 jiffies_delta_to_clock_t(timer_expires - jiffies),
1903 icsk->icsk_retransmits, 1918 icsk->icsk_retransmits,
1904 sock_i_uid(sp), 1919 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1905 icsk->icsk_probes_out, 1920 icsk->icsk_probes_out,
1906 sock_i_ino(sp), 1921 sock_i_ino(sp),
1907 atomic_read(&sp->sk_refcnt), sp, 1922 atomic_read(&sp->sk_refcnt), sp,
@@ -1919,10 +1934,7 @@ static void get_timewait6_sock(struct seq_file *seq,
1919 const struct in6_addr *dest, *src; 1934 const struct in6_addr *dest, *src;
1920 __u16 destp, srcp; 1935 __u16 destp, srcp;
1921 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); 1936 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1922 int ttd = tw->tw_ttd - jiffies; 1937 long delta = tw->tw_ttd - jiffies;
1923
1924 if (ttd < 0)
1925 ttd = 0;
1926 1938
1927 dest = &tw6->tw_v6_daddr; 1939 dest = &tw6->tw_v6_daddr;
1928 src = &tw6->tw_v6_rcv_saddr; 1940 src = &tw6->tw_v6_rcv_saddr;
@@ -1938,7 +1950,7 @@ static void get_timewait6_sock(struct seq_file *seq,
1938 dest->s6_addr32[0], dest->s6_addr32[1], 1950 dest->s6_addr32[0], dest->s6_addr32[1],
1939 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1951 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1940 tw->tw_substate, 0, 0, 1952 tw->tw_substate, 0, 0,
1941 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 1953 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1942 atomic_read(&tw->tw_refcnt), tw); 1954 atomic_read(&tw->tw_refcnt), tw);
1943} 1955}
1944 1956
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 99d0077b56b..fc9997260a6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -394,6 +394,17 @@ try_again:
394 } 394 }
395 if (unlikely(err)) { 395 if (unlikely(err)) {
396 trace_kfree_skb(skb, udpv6_recvmsg); 396 trace_kfree_skb(skb, udpv6_recvmsg);
397 if (!peeked) {
398 atomic_inc(&sk->sk_drops);
399 if (is_udp4)
400 UDP_INC_STATS_USER(sock_net(sk),
401 UDP_MIB_INERRORS,
402 is_udplite);
403 else
404 UDP6_INC_STATS_USER(sock_net(sk),
405 UDP_MIB_INERRORS,
406 is_udplite);
407 }
397 goto out_free; 408 goto out_free;
398 } 409 }
399 if (!peeked) { 410 if (!peeked) {
@@ -1458,7 +1469,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
1458 sk_wmem_alloc_get(sp), 1469 sk_wmem_alloc_get(sp),
1459 sk_rmem_alloc_get(sp), 1470 sk_rmem_alloc_get(sp),
1460 0, 0L, 0, 1471 0, 0L, 0,
1461 sock_i_uid(sp), 0, 1472 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1473 0,
1462 sock_i_ino(sp), 1474 sock_i_ino(sp),
1463 atomic_read(&sp->sk_refcnt), sp, 1475 atomic_read(&sp->sk_refcnt), sp,
1464 atomic_read(&sp->sk_drops)); 1476 atomic_read(&sp->sk_drops));
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef39812107b..f8c4c08ffb6 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl)
73 return 0; 73 return 0;
74} 74}
75 75
76static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
77{
78 struct rt6_info *rt = (struct rt6_info *)xdst;
79
80 rt6_init_peer(rt, net->ipv6.peers);
81}
82
76static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, 83static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
77 int nfheader_len) 84 int nfheader_len)
78{ 85{
@@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
286 .get_saddr = xfrm6_get_saddr, 293 .get_saddr = xfrm6_get_saddr,
287 .decode_session = _decode_session6, 294 .decode_session = _decode_session6,
288 .get_tos = xfrm6_get_tos, 295 .get_tos = xfrm6_get_tos,
296 .init_dst = xfrm6_init_dst,
289 .init_path = xfrm6_init_path, 297 .init_path = xfrm6_init_path,
290 .fill_dst = xfrm6_fill_dst, 298 .fill_dst = xfrm6_fill_dst,
291 .blackhole_route = ip6_blackhole_route, 299 .blackhole_route = ip6_blackhole_route,