aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig16
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c83
-rw-r--r--net/ipv6/addrlabel.c24
-rw-r--r--net/ipv6/ip6_fib.c20
-rw-r--r--net/ipv6/ip6_gre.c1770
-rw-r--r--net/ipv6/ip6_output.c85
-rw-r--r--net/ipv6/ip6_tunnel.c91
-rw-r--r--net/ipv6/ip6mr.c10
-rw-r--r--net/ipv6/netfilter.c8
-rw-r--r--net/ipv6/netfilter/Kconfig37
-rw-r--r--net/ipv6/netfilter/Makefile6
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c135
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c165
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c4
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c4
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c321
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c4
-rw-r--r--net/ipv6/netfilter/ip6table_security.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c137
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c218
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c288
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
-rw-r--r--net/ipv6/reassembly.c89
-rw-r--r--net/ipv6/route.c96
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/syncookies.c1
-rw-r--r--net/ipv6/tcp_ipv6.c40
28 files changed, 3306 insertions, 448 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 5728695b5449..4f7fe7270e37 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -201,6 +201,22 @@ config IPV6_TUNNEL
201 201
202 If unsure, say N. 202 If unsure, say N.
203 203
204config IPV6_GRE
205 tristate "IPv6: GRE tunnel"
206 select IPV6_TUNNEL
207 ---help---
208 Tunneling means encapsulating data of one protocol type within
209 another protocol and sending it over a channel that understands the
210 encapsulating protocol. This particular tunneling driver implements
211 GRE (Generic Routing Encapsulation) and at this time allows
212 encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
213 This driver is useful if the other endpoint is a Cisco router: Cisco
214 likes GRE much better than the other Linux tunneling driver ("IP
215 tunneling" above). In addition, GRE allows multicast redistribution
216 through the tunnel.
217
218 Saying M here will produce a module called ip6_gre. If unsure, say N.
219
204config IPV6_MULTIPLE_TABLES 220config IPV6_MULTIPLE_TABLES
205 bool "IPv6: Multiple Routing Tables" 221 bool "IPv6: Multiple Routing Tables"
206 depends on EXPERIMENTAL 222 depends on EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 686934acfac1..b6d3f79151e2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
36 36
37obj-$(CONFIG_IPV6_SIT) += sit.o 37obj-$(CONFIG_IPV6_SIT) += sit.o
38obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o 38obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
39obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
39 40
40obj-y += addrconf_core.o exthdrs_core.o 41obj-y += addrconf_core.o exthdrs_core.o
41 42
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6bc85f7c31e3..480e68422efb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
127#endif 127#endif
128 128
129#ifdef CONFIG_IPV6_PRIVACY 129#ifdef CONFIG_IPV6_PRIVACY
130static int __ipv6_regen_rndid(struct inet6_dev *idev); 130static void __ipv6_regen_rndid(struct inet6_dev *idev);
131static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 131static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
132static void ipv6_regen_rndid(unsigned long data); 132static void ipv6_regen_rndid(unsigned long data);
133#endif 133#endif
134 134
@@ -788,10 +788,16 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
788 struct in6_addr prefix; 788 struct in6_addr prefix;
789 struct rt6_info *rt; 789 struct rt6_info *rt;
790 struct net *net = dev_net(ifp->idev->dev); 790 struct net *net = dev_net(ifp->idev->dev);
791 struct flowi6 fl6 = {};
792
791 ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); 793 ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
792 rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); 794 fl6.flowi6_oif = ifp->idev->dev->ifindex;
795 fl6.daddr = prefix;
796 rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
797 RT6_LOOKUP_F_IFACE);
793 798
794 if (rt && addrconf_is_prefix_route(rt)) { 799 if (rt != net->ipv6.ip6_null_entry &&
800 addrconf_is_prefix_route(rt)) {
795 if (onlink == 0) { 801 if (onlink == 0) {
796 ip6_del_rt(rt); 802 ip6_del_rt(rt);
797 rt = NULL; 803 rt = NULL;
@@ -852,16 +858,7 @@ retry:
852 } 858 }
853 in6_ifa_hold(ifp); 859 in6_ifa_hold(ifp);
854 memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); 860 memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
855 if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { 861 __ipv6_try_regen_rndid(idev, tmpaddr);
856 spin_unlock_bh(&ifp->lock);
857 write_unlock(&idev->lock);
858 pr_warn("%s: regeneration of randomized interface id failed\n",
859 __func__);
860 in6_ifa_put(ifp);
861 in6_dev_put(idev);
862 ret = -1;
863 goto out;
864 }
865 memcpy(&addr.s6_addr[8], idev->rndid, 8); 862 memcpy(&addr.s6_addr[8], idev->rndid, 8);
866 age = (now - ifp->tstamp) / HZ; 863 age = (now - ifp->tstamp) / HZ;
867 tmp_valid_lft = min_t(__u32, 864 tmp_valid_lft = min_t(__u32,
@@ -1079,8 +1076,10 @@ static int ipv6_get_saddr_eval(struct net *net,
1079 break; 1076 break;
1080 case IPV6_SADDR_RULE_PREFIX: 1077 case IPV6_SADDR_RULE_PREFIX:
1081 /* Rule 8: Use longest matching prefix */ 1078 /* Rule 8: Use longest matching prefix */
1082 score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, 1079 ret = ipv6_addr_diff(&score->ifa->addr, dst->addr);
1083 dst->addr); 1080 if (ret > score->ifa->prefix_len)
1081 ret = score->ifa->prefix_len;
1082 score->matchlen = ret;
1084 break; 1083 break;
1085 default: 1084 default:
1086 ret = 0; 1085 ret = 0;
@@ -1093,7 +1092,7 @@ out:
1093 return ret; 1092 return ret;
1094} 1093}
1095 1094
1096int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, 1095int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
1097 const struct in6_addr *daddr, unsigned int prefs, 1096 const struct in6_addr *daddr, unsigned int prefs,
1098 struct in6_addr *saddr) 1097 struct in6_addr *saddr)
1099{ 1098{
@@ -1600,7 +1599,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
1600 1599
1601#ifdef CONFIG_IPV6_PRIVACY 1600#ifdef CONFIG_IPV6_PRIVACY
1602/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ 1601/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
1603static int __ipv6_regen_rndid(struct inet6_dev *idev) 1602static void __ipv6_regen_rndid(struct inet6_dev *idev)
1604{ 1603{
1605regen: 1604regen:
1606 get_random_bytes(idev->rndid, sizeof(idev->rndid)); 1605 get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -1627,8 +1626,6 @@ regen:
1627 if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) 1626 if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
1628 goto regen; 1627 goto regen;
1629 } 1628 }
1630
1631 return 0;
1632} 1629}
1633 1630
1634static void ipv6_regen_rndid(unsigned long data) 1631static void ipv6_regen_rndid(unsigned long data)
@@ -1642,8 +1639,7 @@ static void ipv6_regen_rndid(unsigned long data)
1642 if (idev->dead) 1639 if (idev->dead)
1643 goto out; 1640 goto out;
1644 1641
1645 if (__ipv6_regen_rndid(idev) < 0) 1642 __ipv6_regen_rndid(idev);
1646 goto out;
1647 1643
1648 expires = jiffies + 1644 expires = jiffies +
1649 idev->cnf.temp_prefered_lft * HZ - 1645 idev->cnf.temp_prefered_lft * HZ -
@@ -1664,13 +1660,10 @@ out:
1664 in6_dev_put(idev); 1660 in6_dev_put(idev);
1665} 1661}
1666 1662
1667static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) 1663static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
1668{ 1664{
1669 int ret = 0;
1670
1671 if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) 1665 if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
1672 ret = __ipv6_regen_rndid(idev); 1666 __ipv6_regen_rndid(idev);
1673 return ret;
1674} 1667}
1675#endif 1668#endif
1676 1669
@@ -1721,7 +1714,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
1721 if (table == NULL) 1714 if (table == NULL)
1722 return NULL; 1715 return NULL;
1723 1716
1724 write_lock_bh(&table->tb6_lock); 1717 read_lock_bh(&table->tb6_lock);
1725 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); 1718 fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
1726 if (!fn) 1719 if (!fn)
1727 goto out; 1720 goto out;
@@ -1736,7 +1729,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
1736 break; 1729 break;
1737 } 1730 }
1738out: 1731out:
1739 write_unlock_bh(&table->tb6_lock); 1732 read_unlock_bh(&table->tb6_lock);
1740 return rt; 1733 return rt;
1741} 1734}
1742 1735
@@ -3549,12 +3542,12 @@ static inline int inet6_ifaddr_msgsize(void)
3549} 3542}
3550 3543
3551static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, 3544static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3552 u32 pid, u32 seq, int event, unsigned int flags) 3545 u32 portid, u32 seq, int event, unsigned int flags)
3553{ 3546{
3554 struct nlmsghdr *nlh; 3547 struct nlmsghdr *nlh;
3555 u32 preferred, valid; 3548 u32 preferred, valid;
3556 3549
3557 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); 3550 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
3558 if (nlh == NULL) 3551 if (nlh == NULL)
3559 return -EMSGSIZE; 3552 return -EMSGSIZE;
3560 3553
@@ -3592,7 +3585,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3592} 3585}
3593 3586
3594static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 3587static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3595 u32 pid, u32 seq, int event, u16 flags) 3588 u32 portid, u32 seq, int event, u16 flags)
3596{ 3589{
3597 struct nlmsghdr *nlh; 3590 struct nlmsghdr *nlh;
3598 u8 scope = RT_SCOPE_UNIVERSE; 3591 u8 scope = RT_SCOPE_UNIVERSE;
@@ -3601,7 +3594,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3601 if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) 3594 if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
3602 scope = RT_SCOPE_SITE; 3595 scope = RT_SCOPE_SITE;
3603 3596
3604 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); 3597 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
3605 if (nlh == NULL) 3598 if (nlh == NULL)
3606 return -EMSGSIZE; 3599 return -EMSGSIZE;
3607 3600
@@ -3617,7 +3610,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
3617} 3610}
3618 3611
3619static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, 3612static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3620 u32 pid, u32 seq, int event, unsigned int flags) 3613 u32 portid, u32 seq, int event, unsigned int flags)
3621{ 3614{
3622 struct nlmsghdr *nlh; 3615 struct nlmsghdr *nlh;
3623 u8 scope = RT_SCOPE_UNIVERSE; 3616 u8 scope = RT_SCOPE_UNIVERSE;
@@ -3626,7 +3619,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3626 if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) 3619 if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
3627 scope = RT_SCOPE_SITE; 3620 scope = RT_SCOPE_SITE;
3628 3621
3629 nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); 3622 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
3630 if (nlh == NULL) 3623 if (nlh == NULL)
3631 return -EMSGSIZE; 3624 return -EMSGSIZE;
3632 3625
@@ -3667,7 +3660,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3667 if (++ip_idx < s_ip_idx) 3660 if (++ip_idx < s_ip_idx)
3668 continue; 3661 continue;
3669 err = inet6_fill_ifaddr(skb, ifa, 3662 err = inet6_fill_ifaddr(skb, ifa,
3670 NETLINK_CB(cb->skb).pid, 3663 NETLINK_CB(cb->skb).portid,
3671 cb->nlh->nlmsg_seq, 3664 cb->nlh->nlmsg_seq,
3672 RTM_NEWADDR, 3665 RTM_NEWADDR,
3673 NLM_F_MULTI); 3666 NLM_F_MULTI);
@@ -3683,7 +3676,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3683 if (ip_idx < s_ip_idx) 3676 if (ip_idx < s_ip_idx)
3684 continue; 3677 continue;
3685 err = inet6_fill_ifmcaddr(skb, ifmca, 3678 err = inet6_fill_ifmcaddr(skb, ifmca,
3686 NETLINK_CB(cb->skb).pid, 3679 NETLINK_CB(cb->skb).portid,
3687 cb->nlh->nlmsg_seq, 3680 cb->nlh->nlmsg_seq,
3688 RTM_GETMULTICAST, 3681 RTM_GETMULTICAST,
3689 NLM_F_MULTI); 3682 NLM_F_MULTI);
@@ -3698,7 +3691,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3698 if (ip_idx < s_ip_idx) 3691 if (ip_idx < s_ip_idx)
3699 continue; 3692 continue;
3700 err = inet6_fill_ifacaddr(skb, ifaca, 3693 err = inet6_fill_ifacaddr(skb, ifaca,
3701 NETLINK_CB(cb->skb).pid, 3694 NETLINK_CB(cb->skb).portid,
3702 cb->nlh->nlmsg_seq, 3695 cb->nlh->nlmsg_seq,
3703 RTM_GETANYCAST, 3696 RTM_GETANYCAST,
3704 NLM_F_MULTI); 3697 NLM_F_MULTI);
@@ -3820,7 +3813,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3820 goto errout_ifa; 3813 goto errout_ifa;
3821 } 3814 }
3822 3815
3823 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, 3816 err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
3824 nlh->nlmsg_seq, RTM_NEWADDR, 0); 3817 nlh->nlmsg_seq, RTM_NEWADDR, 0);
3825 if (err < 0) { 3818 if (err < 0) {
3826 /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ 3819 /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
@@ -3828,7 +3821,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3828 kfree_skb(skb); 3821 kfree_skb(skb);
3829 goto errout_ifa; 3822 goto errout_ifa;
3830 } 3823 }
3831 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 3824 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3832errout_ifa: 3825errout_ifa:
3833 in6_ifa_put(ifa); 3826 in6_ifa_put(ifa);
3834errout: 3827errout:
@@ -4030,14 +4023,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
4030} 4023}
4031 4024
4032static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 4025static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
4033 u32 pid, u32 seq, int event, unsigned int flags) 4026 u32 portid, u32 seq, int event, unsigned int flags)
4034{ 4027{
4035 struct net_device *dev = idev->dev; 4028 struct net_device *dev = idev->dev;
4036 struct ifinfomsg *hdr; 4029 struct ifinfomsg *hdr;
4037 struct nlmsghdr *nlh; 4030 struct nlmsghdr *nlh;
4038 void *protoinfo; 4031 void *protoinfo;
4039 4032
4040 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); 4033 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
4041 if (nlh == NULL) 4034 if (nlh == NULL)
4042 return -EMSGSIZE; 4035 return -EMSGSIZE;
4043 4036
@@ -4095,7 +4088,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
4095 if (!idev) 4088 if (!idev)
4096 goto cont; 4089 goto cont;
4097 if (inet6_fill_ifinfo(skb, idev, 4090 if (inet6_fill_ifinfo(skb, idev,
4098 NETLINK_CB(cb->skb).pid, 4091 NETLINK_CB(cb->skb).portid,
4099 cb->nlh->nlmsg_seq, 4092 cb->nlh->nlmsg_seq,
4100 RTM_NEWLINK, NLM_F_MULTI) <= 0) 4093 RTM_NEWLINK, NLM_F_MULTI) <= 0)
4101 goto out; 4094 goto out;
@@ -4143,14 +4136,14 @@ static inline size_t inet6_prefix_nlmsg_size(void)
4143} 4136}
4144 4137
4145static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, 4138static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
4146 struct prefix_info *pinfo, u32 pid, u32 seq, 4139 struct prefix_info *pinfo, u32 portid, u32 seq,
4147 int event, unsigned int flags) 4140 int event, unsigned int flags)
4148{ 4141{
4149 struct prefixmsg *pmsg; 4142 struct prefixmsg *pmsg;
4150 struct nlmsghdr *nlh; 4143 struct nlmsghdr *nlh;
4151 struct prefix_cacheinfo ci; 4144 struct prefix_cacheinfo ci;
4152 4145
4153 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); 4146 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags);
4154 if (nlh == NULL) 4147 if (nlh == NULL)
4155 return -EMSGSIZE; 4148 return -EMSGSIZE;
4156 4149
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index eb6a63632d3c..4be23da32b89 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
57} 57}
58 58
59/* 59/*
60 * Default policy table (RFC3484 + extensions) 60 * Default policy table (RFC6724 + extensions)
61 * 61 *
62 * prefix addr_type label 62 * prefix addr_type label
63 * ------------------------------------------------------------------------- 63 * -------------------------------------------------------------------------
@@ -69,8 +69,12 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
69 * fc00::/7 N/A 5 ULA (RFC 4193) 69 * fc00::/7 N/A 5 ULA (RFC 4193)
70 * 2001::/32 N/A 6 Teredo (RFC 4380) 70 * 2001::/32 N/A 6 Teredo (RFC 4380)
71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 71 * 2001:10::/28 N/A 7 ORCHID (RFC 4843)
72 * fec0::/10 N/A 11 Site-local
73 * (deprecated by RFC3879)
74 * 3ffe::/16 N/A 12 6bone
72 * 75 *
73 * Note: 0xffffffff is used if we do not have any policies. 76 * Note: 0xffffffff is used if we do not have any policies.
77 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
74 */ 78 */
75 79
76#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 80#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
@@ -88,10 +92,18 @@ static const __net_initdata struct ip6addrlbl_init_table
88 .prefix = &(struct in6_addr){{{ 0xfc }}}, 92 .prefix = &(struct in6_addr){{{ 0xfc }}},
89 .prefixlen = 7, 93 .prefixlen = 7,
90 .label = 5, 94 .label = 5,
95 },{ /* fec0::/10 */
96 .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
97 .prefixlen = 10,
98 .label = 11,
91 },{ /* 2002::/16 */ 99 },{ /* 2002::/16 */
92 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 100 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
93 .prefixlen = 16, 101 .prefixlen = 16,
94 .label = 2, 102 .label = 2,
103 },{ /* 3ffe::/16 */
104 .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
105 .prefixlen = 16,
106 .label = 12,
95 },{ /* 2001::/32 */ 107 },{ /* 2001::/32 */
96 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 108 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
97 .prefixlen = 32, 109 .prefixlen = 32,
@@ -470,10 +482,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
470static int ip6addrlbl_fill(struct sk_buff *skb, 482static int ip6addrlbl_fill(struct sk_buff *skb,
471 struct ip6addrlbl_entry *p, 483 struct ip6addrlbl_entry *p,
472 u32 lseq, 484 u32 lseq,
473 u32 pid, u32 seq, int event, 485 u32 portid, u32 seq, int event,
474 unsigned int flags) 486 unsigned int flags)
475{ 487{
476 struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, 488 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
477 sizeof(struct ifaddrlblmsg), flags); 489 sizeof(struct ifaddrlblmsg), flags);
478 if (!nlh) 490 if (!nlh)
479 return -EMSGSIZE; 491 return -EMSGSIZE;
@@ -503,7 +515,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
503 net_eq(ip6addrlbl_net(p), net)) { 515 net_eq(ip6addrlbl_net(p), net)) {
504 if ((err = ip6addrlbl_fill(skb, p, 516 if ((err = ip6addrlbl_fill(skb, p,
505 ip6addrlbl_table.seq, 517 ip6addrlbl_table.seq,
506 NETLINK_CB(cb->skb).pid, 518 NETLINK_CB(cb->skb).portid,
507 cb->nlh->nlmsg_seq, 519 cb->nlh->nlmsg_seq,
508 RTM_NEWADDRLABEL, 520 RTM_NEWADDRLABEL,
509 NLM_F_MULTI)) <= 0) 521 NLM_F_MULTI)) <= 0)
@@ -574,7 +586,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
574 } 586 }
575 587
576 err = ip6addrlbl_fill(skb, p, lseq, 588 err = ip6addrlbl_fill(skb, p, lseq,
577 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 589 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
578 RTM_NEWADDRLABEL, 0); 590 RTM_NEWADDRLABEL, 0);
579 591
580 ip6addrlbl_put(p); 592 ip6addrlbl_put(p);
@@ -585,7 +597,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
585 goto out; 597 goto out;
586 } 598 }
587 599
588 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 600 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
589out: 601out:
590 return err; 602 return err;
591} 603}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 286acfc21250..24995a93ef8c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -514,7 +514,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
514 ln = node_alloc(); 514 ln = node_alloc();
515 515
516 if (!ln) 516 if (!ln)
517 return NULL; 517 return ERR_PTR(-ENOMEM);
518 ln->fn_bit = plen; 518 ln->fn_bit = plen;
519 519
520 ln->parent = pn; 520 ln->parent = pn;
@@ -561,7 +561,7 @@ insert_above:
561 node_free(in); 561 node_free(in);
562 if (ln) 562 if (ln)
563 node_free(ln); 563 node_free(ln);
564 return NULL; 564 return ERR_PTR(-ENOMEM);
565 } 565 }
566 566
567 /* 567 /*
@@ -611,7 +611,7 @@ insert_above:
611 ln = node_alloc(); 611 ln = node_alloc();
612 612
613 if (!ln) 613 if (!ln)
614 return NULL; 614 return ERR_PTR(-ENOMEM);
615 615
616 ln->fn_bit = plen; 616 ln->fn_bit = plen;
617 617
@@ -777,11 +777,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
777 777
778 if (IS_ERR(fn)) { 778 if (IS_ERR(fn)) {
779 err = PTR_ERR(fn); 779 err = PTR_ERR(fn);
780 fn = NULL;
781 }
782
783 if (!fn)
784 goto out; 780 goto out;
781 }
785 782
786 pn = fn; 783 pn = fn;
787 784
@@ -820,15 +817,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
820 allow_create, replace_required); 817 allow_create, replace_required);
821 818
822 if (IS_ERR(sn)) { 819 if (IS_ERR(sn)) {
823 err = PTR_ERR(sn);
824 sn = NULL;
825 }
826 if (!sn) {
827 /* If it is failed, discard just allocated 820 /* If it is failed, discard just allocated
828 root, and then (in st_failure) stale node 821 root, and then (in st_failure) stale node
829 in main tree. 822 in main tree.
830 */ 823 */
831 node_free(sfn); 824 node_free(sfn);
825 err = PTR_ERR(sn);
832 goto st_failure; 826 goto st_failure;
833 } 827 }
834 828
@@ -843,10 +837,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
843 837
844 if (IS_ERR(sn)) { 838 if (IS_ERR(sn)) {
845 err = PTR_ERR(sn); 839 err = PTR_ERR(sn);
846 sn = NULL;
847 }
848 if (!sn)
849 goto st_failure; 840 goto st_failure;
841 }
850 } 842 }
851 843
852 if (!fn->leaf) { 844 if (!fn->leaf) {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 000000000000..0185679c5f53
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1770 @@
1/*
2 * GRE over IPv6 protocol decoder.
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include <linux/capability.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/slab.h>
20#include <linux/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
33#include <linux/etherdevice.h>
34#include <linux/if_ether.h>
35#include <linux/hash.h>
36#include <linux/if_tunnel.h>
37#include <linux/ip6_tunnel.h>
38
39#include <net/sock.h>
40#include <net/ip.h>
41#include <net/icmp.h>
42#include <net/protocol.h>
43#include <net/addrconf.h>
44#include <net/arp.h>
45#include <net/checksum.h>
46#include <net/dsfield.h>
47#include <net/inet_ecn.h>
48#include <net/xfrm.h>
49#include <net/net_namespace.h>
50#include <net/netns/generic.h>
51#include <net/rtnetlink.h>
52
53#include <net/ipv6.h>
54#include <net/ip6_fib.h>
55#include <net/ip6_route.h>
56#include <net/ip6_tunnel.h>
57
58
59static bool log_ecn_error = true;
60module_param(log_ecn_error, bool, 0644);
61MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
62
63#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
64#define IPV6_TCLASS_SHIFT 20
65
66#define HASH_SIZE_SHIFT 5
67#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
68
69static int ip6gre_net_id __read_mostly;
70struct ip6gre_net {
71 struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
72
73 struct net_device *fb_tunnel_dev;
74};
75
76static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
77static int ip6gre_tunnel_init(struct net_device *dev);
78static void ip6gre_tunnel_setup(struct net_device *dev);
79static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
80static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
81
82/* Tunnel hash table */
83
84/*
85 4 hash tables:
86
87 3: (remote,local)
88 2: (remote,*)
89 1: (*,local)
90 0: (*,*)
91
92 We require exact key match i.e. if a key is present in packet
93 it will match only tunnel with the same key; if it is not present,
94 it will match only keyless tunnel.
95
96 All keysless packets, if not matched configured keyless tunnels
97 will match fallback tunnel.
98 */
99
100#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
101static u32 HASH_ADDR(const struct in6_addr *addr)
102{
103 u32 hash = ipv6_addr_hash(addr);
104
105 return hash_32(hash, HASH_SIZE_SHIFT);
106}
107
108#define tunnels_r_l tunnels[3]
109#define tunnels_r tunnels[2]
110#define tunnels_l tunnels[1]
111#define tunnels_wc tunnels[0]
112/*
113 * Locking : hash tables are protected by RCU and RTNL
114 */
115
116#define for_each_ip_tunnel_rcu(start) \
117 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
118
119/* often modified stats are per cpu, other are shared (netdev->stats) */
120struct pcpu_tstats {
121 u64 rx_packets;
122 u64 rx_bytes;
123 u64 tx_packets;
124 u64 tx_bytes;
125 struct u64_stats_sync syncp;
126};
127
128static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
129 struct rtnl_link_stats64 *tot)
130{
131 int i;
132
133 for_each_possible_cpu(i) {
134 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
135 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
136 unsigned int start;
137
138 do {
139 start = u64_stats_fetch_begin_bh(&tstats->syncp);
140 rx_packets = tstats->rx_packets;
141 tx_packets = tstats->tx_packets;
142 rx_bytes = tstats->rx_bytes;
143 tx_bytes = tstats->tx_bytes;
144 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
145
146 tot->rx_packets += rx_packets;
147 tot->tx_packets += tx_packets;
148 tot->rx_bytes += rx_bytes;
149 tot->tx_bytes += tx_bytes;
150 }
151
152 tot->multicast = dev->stats.multicast;
153 tot->rx_crc_errors = dev->stats.rx_crc_errors;
154 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
155 tot->rx_length_errors = dev->stats.rx_length_errors;
156 tot->rx_frame_errors = dev->stats.rx_frame_errors;
157 tot->rx_errors = dev->stats.rx_errors;
158
159 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
160 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
161 tot->tx_dropped = dev->stats.tx_dropped;
162 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
163 tot->tx_errors = dev->stats.tx_errors;
164
165 return tot;
166}
167
168/* Given src, dst and key, find appropriate for input tunnel. */
169
170static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
171 const struct in6_addr *remote, const struct in6_addr *local,
172 __be32 key, __be16 gre_proto)
173{
174 struct net *net = dev_net(dev);
175 int link = dev->ifindex;
176 unsigned int h0 = HASH_ADDR(remote);
177 unsigned int h1 = HASH_KEY(key);
178 struct ip6_tnl *t, *cand = NULL;
179 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
181 ARPHRD_ETHER : ARPHRD_IP6GRE;
182 int score, cand_score = 4;
183
184 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
185 if (!ipv6_addr_equal(local, &t->parms.laddr) ||
186 !ipv6_addr_equal(remote, &t->parms.raddr) ||
187 key != t->parms.i_key ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (t->dev->type != ARPHRD_IP6GRE &&
192 t->dev->type != dev_type)
193 continue;
194
195 score = 0;
196 if (t->parms.link != link)
197 score |= 1;
198 if (t->dev->type != dev_type)
199 score |= 2;
200 if (score == 0)
201 return t;
202
203 if (score < cand_score) {
204 cand = t;
205 cand_score = score;
206 }
207 }
208
209 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
210 if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
211 key != t->parms.i_key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->dev->type != ARPHRD_IP6GRE &&
216 t->dev->type != dev_type)
217 continue;
218
219 score = 0;
220 if (t->parms.link != link)
221 score |= 1;
222 if (t->dev->type != dev_type)
223 score |= 2;
224 if (score == 0)
225 return t;
226
227 if (score < cand_score) {
228 cand = t;
229 cand_score = score;
230 }
231 }
232
233 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
234 if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
235 (!ipv6_addr_equal(local, &t->parms.raddr) ||
236 !ipv6_addr_is_multicast(local))) ||
237 key != t->parms.i_key ||
238 !(t->dev->flags & IFF_UP))
239 continue;
240
241 if (t->dev->type != ARPHRD_IP6GRE &&
242 t->dev->type != dev_type)
243 continue;
244
245 score = 0;
246 if (t->parms.link != link)
247 score |= 1;
248 if (t->dev->type != dev_type)
249 score |= 2;
250 if (score == 0)
251 return t;
252
253 if (score < cand_score) {
254 cand = t;
255 cand_score = score;
256 }
257 }
258
259 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
260 if (t->parms.i_key != key ||
261 !(t->dev->flags & IFF_UP))
262 continue;
263
264 if (t->dev->type != ARPHRD_IP6GRE &&
265 t->dev->type != dev_type)
266 continue;
267
268 score = 0;
269 if (t->parms.link != link)
270 score |= 1;
271 if (t->dev->type != dev_type)
272 score |= 2;
273 if (score == 0)
274 return t;
275
276 if (score < cand_score) {
277 cand = t;
278 cand_score = score;
279 }
280 }
281
282 if (cand != NULL)
283 return cand;
284
285 dev = ign->fb_tunnel_dev;
286 if (dev->flags & IFF_UP)
287 return netdev_priv(dev);
288
289 return NULL;
290}
291
292static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
293 const struct __ip6_tnl_parm *p)
294{
295 const struct in6_addr *remote = &p->raddr;
296 const struct in6_addr *local = &p->laddr;
297 unsigned int h = HASH_KEY(p->i_key);
298 int prio = 0;
299
300 if (!ipv6_addr_any(local))
301 prio |= 1;
302 if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
303 prio |= 2;
304 h ^= HASH_ADDR(remote);
305 }
306
307 return &ign->tunnels[prio][h];
308}
309
310static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
311 const struct ip6_tnl *t)
312{
313 return __ip6gre_bucket(ign, &t->parms);
314}
315
316static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
317{
318 struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
319
320 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
321 rcu_assign_pointer(*tp, t);
322}
323
324static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
325{
326 struct ip6_tnl __rcu **tp;
327 struct ip6_tnl *iter;
328
329 for (tp = ip6gre_bucket(ign, t);
330 (iter = rtnl_dereference(*tp)) != NULL;
331 tp = &iter->next) {
332 if (t == iter) {
333 rcu_assign_pointer(*tp, t->next);
334 break;
335 }
336 }
337}
338
/*
 * ip6gre_tunnel_find - exact-match lookup of a tunnel by parameters.
 *
 * Unlike the wildcarding receive-path lookup, every attribute must
 * match: local and remote address, input key, bound link and device
 * @type (ARPHRD_IP6GRE vs ARPHRD_ETHER for gretap).
 * Returns the tunnel or NULL. Caller holds RTNL.
 */
static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
		const struct __ip6_tnl_parm *parms,
		int type)
{
	const struct in6_addr *remote = &parms->raddr;
	const struct in6_addr *local = &parms->laddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip6_tnl *t;
	struct ip6_tnl __rcu **tp;
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);

	for (tp = __ip6gre_bucket(ign, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next)
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;

	return t;
}
363
/*
 * ip6gre_tunnel_locate - find a tunnel matching @parms, optionally
 * creating it.
 *
 * When no match exists and @create is non-zero, a new ip6gre netdev
 * is allocated, configured from @parms, registered, and linked into
 * the hash table (link config must run before register_netdevice so
 * the device comes up with a valid MTU/hard_header_len).
 * Returns the tunnel, or NULL on allocation/registration failure or
 * when not found and @create is zero. Caller holds RTNL.
 */
static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
		const struct __ip6_tnl_parm *parms, int create)
{
	struct ip6_tnl *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);

	t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "ip6gre%d");	/* kernel assigns the index */

	dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ip6gre_link_ops;

	nt->dev = dev;
	ip6gre_tnl_link_config(nt, 1);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	/* Reference dropped again in ip6gre_tunnel_uninit(). */
	dev_hold(dev);
	ip6gre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
409
/*
 * ndo_uninit: remove the tunnel from the hash table and drop the
 * reference taken when it was linked.
 */
static void ip6gre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);

	ip6gre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
418
419
/*
 * ip6gre_err - ICMPv6 error handler for GRE-in-IPv6.
 *
 * Parses the GRE header echoed inside the ICMPv6 payload (accounting
 * for optional checksum/key fields), finds the matching tunnel, logs
 * or reacts to the error (adjusting dev->mtu on PKT_TOOBIG), and
 * maintains t->err_count/err_time which throttle the transmit path.
 */
static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
	__be16 *p = (__be16 *)(skb->data + offset);
	int grehlen = offset + 4;	/* basic GRE header: flags + proto */
	struct ip6_tnl *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (!pskb_may_pull(skb, grehlen))
		return;
	/* pskb_may_pull() may reallocate skb->data: refresh both pointers. */
	ipv6h = (const struct ipv6hdr *)skb->data;
	p = (__be16 *)(skb->data + offset);

	/* Key (if present) is the last 32-bit word before the payload. */
	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL)
		return;

	switch (type) {
		__u32 teli;
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu;
	case ICMPV6_DEST_UNREACH:
		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
				     t->parms.name);
		break;
	case ICMPV6_TIME_EXCEED:
		if (code == ICMPV6_EXC_HOPLIMIT) {
			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					     t->parms.name);
		}
		break;
	case ICMPV6_PARAMPROB:
		/* Peer choked on our tunnel-encapsulation-limit option? */
		teli = 0;
		if (code == ICMPV6_HDR_FIELD)
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);

		if (teli && teli == info - 2) {
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						     t->parms.name);
			}
		} else {
			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					     t->parms.name);
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		mtu = info - offset;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		t->dev->mtu = mtu;
		break;
	}

	/* Rate-track errors; the xmit path consults err_count/err_time. */
	if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}
497
/*
 * ip6gre_rcv - GRE-in-IPv6 receive handler.
 *
 * Parses the GRE header (checksum/key/sequence fields as flagged),
 * looks up the matching tunnel, validates checksum and sequence
 * number, strips the encapsulation (including Ethernet framing for
 * gretap and the WCCP quirk), updates per-cpu stats and re-injects
 * the inner packet. Always consumes the skb; packets with no
 * matching tunnel are answered with ICMPv6 port-unreachable.
 */
static int ip6gre_rcv(struct sk_buff *skb)
{
	const struct ipv6hdr *ipv6h;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip6_tnl *tunnel;
	int    offset = 4;	/* basic GRE header: flags + protocol */
	__be16 gre_proto;
	int err;

	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
		goto drop;

	ipv6h = ipv6_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	tunnel = ip6gre_tunnel_lookup(skb->dev,
					  &ipv6h->saddr, &ipv6h->daddr, key,
					  gre_proto);
	if (tunnel) {
		struct pcpu_tstats *tstats;

		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;

		if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
			tunnel->dev->stats.rx_dropped++;
			goto drop;
		}

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;

		/* A non-zero folded checksum, or a missing checksum field
		 * when the tunnel requires one, is a CRC error. */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			/* Drop missing, stale or out-of-order sequence numbers. */
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno &&
					(s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}

			ipv6h = ipv6_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		__skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);

		/* Propagate/validate ECN from the outer header. */
		err = IP6_ECN_decapsulate(ipv6h, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
						     &ipv6h->saddr,
						     ipv6_get_dsfield(ipv6h));
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		netif_rx(skb);

		return 0;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);

drop:
	kfree_skb(skb);
	return 0;
}
646
/* Scratch ipv6_txoptions holding a single tunnel-encapsulation-limit
 * destination option (8 bytes including PadN padding). */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
651
652static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
653{
654 memset(opt, 0, sizeof(struct ipv6_tel_txoption));
655
656 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
657 opt->dst_opt[3] = 1;
658 opt->dst_opt[4] = encap_limit;
659 opt->dst_opt[5] = IPV6_TLV_PADN;
660 opt->dst_opt[6] = 1;
661
662 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
663 opt->ops.opt_nflen = 8;
664}
665
666static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
667 struct net_device *dev,
668 __u8 dsfield,
669 struct flowi6 *fl6,
670 int encap_limit,
671 __u32 *pmtu)
672{
673 struct net *net = dev_net(dev);
674 struct ip6_tnl *tunnel = netdev_priv(dev);
675 struct net_device *tdev; /* Device to other host */
676 struct ipv6hdr *ipv6h; /* Our new IP header */
677 unsigned int max_headroom; /* The extra header space needed */
678 int gre_hlen;
679 struct ipv6_tel_txoption opt;
680 int mtu;
681 struct dst_entry *dst = NULL, *ndst = NULL;
682 struct net_device_stats *stats = &tunnel->dev->stats;
683 int err = -1;
684 u8 proto;
685 int pkt_len;
686 struct sk_buff *new_skb;
687
688 if (dev->type == ARPHRD_ETHER)
689 IPCB(skb)->flags = 0;
690
691 if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
692 gre_hlen = 0;
693 ipv6h = (struct ipv6hdr *)skb->data;
694 fl6->daddr = ipv6h->daddr;
695 } else {
696 gre_hlen = tunnel->hlen;
697 fl6->daddr = tunnel->parms.raddr;
698 }
699
700 if (!fl6->flowi6_mark)
701 dst = ip6_tnl_dst_check(tunnel);
702
703 if (!dst) {
704 ndst = ip6_route_output(net, NULL, fl6);
705
706 if (ndst->error)
707 goto tx_err_link_failure;
708 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
709 if (IS_ERR(ndst)) {
710 err = PTR_ERR(ndst);
711 ndst = NULL;
712 goto tx_err_link_failure;
713 }
714 dst = ndst;
715 }
716
717 tdev = dst->dev;
718
719 if (tdev == dev) {
720 stats->collisions++;
721 net_warn_ratelimited("%s: Local routing loop detected!\n",
722 tunnel->parms.name);
723 goto tx_err_dst_release;
724 }
725
726 mtu = dst_mtu(dst) - sizeof(*ipv6h);
727 if (encap_limit >= 0) {
728 max_headroom += 8;
729 mtu -= 8;
730 }
731 if (mtu < IPV6_MIN_MTU)
732 mtu = IPV6_MIN_MTU;
733 if (skb_dst(skb))
734 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
735 if (skb->len > mtu) {
736 *pmtu = mtu;
737 err = -EMSGSIZE;
738 goto tx_err_dst_release;
739 }
740
741 if (tunnel->err_count > 0) {
742 if (time_before(jiffies,
743 tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
744 tunnel->err_count--;
745
746 dst_link_failure(skb);
747 } else
748 tunnel->err_count = 0;
749 }
750
751 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
752
753 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
754 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
755 new_skb = skb_realloc_headroom(skb, max_headroom);
756 if (max_headroom > dev->needed_headroom)
757 dev->needed_headroom = max_headroom;
758 if (!new_skb)
759 goto tx_err_dst_release;
760
761 if (skb->sk)
762 skb_set_owner_w(new_skb, skb->sk);
763 consume_skb(skb);
764 skb = new_skb;
765 }
766
767 skb_dst_drop(skb);
768
769 if (fl6->flowi6_mark) {
770 skb_dst_set(skb, dst);
771 ndst = NULL;
772 } else {
773 skb_dst_set_noref(skb, dst);
774 }
775
776 skb->transport_header = skb->network_header;
777
778 proto = NEXTHDR_GRE;
779 if (encap_limit >= 0) {
780 init_tel_txopt(&opt, encap_limit);
781 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
782 }
783
784 skb_push(skb, gre_hlen);
785 skb_reset_network_header(skb);
786
787 /*
788 * Push down and install the IP header.
789 */
790 ipv6h = ipv6_hdr(skb);
791 *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
792 dsfield = INET_ECN_encapsulate(0, dsfield);
793 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
794 ipv6h->hop_limit = tunnel->parms.hop_limit;
795 ipv6h->nexthdr = proto;
796 ipv6h->saddr = fl6->saddr;
797 ipv6h->daddr = fl6->daddr;
798
799 ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
800 ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
801 htons(ETH_P_TEB) : skb->protocol;
802
803 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
804 __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
805
806 if (tunnel->parms.o_flags&GRE_SEQ) {
807 ++tunnel->o_seqno;
808 *ptr = htonl(tunnel->o_seqno);
809 ptr--;
810 }
811 if (tunnel->parms.o_flags&GRE_KEY) {
812 *ptr = tunnel->parms.o_key;
813 ptr--;
814 }
815 if (tunnel->parms.o_flags&GRE_CSUM) {
816 *ptr = 0;
817 *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
818 skb->len - sizeof(struct ipv6hdr));
819 }
820 }
821
822 nf_reset(skb);
823 pkt_len = skb->len;
824 err = ip6_local_out(skb);
825
826 if (net_xmit_eval(err) == 0) {
827 struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
828
829 tstats->tx_bytes += pkt_len;
830 tstats->tx_packets++;
831 } else {
832 stats->tx_errors++;
833 stats->tx_aborted_errors++;
834 }
835
836 if (ndst)
837 ip6_tnl_dst_store(tunnel, ndst);
838
839 return 0;
840tx_err_link_failure:
841 stats->tx_carrier_errors++;
842 dst_link_failure(skb);
843tx_err_dst_release:
844 dst_release(ndst);
845 return err;
846}
847
/*
 * Encapsulate an IPv4 packet, mapping TOS/fwmark into the flow per
 * the tunnel flags. On -EMSGSIZE the path MTU is reflected back to
 * the sender with an ICMP FRAG_NEEDED. Returns 0 or -1.
 */
static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	const struct iphdr  *iph = ip_hdr(skb);
	int encap_limit = -1;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	int err;

	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_IPIP;

	dsfield = ipv4_get_dsfield(iph);

	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
					  & IPV6_TCLASS_MASK;
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;

	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
		return -1;
	}

	return 0;
}
883
/*
 * Encapsulate an IPv6 packet. Rejects trivial loops (inner source ==
 * tunnel exit point), honours an inner tunnel-encapsulation-limit
 * option (decrementing it, or bouncing PARAMPROB when exhausted),
 * and maps tclass/flowlabel/fwmark per the tunnel flags. On
 * -EMSGSIZE an ICMPv6 PKT_TOOBIG carries the path MTU back.
 * Returns 0 or -1.
 */
static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	int err;

	if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
		return -1;

	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
	if (offset > 0) {
		struct ipv6_tlv_tnl_enc_lim *tel;
		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
		if (tel->encap_limit == 0) {
			icmpv6_send(skb, ICMPV6_PARAMPROB,
				    ICMPV6_HDR_FIELD, offset + 2);
			return -1;
		}
		encap_limit = tel->encap_limit - 1;
	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_IPV6;

	dsfield = ipv6_get_dsfield(ipv6h);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;

	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
	if (err != 0) {
		if (err == -EMSGSIZE)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		return -1;
	}

	return 0;
}
931
932/**
933 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
934 * @t: the outgoing tunnel device
935 * @hdr: IPv6 header from the incoming packet
936 *
937 * Description:
938 * Avoid trivial tunneling loop by checking that tunnel exit-point
939 * doesn't match source of incoming packet.
940 *
941 * Return:
942 * 1 if conflict,
943 * 0 else
944 **/
945
946static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
947 const struct ipv6hdr *hdr)
948{
949 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
950}
951
952static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
953{
954 struct ip6_tnl *t = netdev_priv(dev);
955 int encap_limit = -1;
956 struct flowi6 fl6;
957 __u32 mtu;
958 int err;
959
960 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
961 encap_limit = t->parms.encap_limit;
962
963 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
964 fl6.flowi6_proto = skb->protocol;
965
966 err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
967
968 return err;
969}
970
/*
 * ndo_start_xmit: dispatch to the per-protocol encapsulator, counting
 * failures in tx_errors/tx_dropped. Always returns NETDEV_TX_OK once
 * transmission is permitted (the skb is consumed either way).
 */
static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
	struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net_device_stats *stats = &t->dev->stats;
	int ret;

	if (!ip6_tnl_xmit_ctl(t))
		return -1;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ret = ip6gre_xmit_ipv4(skb, dev);
		break;
	case htons(ETH_P_IPV6):
		ret = ip6gre_xmit_ipv6(skb, dev);
		break;
	default:
		ret = ip6gre_xmit_other(skb, dev);
		break;
	}

	if (ret < 0)
		goto tx_err;

	return NETDEV_TX_OK;

tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
1004
/*
 * ip6gre_tnl_link_config - (re)derive device state from tunnel parms.
 *
 * Refreshes the device addresses, the cached flowi template, the
 * point-to-point flag, and precomputes t->hlen (IPv6 header + GRE
 * header including optional csum/key/seq words). When @set_mtu is
 * non-zero and the remote is routable, dev->mtu is derived from the
 * underlying device's MTU minus the encapsulation overhead.
 */
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
	struct net_device *dev = t->dev;
	struct __ip6_tnl_parm *p = &t->parms;
	struct flowi6 *fl6 = &t->fl.u.ip6;
	int addend = sizeof(struct ipv6hdr) + 4;	/* basic GRE header */

	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
		memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
	}

	/* Set up flowi template */
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;

	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;

	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);

	if (p->flags&IP6_TNL_F_CAP_XMIT &&
			p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	dev->iflink = p->link;

	/* Precalculate GRE options length */
	if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (t->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (t->parms.o_flags&GRE_KEY)
			addend += 4;
		if (t->parms.o_flags&GRE_SEQ)
			addend += 4;
	}

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

		struct rt6_info *rt = rt6_lookup(dev_net(dev),
						 &p->raddr, &p->laddr,
						 p->link, strict);

		if (rt == NULL)
			return;

		if (rt->dst.dev) {
			dev->hard_header_len = rt->dst.dev->hard_header_len + addend;

			if (set_mtu) {
				dev->mtu = rt->dst.dev->mtu - addend;
				/* 8 more bytes for the encap-limit option */
				if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
					dev->mtu -= 8;

				if (dev->mtu < IPV6_MIN_MTU)
					dev->mtu = IPV6_MIN_MTU;
			}
		}
		dst_release(&rt->dst);
	}

	t->hlen = addend;
}
1077
/*
 * Apply new parameters to an existing tunnel (addresses, GRE keys and
 * flags, limits), flush the cached dst and recompute the link
 * configuration. Caller must have unhashed/rehashed the tunnel if the
 * hash-relevant fields changed. Always returns 0.
 */
static int ip6gre_tnl_change(struct ip6_tnl *t,
	const struct __ip6_tnl_parm *p, int set_mtu)
{
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
	t->parms.link = p->link;
	t->parms.proto = p->proto;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	t->parms.i_flags = p->i_flags;
	t->parms.o_flags = p->o_flags;
	ip6_tnl_dst_reset(t);
	ip6gre_tnl_link_config(t, set_mtu);
	return 0;
}
1097
/* Convert the userspace ioctl structure into the kernel parm layout. */
static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
	const struct ip6_tnl_parm2 *u)
{
	p->laddr = u->laddr;
	p->raddr = u->raddr;
	p->flags = u->flags;
	p->hop_limit = u->hop_limit;
	p->encap_limit = u->encap_limit;
	p->flowinfo = u->flowinfo;
	p->link = u->link;
	p->i_key = u->i_key;
	p->o_key = u->o_key;
	p->i_flags = u->i_flags;
	p->o_flags = u->o_flags;
	memcpy(p->name, u->name, sizeof(u->name));
}
1114
/* Convert kernel tunnel parms back to the userspace ioctl structure;
 * the protocol is always reported as GRE. */
static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
	const struct __ip6_tnl_parm *p)
{
	u->proto = IPPROTO_GRE;
	u->laddr = p->laddr;
	u->raddr = p->raddr;
	u->flags = p->flags;
	u->hop_limit = p->hop_limit;
	u->encap_limit = p->encap_limit;
	u->flowinfo = p->flowinfo;
	u->link = p->link;
	u->i_key = p->i_key;
	u->o_key = p->o_key;
	u->i_flags = p->i_flags;
	u->o_flags = p->o_flags;
	memcpy(u->name, p->name, sizeof(u->name));
}
1132
/*
 * ndo_do_ioctl: legacy SIOC{GET,ADD,CHG,DEL}TUNNEL interface.
 *
 * GET works on any tunnel device (the fallback device allows lookup
 * by parameters); ADD/CHG/DEL require CAP_NET_ADMIN. Runs under RTNL
 * via the ioctl path.
 */
static int ip6gre_tunnel_ioctl(struct net_device *dev,
	struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip6_tnl_parm2 p;
	struct __ip6_tnl_parm p1;
	struct ip6_tnl *t;
	struct net *net = dev_net(dev);
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			/* Lookup by user-supplied parameters. */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			ip6gre_tnl_parm_from_user(&p1, &p);
			t = ip6gre_tunnel_locate(net, &p1, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		ip6gre_tnl_parm_to_user(&p, &t->parms);
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
			goto done;

		/* Keys are meaningful only when the KEY flag is set. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		ip6gre_tnl_parm_from_user(&p1, &p);
		t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters would collide with another tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				t = netdev_priv(dev);

				/* Rehash under the new parameters. */
				ip6gre_tunnel_unlink(ign, t);
				synchronize_net();
				ip6gre_tnl_change(t, &p1, 1);
				ip6gre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;

			/* Report the effective parameters back. */
			ip6gre_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			/* Delete by parameters; the fallback itself is immortal. */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			ip6gre_tnl_parm_from_user(&p1, &p);
			t = ip6gre_tunnel_locate(net, &p1, 0);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1240
1241static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1242{
1243 struct ip6_tnl *tunnel = netdev_priv(dev);
1244 if (new_mtu < 68 ||
1245 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1246 return -EINVAL;
1247 dev->mtu = new_mtu;
1248 return 0;
1249}
1250
/*
 * header_ops->create for NBMA ip6gre devices: pre-build the outer
 * IPv6 header plus the basic GRE header in the skb headroom.
 * Returns the positive header length when the destination is known,
 * or the negative length when the caller must still fill it in.
 */
static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16 *)(ipv6h+1);

	/* Version 6 plus the tunnel's flow label. */
	*(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = NEXTHDR_GRE;
	ipv6h->saddr = t->parms.laddr;
	ipv6h->daddr = t->parms.raddr;

	p[0] = t->parms.o_flags;
	p[1] = htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
	if (daddr)
		memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
	if (!ipv6_addr_any(&ipv6h->daddr))
		return t->hlen;

	return -t->hlen;
}
1281
/* Only .create is needed; addresses live in the tunnel parameters. */
static const struct header_ops ip6gre_header_ops = {
	.create	= ip6gre_header,
};
1285
/* netdev ops for plain (layer-3) ip6gre tunnel devices. */
static const struct net_device_ops ip6gre_netdev_ops = {
	.ndo_init		= ip6gre_tunnel_init,
	.ndo_uninit		= ip6gre_tunnel_uninit,
	.ndo_start_xmit		= ip6gre_tunnel_xmit,
	.ndo_do_ioctl		= ip6gre_tunnel_ioctl,
	.ndo_change_mtu		= ip6gre_tunnel_change_mtu,
	.ndo_get_stats64	= ip6gre_get_stats64,
};
1294
/* netdev destructor: release the per-cpu stats, then the device. */
static void ip6gre_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
1300
/*
 * alloc_netdev setup callback for layer-3 ip6gre devices: ops,
 * ARPHRD_IP6GRE type, and MTU/headroom sized for the basic IPv6+GRE
 * encapsulation (minus 8 more when the encap-limit option applies).
 */
static void ip6gre_tunnel_setup(struct net_device *dev)
{
	struct ip6_tnl *t;

	dev->netdev_ops = &ip6gre_netdev_ops;
	dev->destructor = ip6gre_dev_free;

	dev->type = ARPHRD_IP6GRE;
	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
	dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
	t = netdev_priv(dev);
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		dev->mtu -= 8;
	dev->flags |= IFF_NOARP;
	dev->iflink = 0;
	dev->addr_len = sizeof(struct in6_addr);
	dev->features |= NETIF_F_NETNS_LOCAL;
	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
1320
/*
 * ndo_init for layer-3 ip6gre devices: copy addresses onto the
 * device, install header_ops only for NBMA tunnels (no fixed remote),
 * and allocate per-cpu stats. Returns 0 or -ENOMEM.
 */
static int ip6gre_tunnel_init(struct net_device *dev)
{
	struct ip6_tnl *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));

	/* NBMA tunnel: callers supply the destination per packet. */
	if (ipv6_addr_any(&tunnel->parms.raddr))
		dev->header_ops = &ip6gre_header_ops;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
1342
/*
 * Initialise the per-netns fallback device ("ip6gre0"): fixed hlen
 * for the basic IPv6+GRE header and a self-reference so it is never
 * freed while the namespace lives.
 */
static void ip6gre_fb_tunnel_init(struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	tunnel->hlen		= sizeof(struct ipv6hdr) + 4;

	dev_hold(dev);
}
1354
1355
/* IPv6 protocol handler registration for IPPROTO_GRE. */
static struct inet6_protocol ip6gre_protocol __read_mostly = {
	.handler     = ip6gre_rcv,
	.err_handler = ip6gre_err,
	.flags       = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
1361
/*
 * Queue every tunnel in the namespace (all 4 priority classes, all
 * hash chains) for unregistration on @head. Caller holds RTNL.
 */
static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
	struct list_head *head)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip6_tnl *t;

			t = rtnl_dereference(ign->tunnels[prio][h]);

			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}
1381
/*
 * Per-netns init: create and register the fallback "ip6gre0" device
 * and hash it into the fully-wildcarded chain (tunnels_wc[0]) so it
 * catches otherwise-unmatched GRE traffic. Returns 0 or -errno.
 */
static int __net_init ip6gre_init_net(struct net *net)
{
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
	int err;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
					   ip6gre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;

	err = register_netdev(ign->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;

	rcu_assign_pointer(ign->tunnels_wc[0],
			   netdev_priv(ign->fb_tunnel_dev));
	return 0;

err_reg_dev:
	ip6gre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
	return err;
}
1411
/* Per-netns exit: batch-unregister every tunnel under RTNL. */
static void __net_exit ip6gre_exit_net(struct net *net)
{
	struct ip6gre_net *ign;
	LIST_HEAD(list);

	ign = net_generic(net, ip6gre_net_id);
	rtnl_lock();
	ip6gre_destroy_tunnels(ign, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
1423
/* Per-network-namespace registration for the ip6gre module. */
static struct pernet_operations ip6gre_net_ops = {
	.init = ip6gre_init_net,
	.exit = ip6gre_exit_net,
	.id   = &ip6gre_net_id,
	.size = sizeof(struct ip6gre_net),
};
1430
1431static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1432{
1433 __be16 flags;
1434
1435 if (!data)
1436 return 0;
1437
1438 flags = 0;
1439 if (data[IFLA_GRE_IFLAGS])
1440 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1441 if (data[IFLA_GRE_OFLAGS])
1442 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1443 if (flags & (GRE_VERSION|GRE_ROUTING))
1444 return -EINVAL;
1445
1446 return 0;
1447}
1448
/*
 * rtnl_link validate for gretap (Ethernet-over-GRE) devices: checks
 * the optional MAC address and requires a non-unspecified remote when
 * one is supplied, then applies the common GRE flag validation.
 */
static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	struct in6_addr daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
		if (ipv6_addr_any(&daddr))
			return -EINVAL;
	}

out:
	return ip6gre_tunnel_validate(tb, data);
}
1472
1473
/*
 * Fill @parms from netlink attributes; unspecified attributes leave
 * zeroed defaults.
 */
static void ip6gre_netlink_parms(struct nlattr *data[],
				struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));

	if (data[IFLA_GRE_REMOTE])
		nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));

	if (data[IFLA_GRE_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);

	if (data[IFLA_GRE_FLOWINFO])
		parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);

	if (data[IFLA_GRE_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
}
1515
/*
 * ndo_init for gretap devices: derive link config from the parms and
 * allocate per-cpu stats. Returns 0 or -ENOMEM.
 */
static int ip6gre_tap_init(struct net_device *dev)
{
	struct ip6_tnl *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ip6gre_tnl_link_config(tunnel, 1);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
1533
/* netdev ops for gretap (Ethernet-over-GRE-over-IPv6) devices. */
static const struct net_device_ops ip6gre_tap_netdev_ops = {
	.ndo_init = ip6gre_tap_init,
	.ndo_uninit = ip6gre_tunnel_uninit,
	.ndo_start_xmit = ip6gre_tunnel_xmit,
	.ndo_set_mac_address = eth_mac_addr,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_change_mtu = ip6gre_tunnel_change_mtu,
	.ndo_get_stats64 = ip6gre_get_stats64,
};
1543
/* alloc_netdev setup callback for gretap: Ethernet defaults plus the
 * gretap ops; the device stays local to its network namespace. */
static void ip6gre_tap_setup(struct net_device *dev)
{

	ether_setup(dev);

	dev->netdev_ops = &ip6gre_tap_netdev_ops;
	dev->destructor = ip6gre_dev_free;

	dev->iflink = 0;
	dev->features |= NETIF_F_NETNS_LOCAL;
}
1555
/*
 * rtnl_link newlink: create a tunnel from netlink attributes.
 * Rejects duplicates, randomises the MAC for gretap when none was
 * given, registers the device and links it into the hash table.
 * Runs under RTNL. Returns 0 or -errno.
 */
static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
	struct nlattr *tb[], struct nlattr *data[])
{
	struct ip6_tnl *nt;
	struct net *net = dev_net(dev);
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
	int err;

	nt = netdev_priv(dev);
	ip6gre_netlink_parms(data, &nt->parms);

	if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	nt->dev = dev;
	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* Reference dropped again in ip6gre_tunnel_uninit(). */
	dev_hold(dev);
	ip6gre_tunnel_link(ign, nt);

out:
	return err;
}
1590
1591static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
1592 struct nlattr *data[])
1593{
1594 struct ip6_tnl *t, *nt;
1595 struct net *net = dev_net(dev);
1596 struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1597 struct __ip6_tnl_parm p;
1598
1599 if (dev == ign->fb_tunnel_dev)
1600 return -EINVAL;
1601
1602 nt = netdev_priv(dev);
1603 ip6gre_netlink_parms(data, &p);
1604
1605 t = ip6gre_tunnel_locate(net, &p, 0);
1606
1607 if (t) {
1608 if (t->dev != dev)
1609 return -EEXIST;
1610 } else {
1611 t = nt;
1612
1613 ip6gre_tunnel_unlink(ign, t);
1614 ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
1615 ip6gre_tunnel_link(ign, t);
1616 netdev_state_change(dev);
1617 }
1618
1619 return 0;
1620}
1621
1622static size_t ip6gre_get_size(const struct net_device *dev)
1623{
1624 return
1625 /* IFLA_GRE_LINK */
1626 nla_total_size(4) +
1627 /* IFLA_GRE_IFLAGS */
1628 nla_total_size(2) +
1629 /* IFLA_GRE_OFLAGS */
1630 nla_total_size(2) +
1631 /* IFLA_GRE_IKEY */
1632 nla_total_size(4) +
1633 /* IFLA_GRE_OKEY */
1634 nla_total_size(4) +
1635 /* IFLA_GRE_LOCAL */
1636 nla_total_size(4) +
1637 /* IFLA_GRE_REMOTE */
1638 nla_total_size(4) +
1639 /* IFLA_GRE_TTL */
1640 nla_total_size(1) +
1641 /* IFLA_GRE_TOS */
1642 nla_total_size(1) +
1643 /* IFLA_GRE_ENCAP_LIMIT */
1644 nla_total_size(1) +
1645 /* IFLA_GRE_FLOWINFO */
1646 nla_total_size(4) +
1647 /* IFLA_GRE_FLAGS */
1648 nla_total_size(4) +
1649 0;
1650}
1651
1652static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1653{
1654 struct ip6_tnl *t = netdev_priv(dev);
1655 struct __ip6_tnl_parm *p = &t->parms;
1656
1657 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1658 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1659 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1660 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1661 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1662 nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) ||
1663 nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) ||
1664 nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
1665 /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
1666 nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
1667 nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
1668 nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
1669 goto nla_put_failure;
1670 return 0;
1671
1672nla_put_failure:
1673 return -EMSGSIZE;
1674}
1675
/* Netlink attribute validation policy shared by the ip6gre and ip6gretap
 * link ops; LOCAL/REMOTE must carry a full 16-byte IPv6 address.
 */
static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]        = { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]      = { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]      = { .type = NLA_U16 },
	[IFLA_GRE_IKEY]        = { .type = NLA_U32 },
	[IFLA_GRE_OKEY]        = { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]       = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
	[IFLA_GRE_REMOTE]      = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
	[IFLA_GRE_TTL]         = { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
	[IFLA_GRE_FLOWINFO]    = { .type = NLA_U32 },
	[IFLA_GRE_FLAGS]       = { .type = NLA_U32 },
};
1689
/* rtnl_link_ops for plain layer-3 "ip6gre" tunnel devices. */
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
	.kind		= "ip6gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ip6gre_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6gre_tunnel_setup,
	.validate	= ip6gre_tunnel_validate,
	.newlink	= ip6gre_newlink,
	.changelink	= ip6gre_changelink,
	.get_size	= ip6gre_get_size,
	.fill_info	= ip6gre_fill_info,
};
1702
/* rtnl_link_ops for Ethernet-over-GRE "ip6gretap" devices; differs from
 * ip6gre_link_ops only in the setup and validate callbacks.
 */
static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
	.kind		= "ip6gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ip6gre_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6gre_tap_setup,
	.validate	= ip6gre_tap_validate,
	.newlink	= ip6gre_newlink,
	.changelink	= ip6gre_changelink,
	.get_size	= ip6gre_get_size,
	.fill_info	= ip6gre_fill_info,
};
1715
1716/*
1717 * And now the modules code and kernel interface.
1718 */
1719
1720static int __init ip6gre_init(void)
1721{
1722 int err;
1723
1724 pr_info("GRE over IPv6 tunneling driver\n");
1725
1726 err = register_pernet_device(&ip6gre_net_ops);
1727 if (err < 0)
1728 return err;
1729
1730 err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
1731 if (err < 0) {
1732 pr_info("%s: can't add protocol\n", __func__);
1733 goto add_proto_failed;
1734 }
1735
1736 err = rtnl_link_register(&ip6gre_link_ops);
1737 if (err < 0)
1738 goto rtnl_link_failed;
1739
1740 err = rtnl_link_register(&ip6gre_tap_ops);
1741 if (err < 0)
1742 goto tap_ops_failed;
1743
1744out:
1745 return err;
1746
1747tap_ops_failed:
1748 rtnl_link_unregister(&ip6gre_link_ops);
1749rtnl_link_failed:
1750 inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1751add_proto_failed:
1752 unregister_pernet_device(&ip6gre_net_ops);
1753 goto out;
1754}
1755
/* Module exit: tear everything down in the reverse order of ip6gre_init(). */
static void __exit ip6gre_fini(void)
{
	rtnl_link_unregister(&ip6gre_tap_ops);
	rtnl_link_unregister(&ip6gre_link_ops);
	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
	unregister_pernet_device(&ip6gre_net_ops);
}
1763
/* Module registration and metadata; the rtnl-link/netdev aliases let the
 * kernel autoload this module on "ip link add ... type ip6gre".
 */
module_init(ip6gre_init);
module_exit(ip6gre_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5b2d63ed793e..aece3e792f84 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -123,16 +123,11 @@ static int ip6_finish_output2(struct sk_buff *skb)
123 skb->len); 123 skb->len);
124 } 124 }
125 125
126 rcu_read_lock();
127 rt = (struct rt6_info *) dst; 126 rt = (struct rt6_info *) dst;
128 neigh = rt->n; 127 neigh = rt->n;
129 if (neigh) { 128 if (neigh)
130 int res = dst_neigh_output(dst, neigh, skb); 129 return dst_neigh_output(dst, neigh, skb);
131 130
132 rcu_read_unlock();
133 return res;
134 }
135 rcu_read_unlock();
136 IP6_INC_STATS_BH(dev_net(dst->dev), 131 IP6_INC_STATS_BH(dev_net(dst->dev),
137 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 132 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
138 kfree_skb(skb); 133 kfree_skb(skb);
@@ -493,7 +488,8 @@ int ip6_forward(struct sk_buff *skb)
493 if (mtu < IPV6_MIN_MTU) 488 if (mtu < IPV6_MIN_MTU)
494 mtu = IPV6_MIN_MTU; 489 mtu = IPV6_MIN_MTU;
495 490
496 if (skb->len > mtu && !skb_is_gso(skb)) { 491 if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
492 (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
497 /* Again, force OUTPUT device used as source address */ 493 /* Again, force OUTPUT device used as source address */
498 skb->dev = dst->dev; 494 skb->dev = dst->dev;
499 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 495 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -636,7 +632,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
636 /* We must not fragment if the socket is set to force MTU discovery 632 /* We must not fragment if the socket is set to force MTU discovery
637 * or if the skb it not generated by a local socket. 633 * or if the skb it not generated by a local socket.
638 */ 634 */
639 if (unlikely(!skb->local_df && skb->len > mtu)) { 635 if (unlikely(!skb->local_df && skb->len > mtu) ||
636 (IP6CB(skb)->frag_max_size &&
637 IP6CB(skb)->frag_max_size > mtu)) {
640 if (skb->sk && dst_allfrag(skb_dst(skb))) 638 if (skb->sk && dst_allfrag(skb_dst(skb)))
641 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); 639 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
642 640
@@ -980,7 +978,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
980 * dst entry and replace it instead with the 978 * dst entry and replace it instead with the
981 * dst entry of the nexthop router 979 * dst entry of the nexthop router
982 */ 980 */
983 rcu_read_lock();
984 rt = (struct rt6_info *) *dst; 981 rt = (struct rt6_info *) *dst;
985 n = rt->n; 982 n = rt->n;
986 if (n && !(n->nud_state & NUD_VALID)) { 983 if (n && !(n->nud_state & NUD_VALID)) {
@@ -988,7 +985,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
988 struct flowi6 fl_gw6; 985 struct flowi6 fl_gw6;
989 int redirect; 986 int redirect;
990 987
991 rcu_read_unlock();
992 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 988 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
993 (*dst)->dev, 1); 989 (*dst)->dev, 1);
994 990
@@ -1008,8 +1004,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
1008 if ((err = (*dst)->error)) 1004 if ((err = (*dst)->error))
1009 goto out_err_release; 1005 goto out_err_release;
1010 } 1006 }
1011 } else {
1012 rcu_read_unlock();
1013 } 1007 }
1014#endif 1008#endif
1015 1009
@@ -1285,8 +1279,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1285 if (dst_allfrag(rt->dst.path)) 1279 if (dst_allfrag(rt->dst.path))
1286 cork->flags |= IPCORK_ALLFRAG; 1280 cork->flags |= IPCORK_ALLFRAG;
1287 cork->length = 0; 1281 cork->length = 0;
1288 sk->sk_sndmsg_page = NULL;
1289 sk->sk_sndmsg_off = 0;
1290 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; 1282 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1291 length += exthdrlen; 1283 length += exthdrlen;
1292 transhdrlen += exthdrlen; 1284 transhdrlen += exthdrlen;
@@ -1510,48 +1502,31 @@ alloc_new_skb:
1510 } 1502 }
1511 } else { 1503 } else {
1512 int i = skb_shinfo(skb)->nr_frags; 1504 int i = skb_shinfo(skb)->nr_frags;
1513 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; 1505 struct page_frag *pfrag = sk_page_frag(sk);
1514 struct page *page = sk->sk_sndmsg_page;
1515 int off = sk->sk_sndmsg_off;
1516 unsigned int left;
1517
1518 if (page && (left = PAGE_SIZE - off) > 0) {
1519 if (copy >= left)
1520 copy = left;
1521 if (page != skb_frag_page(frag)) {
1522 if (i == MAX_SKB_FRAGS) {
1523 err = -EMSGSIZE;
1524 goto error;
1525 }
1526 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1527 skb_frag_ref(skb, i);
1528 frag = &skb_shinfo(skb)->frags[i];
1529 }
1530 } else if(i < MAX_SKB_FRAGS) {
1531 if (copy > PAGE_SIZE)
1532 copy = PAGE_SIZE;
1533 page = alloc_pages(sk->sk_allocation, 0);
1534 if (page == NULL) {
1535 err = -ENOMEM;
1536 goto error;
1537 }
1538 sk->sk_sndmsg_page = page;
1539 sk->sk_sndmsg_off = 0;
1540 1506
1541 skb_fill_page_desc(skb, i, page, 0, 0); 1507 err = -ENOMEM;
1542 frag = &skb_shinfo(skb)->frags[i]; 1508 if (!sk_page_frag_refill(sk, pfrag))
1543 } else {
1544 err = -EMSGSIZE;
1545 goto error; 1509 goto error;
1510
1511 if (!skb_can_coalesce(skb, i, pfrag->page,
1512 pfrag->offset)) {
1513 err = -EMSGSIZE;
1514 if (i == MAX_SKB_FRAGS)
1515 goto error;
1516
1517 __skb_fill_page_desc(skb, i, pfrag->page,
1518 pfrag->offset, 0);
1519 skb_shinfo(skb)->nr_frags = ++i;
1520 get_page(pfrag->page);
1546 } 1521 }
1522 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1547 if (getfrag(from, 1523 if (getfrag(from,
1548 skb_frag_address(frag) + skb_frag_size(frag), 1524 page_address(pfrag->page) + pfrag->offset,
1549 offset, copy, skb->len, skb) < 0) { 1525 offset, copy, skb->len, skb) < 0)
1550 err = -EFAULT; 1526 goto error_efault;
1551 goto error; 1527
1552 } 1528 pfrag->offset += copy;
1553 sk->sk_sndmsg_off += copy; 1529 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1554 skb_frag_size_add(frag, copy);
1555 skb->len += copy; 1530 skb->len += copy;
1556 skb->data_len += copy; 1531 skb->data_len += copy;
1557 skb->truesize += copy; 1532 skb->truesize += copy;
@@ -1560,7 +1535,11 @@ alloc_new_skb:
1560 offset += copy; 1535 offset += copy;
1561 length -= copy; 1536 length -= copy;
1562 } 1537 }
1538
1563 return 0; 1539 return 0;
1540
1541error_efault:
1542 err = -EFAULT;
1564error: 1543error:
1565 cork->length -= length; 1544 cork->length -= length;
1566 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 1545 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9a1d5fe6aef8..cb7e2ded6f08 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
126 * Locking : hash tables are protected by RCU and RTNL 126 * Locking : hash tables are protected by RCU and RTNL
127 */ 127 */
128 128
129static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 129struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
130{ 130{
131 struct dst_entry *dst = t->dst_cache; 131 struct dst_entry *dst = t->dst_cache;
132 132
@@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
139 139
140 return dst; 140 return dst;
141} 141}
142EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
142 143
143static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) 144void ip6_tnl_dst_reset(struct ip6_tnl *t)
144{ 145{
145 dst_release(t->dst_cache); 146 dst_release(t->dst_cache);
146 t->dst_cache = NULL; 147 t->dst_cache = NULL;
147} 148}
149EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
148 150
149static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) 151void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
150{ 152{
151 struct rt6_info *rt = (struct rt6_info *) dst; 153 struct rt6_info *rt = (struct rt6_info *) dst;
152 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 154 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
153 dst_release(t->dst_cache); 155 dst_release(t->dst_cache);
154 t->dst_cache = dst; 156 t->dst_cache = dst;
155} 157}
158EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
156 159
157/** 160/**
158 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 161 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
200 **/ 203 **/
201 204
202static struct ip6_tnl __rcu ** 205static struct ip6_tnl __rcu **
203ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) 206ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
204{ 207{
205 const struct in6_addr *remote = &p->raddr; 208 const struct in6_addr *remote = &p->raddr;
206 const struct in6_addr *local = &p->laddr; 209 const struct in6_addr *local = &p->laddr;
@@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev)
267 * created tunnel or NULL 270 * created tunnel or NULL
268 **/ 271 **/
269 272
270static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) 273static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
271{ 274{
272 struct net_device *dev; 275 struct net_device *dev;
273 struct ip6_tnl *t; 276 struct ip6_tnl *t;
@@ -322,7 +325,7 @@ failed:
322 **/ 325 **/
323 326
324static struct ip6_tnl *ip6_tnl_locate(struct net *net, 327static struct ip6_tnl *ip6_tnl_locate(struct net *net,
325 struct ip6_tnl_parm *p, int create) 328 struct __ip6_tnl_parm *p, int create)
326{ 329{
327 const struct in6_addr *remote = &p->raddr; 330 const struct in6_addr *remote = &p->raddr;
328 const struct in6_addr *local = &p->laddr; 331 const struct in6_addr *local = &p->laddr;
@@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
374 * else index to encapsulation limit 377 * else index to encapsulation limit
375 **/ 378 **/
376 379
377static __u16 380__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
378parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
379{ 381{
380 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; 382 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
381 __u8 nexthdr = ipv6h->nexthdr; 383 __u8 nexthdr = ipv6h->nexthdr;
@@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
425 } 427 }
426 return 0; 428 return 0;
427} 429}
430EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
428 431
429/** 432/**
430 * ip6_tnl_err - tunnel error handler 433 * ip6_tnl_err - tunnel error handler
@@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
480 case ICMPV6_PARAMPROB: 483 case ICMPV6_PARAMPROB:
481 teli = 0; 484 teli = 0;
482 if ((*code) == ICMPV6_HDR_FIELD) 485 if ((*code) == ICMPV6_HDR_FIELD)
483 teli = parse_tlv_tnl_enc_lim(skb, skb->data); 486 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
484 487
485 if (teli && teli == *info - 2) { 488 if (teli && teli == *info - 2) {
486 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 489 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
@@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
693 IP6_ECN_set_ce(ipv6_hdr(skb)); 696 IP6_ECN_set_ce(ipv6_hdr(skb));
694} 697}
695 698
696static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, 699__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
697 const struct in6_addr *laddr, 700 const struct in6_addr *laddr,
698 const struct in6_addr *raddr) 701 const struct in6_addr *raddr)
699{ 702{
700 struct ip6_tnl_parm *p = &t->parms; 703 struct __ip6_tnl_parm *p = &t->parms;
701 int ltype = ipv6_addr_type(laddr); 704 int ltype = ipv6_addr_type(laddr);
702 int rtype = ipv6_addr_type(raddr); 705 int rtype = ipv6_addr_type(raddr);
703 __u32 flags = 0; 706 __u32 flags = 0;
@@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
715 } 718 }
716 return flags; 719 return flags;
717} 720}
721EXPORT_SYMBOL(ip6_tnl_get_cap);
718 722
719/* called with rcu_read_lock() */ 723/* called with rcu_read_lock() */
720static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, 724int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
721 const struct in6_addr *laddr, 725 const struct in6_addr *laddr,
722 const struct in6_addr *raddr) 726 const struct in6_addr *raddr)
723{ 727{
724 struct ip6_tnl_parm *p = &t->parms; 728 struct __ip6_tnl_parm *p = &t->parms;
725 int ret = 0; 729 int ret = 0;
726 struct net *net = dev_net(t->dev); 730 struct net *net = dev_net(t->dev);
727 731
@@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
740 } 744 }
741 return ret; 745 return ret;
742} 746}
747EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
743 748
744/** 749/**
745 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally 750 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
@@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
859 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); 864 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
860} 865}
861 866
862static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) 867int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
863{ 868{
864 struct ip6_tnl_parm *p = &t->parms; 869 struct __ip6_tnl_parm *p = &t->parms;
865 int ret = 0; 870 int ret = 0;
866 struct net *net = dev_net(t->dev); 871 struct net *net = dev_net(t->dev);
867 872
@@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
885 } 890 }
886 return ret; 891 return ret;
887} 892}
893EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
894
888/** 895/**
889 * ip6_tnl_xmit2 - encapsulate packet and send 896 * ip6_tnl_xmit2 - encapsulate packet and send
890 * @skb: the outgoing socket buffer 897 * @skb: the outgoing socket buffer
@@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1085 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) 1092 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
1086 return -1; 1093 return -1;
1087 1094
1088 offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); 1095 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
1089 if (offset > 0) { 1096 if (offset > 0) {
1090 struct ipv6_tlv_tnl_enc_lim *tel; 1097 struct ipv6_tlv_tnl_enc_lim *tel;
1091 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; 1098 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -1152,7 +1159,7 @@ tx_err:
1152static void ip6_tnl_link_config(struct ip6_tnl *t) 1159static void ip6_tnl_link_config(struct ip6_tnl *t)
1153{ 1160{
1154 struct net_device *dev = t->dev; 1161 struct net_device *dev = t->dev;
1155 struct ip6_tnl_parm *p = &t->parms; 1162 struct __ip6_tnl_parm *p = &t->parms;
1156 struct flowi6 *fl6 = &t->fl.u.ip6; 1163 struct flowi6 *fl6 = &t->fl.u.ip6;
1157 1164
1158 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); 1165 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1215 **/ 1222 **/
1216 1223
1217static int 1224static int
1218ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) 1225ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1219{ 1226{
1220 t->parms.laddr = p->laddr; 1227 t->parms.laddr = p->laddr;
1221 t->parms.raddr = p->raddr; 1228 t->parms.raddr = p->raddr;
@@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
1230 return 0; 1237 return 0;
1231} 1238}
1232 1239
1240static void
1241ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1242{
1243 p->laddr = u->laddr;
1244 p->raddr = u->raddr;
1245 p->flags = u->flags;
1246 p->hop_limit = u->hop_limit;
1247 p->encap_limit = u->encap_limit;
1248 p->flowinfo = u->flowinfo;
1249 p->link = u->link;
1250 p->proto = u->proto;
1251 memcpy(p->name, u->name, sizeof(u->name));
1252}
1253
1254static void
1255ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1256{
1257 u->laddr = p->laddr;
1258 u->raddr = p->raddr;
1259 u->flags = p->flags;
1260 u->hop_limit = p->hop_limit;
1261 u->encap_limit = p->encap_limit;
1262 u->flowinfo = p->flowinfo;
1263 u->link = p->link;
1264 u->proto = p->proto;
1265 memcpy(u->name, p->name, sizeof(u->name));
1266}
1267
1233/** 1268/**
1234 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace 1269 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1235 * @dev: virtual device associated with tunnel 1270 * @dev: virtual device associated with tunnel
@@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1263{ 1298{
1264 int err = 0; 1299 int err = 0;
1265 struct ip6_tnl_parm p; 1300 struct ip6_tnl_parm p;
1301 struct __ip6_tnl_parm p1;
1266 struct ip6_tnl *t = NULL; 1302 struct ip6_tnl *t = NULL;
1267 struct net *net = dev_net(dev); 1303 struct net *net = dev_net(dev);
1268 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1304 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -1274,11 +1310,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1274 err = -EFAULT; 1310 err = -EFAULT;
1275 break; 1311 break;
1276 } 1312 }
1277 t = ip6_tnl_locate(net, &p, 0); 1313 ip6_tnl_parm_from_user(&p1, &p);
1314 t = ip6_tnl_locate(net, &p1, 0);
1315 } else {
1316 memset(&p, 0, sizeof(p));
1278 } 1317 }
1279 if (t == NULL) 1318 if (t == NULL)
1280 t = netdev_priv(dev); 1319 t = netdev_priv(dev);
1281 memcpy(&p, &t->parms, sizeof (p)); 1320 ip6_tnl_parm_to_user(&p, &t->parms);
1282 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { 1321 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
1283 err = -EFAULT; 1322 err = -EFAULT;
1284 } 1323 }
@@ -1295,7 +1334,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1295 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && 1334 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1296 p.proto != 0) 1335 p.proto != 0)
1297 break; 1336 break;
1298 t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); 1337 ip6_tnl_parm_from_user(&p1, &p);
1338 t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1299 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { 1339 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
1300 if (t != NULL) { 1340 if (t != NULL) {
1301 if (t->dev != dev) { 1341 if (t->dev != dev) {
@@ -1307,13 +1347,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1307 1347
1308 ip6_tnl_unlink(ip6n, t); 1348 ip6_tnl_unlink(ip6n, t);
1309 synchronize_net(); 1349 synchronize_net();
1310 err = ip6_tnl_change(t, &p); 1350 err = ip6_tnl_change(t, &p1);
1311 ip6_tnl_link(ip6n, t); 1351 ip6_tnl_link(ip6n, t);
1312 netdev_state_change(dev); 1352 netdev_state_change(dev);
1313 } 1353 }
1314 if (t) { 1354 if (t) {
1315 err = 0; 1355 err = 0;
1316 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) 1356 ip6_tnl_parm_to_user(&p, &t->parms);
1357 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1317 err = -EFAULT; 1358 err = -EFAULT;
1318 1359
1319 } else 1360 } else
@@ -1329,7 +1370,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1329 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) 1370 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1330 break; 1371 break;
1331 err = -ENOENT; 1372 err = -ENOENT;
1332 if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) 1373 ip6_tnl_parm_from_user(&p1, &p);
1374 t = ip6_tnl_locate(net, &p1, 0);
1375 if (t == NULL)
1333 break; 1376 break;
1334 err = -EPERM; 1377 err = -EPERM;
1335 if (t->dev == ip6n->fb_tnl_dev) 1378 if (t->dev == ip6n->fb_tnl_dev)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 4532973f0dd4..08ea3f0b6e55 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
838 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 838 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
839 skb_trim(skb, nlh->nlmsg_len); 839 skb_trim(skb, nlh->nlmsg_len);
840 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; 840 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
841 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 841 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
842 } else 842 } else
843 kfree_skb(skb); 843 kfree_skb(skb);
844 } 844 }
@@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1052 skb_trim(skb, nlh->nlmsg_len); 1052 skb_trim(skb, nlh->nlmsg_len);
1053 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; 1053 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1054 } 1054 }
1055 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 1055 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1056 } else 1056 } else
1057 ip6_mr_forward(net, mrt, skb, c); 1057 ip6_mr_forward(net, mrt, skb, c);
1058 } 1058 }
@@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net,
2202} 2202}
2203 2203
2204static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 2204static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2205 u32 pid, u32 seq, struct mfc6_cache *c) 2205 u32 portid, u32 seq, struct mfc6_cache *c)
2206{ 2206{
2207 struct nlmsghdr *nlh; 2207 struct nlmsghdr *nlh;
2208 struct rtmsg *rtm; 2208 struct rtmsg *rtm;
2209 2209
2210 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); 2210 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2211 if (nlh == NULL) 2211 if (nlh == NULL)
2212 return -EMSGSIZE; 2212 return -EMSGSIZE;
2213 2213
@@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2260 if (e < s_e) 2260 if (e < s_e)
2261 goto next_entry; 2261 goto next_entry;
2262 if (ip6mr_fill_mroute(mrt, skb, 2262 if (ip6mr_fill_mroute(mrt, skb,
2263 NETLINK_CB(cb->skb).pid, 2263 NETLINK_CB(cb->skb).portid,
2264 cb->nlh->nlmsg_seq, 2264 cb->nlh->nlmsg_seq,
2265 mfc) < 0) 2265 mfc) < 0)
2266 goto done; 2266 goto done;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index db31561cc8df..429089cb073d 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -15,6 +15,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
15{ 15{
16 struct net *net = dev_net(skb_dst(skb)->dev); 16 struct net *net = dev_net(skb_dst(skb)->dev);
17 const struct ipv6hdr *iph = ipv6_hdr(skb); 17 const struct ipv6hdr *iph = ipv6_hdr(skb);
18 unsigned int hh_len;
18 struct dst_entry *dst; 19 struct dst_entry *dst;
19 struct flowi6 fl6 = { 20 struct flowi6 fl6 = {
20 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, 21 .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -47,6 +48,13 @@ int ip6_route_me_harder(struct sk_buff *skb)
47 } 48 }
48#endif 49#endif
49 50
51 /* Change in oif may mean change in hh_len. */
52 hh_len = skb_dst(skb)->dev->hard_header_len;
53 if (skb_headroom(skb) < hh_len &&
54 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
55 0, GFP_ATOMIC))
56 return -1;
57
50 return 0; 58 return 0;
51} 59}
52EXPORT_SYMBOL(ip6_route_me_harder); 60EXPORT_SYMBOL(ip6_route_me_harder);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 10135342799e..c72532a60d88 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -181,9 +181,44 @@ config IP6_NF_SECURITY
181 help 181 help
182 This option adds a `security' table to iptables, for use 182 This option adds a `security' table to iptables, for use
183 with Mandatory Access Control (MAC) policy. 183 with Mandatory Access Control (MAC) policy.
184 184
185 If unsure, say N. 185 If unsure, say N.
186 186
187config NF_NAT_IPV6
188 tristate "IPv6 NAT"
189 depends on NF_CONNTRACK_IPV6
190 depends on NETFILTER_ADVANCED
191 select NF_NAT
192 help
193 The IPv6 NAT option allows masquerading, port forwarding and other
194 forms of full Network Address Port Translation. It is controlled by
195 the `nat' table in ip6tables, see the man page for ip6tables(8).
196
197 To compile it as a module, choose M here. If unsure, say N.
198
199if NF_NAT_IPV6
200
201config IP6_NF_TARGET_MASQUERADE
202 tristate "MASQUERADE target support"
203 help
204 Masquerading is a special case of NAT: all outgoing connections are
205 changed to seem to come from a particular interface's address, and
206 if the interface goes down, those connections are lost. This is
207 only useful for dialup accounts with dynamic IP address (ie. your IP
208 address will be different on next dialup).
209
210 To compile it as a module, choose M here. If unsure, say N.
211
212config IP6_NF_TARGET_NPT
213 tristate "NPT (Network Prefix translation) target support"
214 help
215 This option adds the `SNPT' and `DNPT' target, which perform
216 stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
217
218 To compile it as a module, choose M here. If unsure, say N.
219
220endif # NF_NAT_IPV6
221
187endif # IP6_NF_IPTABLES 222endif # IP6_NF_IPTABLES
188 223
189endmenu 224endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 534d3f216f7b..2d11fcc2cf3c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
8obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o 8obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
9obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o 9obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
10obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 10obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
11obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
11 12
12# objects for l3 independent conntrack 13# objects for l3 independent conntrack
13nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 14nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15# l3 independent conntrack 16# l3 independent conntrack
16obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
17 18
19nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
20obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
21
18# defrag 22# defrag
19nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o 23nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
20obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o 24obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
@@ -30,4 +34,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
30obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o 34obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
31 35
32# targets 36# targets
37obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
38obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
33obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 39obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
new file mode 100644
index 000000000000..60e9053bab05
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -0,0 +1,135 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
9 * NAT funded by Astaro.
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/netdevice.h>
15#include <linux/ipv6.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter_ipv6.h>
18#include <linux/netfilter/x_tables.h>
19#include <net/netfilter/nf_nat.h>
20#include <net/addrconf.h>
21#include <net/ipv6.h>
22
23static unsigned int
24masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
25{
26 const struct nf_nat_range *range = par->targinfo;
27 enum ip_conntrack_info ctinfo;
28 struct in6_addr src;
29 struct nf_conn *ct;
30 struct nf_nat_range newrange;
31
32 ct = nf_ct_get(skb, &ctinfo);
33 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
34 ctinfo == IP_CT_RELATED_REPLY));
35
36 if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
37 &ipv6_hdr(skb)->daddr, 0, &src) < 0)
38 return NF_DROP;
39
40 nfct_nat(ct)->masq_index = par->out->ifindex;
41
42 newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
43 newrange.min_addr.in6 = src;
44 newrange.max_addr.in6 = src;
45 newrange.min_proto = range->min_proto;
46 newrange.max_proto = range->max_proto;
47
48 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
49}
50
51static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
52{
53 const struct nf_nat_range *range = par->targinfo;
54
55 if (range->flags & NF_NAT_RANGE_MAP_IPS)
56 return -EINVAL;
57 return 0;
58}
59
60static int device_cmp(struct nf_conn *ct, void *ifindex)
61{
62 const struct nf_conn_nat *nat = nfct_nat(ct);
63
64 if (!nat)
65 return 0;
66 if (nf_ct_l3num(ct) != NFPROTO_IPV6)
67 return 0;
68 return nat->masq_index == (int)(long)ifindex;
69}
70
71static int masq_device_event(struct notifier_block *this,
72 unsigned long event, void *ptr)
73{
74 const struct net_device *dev = ptr;
75 struct net *net = dev_net(dev);
76
77 if (event == NETDEV_DOWN)
78 nf_ct_iterate_cleanup(net, device_cmp,
79 (void *)(long)dev->ifindex);
80
81 return NOTIFY_DONE;
82}
83
84static struct notifier_block masq_dev_notifier = {
85 .notifier_call = masq_device_event,
86};
87
88static int masq_inet_event(struct notifier_block *this,
89 unsigned long event, void *ptr)
90{
91 struct inet6_ifaddr *ifa = ptr;
92
93 return masq_device_event(this, event, ifa->idev->dev);
94}
95
96static struct notifier_block masq_inet_notifier = {
97 .notifier_call = masq_inet_event,
98};
99
100static struct xt_target masquerade_tg6_reg __read_mostly = {
101 .name = "MASQUERADE",
102 .family = NFPROTO_IPV6,
103 .checkentry = masquerade_tg6_checkentry,
104 .target = masquerade_tg6,
105 .targetsize = sizeof(struct nf_nat_range),
106 .table = "nat",
107 .hooks = 1 << NF_INET_POST_ROUTING,
108 .me = THIS_MODULE,
109};
110
111static int __init masquerade_tg6_init(void)
112{
113 int err;
114
115 err = xt_register_target(&masquerade_tg6_reg);
116 if (err == 0) {
117 register_netdevice_notifier(&masq_dev_notifier);
118 register_inet6addr_notifier(&masq_inet_notifier);
119 }
120
121 return err;
122}
123static void __exit masquerade_tg6_exit(void)
124{
125 unregister_inet6addr_notifier(&masq_inet_notifier);
126 unregister_netdevice_notifier(&masq_dev_notifier);
127 xt_unregister_target(&masquerade_tg6_reg);
128}
129
130module_init(masquerade_tg6_init);
131module_exit(masquerade_tg6_exit);
132
133MODULE_LICENSE("GPL");
134MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
135MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
new file mode 100644
index 000000000000..e9486915eff6
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -0,0 +1,165 @@
1/*
2 * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/ipv6.h>
12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv6.h>
14#include <linux/netfilter_ipv6/ip6t_NPT.h>
15#include <linux/netfilter/x_tables.h>
16
17static __sum16 csum16_complement(__sum16 a)
18{
19 return (__force __sum16)(0xffff - (__force u16)a);
20}
21
22static __sum16 csum16_add(__sum16 a, __sum16 b)
23{
24 u16 sum;
25
26 sum = (__force u16)a + (__force u16)b;
27 sum += (__force u16)a < (__force u16)b;
28 return (__force __sum16)sum;
29}
30
31static __sum16 csum16_sub(__sum16 a, __sum16 b)
32{
33 return csum16_add(a, csum16_complement(b));
34}
35
36static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
37{
38 struct ip6t_npt_tginfo *npt = par->targinfo;
39 __sum16 src_sum = 0, dst_sum = 0;
40 unsigned int i;
41
42 if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
43 return -EINVAL;
44
45 for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
46 src_sum = csum16_add(src_sum,
47 (__force __sum16)npt->src_pfx.in6.s6_addr16[i]);
48 dst_sum = csum16_add(dst_sum,
49 (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]);
50 }
51
52 npt->adjustment = csum16_sub(src_sum, dst_sum);
53 return 0;
54}
55
56static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
57 struct in6_addr *addr)
58{
59 unsigned int pfx_len;
60 unsigned int i, idx;
61 __be32 mask;
62 __sum16 sum;
63
64 pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
65 for (i = 0; i < pfx_len; i += 32) {
66 if (pfx_len - i >= 32)
67 mask = 0;
68 else
69 mask = htonl(~((1 << (pfx_len - i)) - 1));
70
71 idx = i / 32;
72 addr->s6_addr32[idx] &= mask;
73 addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
74 }
75
76 if (pfx_len <= 48)
77 idx = 3;
78 else {
79 for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
80 if ((__force __sum16)addr->s6_addr16[idx] !=
81 CSUM_MANGLED_0)
82 break;
83 }
84 if (idx == ARRAY_SIZE(addr->s6_addr16))
85 return false;
86 }
87
88 sum = csum16_add((__force __sum16)addr->s6_addr16[idx],
89 npt->adjustment);
90 if (sum == CSUM_MANGLED_0)
91 sum = 0;
92 *(__force __sum16 *)&addr->s6_addr16[idx] = sum;
93
94 return true;
95}
96
97static unsigned int
98ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
99{
100 const struct ip6t_npt_tginfo *npt = par->targinfo;
101
102 if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
103 icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
104 offsetof(struct ipv6hdr, saddr));
105 return NF_DROP;
106 }
107 return XT_CONTINUE;
108}
109
110static unsigned int
111ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
112{
113 const struct ip6t_npt_tginfo *npt = par->targinfo;
114
115 if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
116 icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
117 offsetof(struct ipv6hdr, daddr));
118 return NF_DROP;
119 }
120 return XT_CONTINUE;
121}
122
123static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
124 {
125 .name = "SNPT",
126 .target = ip6t_snpt_tg,
127 .targetsize = sizeof(struct ip6t_npt_tginfo),
128 .checkentry = ip6t_npt_checkentry,
129 .family = NFPROTO_IPV6,
130 .hooks = (1 << NF_INET_LOCAL_IN) |
131 (1 << NF_INET_POST_ROUTING),
132 .me = THIS_MODULE,
133 },
134 {
135 .name = "DNPT",
136 .target = ip6t_dnpt_tg,
137 .targetsize = sizeof(struct ip6t_npt_tginfo),
138 .checkentry = ip6t_npt_checkentry,
139 .family = NFPROTO_IPV6,
140 .hooks = (1 << NF_INET_PRE_ROUTING) |
141 (1 << NF_INET_LOCAL_OUT),
142 .me = THIS_MODULE,
143 },
144};
145
146static int __init ip6t_npt_init(void)
147{
148 return xt_register_targets(ip6t_npt_target_reg,
149 ARRAY_SIZE(ip6t_npt_target_reg));
150}
151
152static void __exit ip6t_npt_exit(void)
153{
154 xt_unregister_targets(ip6t_npt_target_reg,
155 ARRAY_SIZE(ip6t_npt_target_reg));
156}
157
158module_init(ip6t_npt_init);
159module_exit(ip6t_npt_exit);
160
161MODULE_LICENSE("GPL");
162MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
163MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
164MODULE_ALIAS("ip6t_SNPT");
165MODULE_ALIAS("ip6t_DNPT");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 325e59a0224f..beb5777d2043 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
61 net->ipv6.ip6table_filter = 61 net->ipv6.ip6table_filter =
62 ip6t_register_table(net, &packet_filter, repl); 62 ip6t_register_table(net, &packet_filter, repl);
63 kfree(repl); 63 kfree(repl);
64 if (IS_ERR(net->ipv6.ip6table_filter)) 64 return PTR_RET(net->ipv6.ip6table_filter);
65 return PTR_ERR(net->ipv6.ip6table_filter);
66 return 0;
67} 65}
68 66
69static void __net_exit ip6table_filter_net_exit(struct net *net) 67static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 4d782405f125..7431121b87de 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
97 net->ipv6.ip6table_mangle = 97 net->ipv6.ip6table_mangle =
98 ip6t_register_table(net, &packet_mangler, repl); 98 ip6t_register_table(net, &packet_mangler, repl);
99 kfree(repl); 99 kfree(repl);
100 if (IS_ERR(net->ipv6.ip6table_mangle)) 100 return PTR_RET(net->ipv6.ip6table_mangle);
101 return PTR_ERR(net->ipv6.ip6table_mangle);
102 return 0;
103} 101}
104 102
105static void __net_exit ip6table_mangle_net_exit(struct net *net) 103static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
new file mode 100644
index 000000000000..e418bd6350a4
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -0,0 +1,321 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
9 * funded by Astaro.
10 */
11
12#include <linux/module.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter_ipv6.h>
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <linux/ipv6.h>
17#include <net/ipv6.h>
18
19#include <net/netfilter/nf_nat.h>
20#include <net/netfilter/nf_nat_core.h>
21#include <net/netfilter/nf_nat_l3proto.h>
22
23static const struct xt_table nf_nat_ipv6_table = {
24 .name = "nat",
25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
26 (1 << NF_INET_POST_ROUTING) |
27 (1 << NF_INET_LOCAL_OUT) |
28 (1 << NF_INET_LOCAL_IN),
29 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6,
31};
32
33static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
34{
35 /* Force range to this IP; let proto decide mapping for
36 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
37 */
38 struct nf_nat_range range;
39
40 range.flags = 0;
41 pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
42 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
43 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
44 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
45
46 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
47}
48
49static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
50 const struct net_device *in,
51 const struct net_device *out,
52 struct nf_conn *ct)
53{
54 struct net *net = nf_ct_net(ct);
55 unsigned int ret;
56
57 ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
58 if (ret == NF_ACCEPT) {
59 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
60 ret = alloc_null_binding(ct, hooknum);
61 }
62 return ret;
63}
64
65static unsigned int
66nf_nat_ipv6_fn(unsigned int hooknum,
67 struct sk_buff *skb,
68 const struct net_device *in,
69 const struct net_device *out,
70 int (*okfn)(struct sk_buff *))
71{
72 struct nf_conn *ct;
73 enum ip_conntrack_info ctinfo;
74 struct nf_conn_nat *nat;
75 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
76 __be16 frag_off;
77 int hdrlen;
78 u8 nexthdr;
79
80 ct = nf_ct_get(skb, &ctinfo);
81 /* Can't track? It's not due to stress, or conntrack would
82 * have dropped it. Hence it's the user's responsibilty to
83 * packet filter it out, or implement conntrack/NAT for that
84 * protocol. 8) --RR
85 */
86 if (!ct)
87 return NF_ACCEPT;
88
89 /* Don't try to NAT if this packet is not conntracked */
90 if (nf_ct_is_untracked(ct))
91 return NF_ACCEPT;
92
93 nat = nfct_nat(ct);
94 if (!nat) {
95 /* NAT module was loaded late. */
96 if (nf_ct_is_confirmed(ct))
97 return NF_ACCEPT;
98 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
99 if (nat == NULL) {
100 pr_debug("failed to add NAT extension\n");
101 return NF_ACCEPT;
102 }
103 }
104
105 switch (ctinfo) {
106 case IP_CT_RELATED:
107 case IP_CT_RELATED_REPLY:
108 nexthdr = ipv6_hdr(skb)->nexthdr;
109 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
110 &nexthdr, &frag_off);
111
112 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
113 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
114 hooknum, hdrlen))
115 return NF_DROP;
116 else
117 return NF_ACCEPT;
118 }
119 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
120 case IP_CT_NEW:
121 /* Seen it before? This can happen for loopback, retrans,
122 * or local packets.
123 */
124 if (!nf_nat_initialized(ct, maniptype)) {
125 unsigned int ret;
126
127 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
128 if (ret != NF_ACCEPT)
129 return ret;
130 } else
131 pr_debug("Already setup manip %s for ct %p\n",
132 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
133 ct);
134 break;
135
136 default:
137 /* ESTABLISHED */
138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139 ctinfo == IP_CT_ESTABLISHED_REPLY);
140 }
141
142 return nf_nat_packet(ct, ctinfo, hooknum, skb);
143}
144
145static unsigned int
146nf_nat_ipv6_in(unsigned int hooknum,
147 struct sk_buff *skb,
148 const struct net_device *in,
149 const struct net_device *out,
150 int (*okfn)(struct sk_buff *))
151{
152 unsigned int ret;
153 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
154
155 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
156 if (ret != NF_DROP && ret != NF_STOLEN &&
157 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
158 skb_dst_drop(skb);
159
160 return ret;
161}
162
163static unsigned int
164nf_nat_ipv6_out(unsigned int hooknum,
165 struct sk_buff *skb,
166 const struct net_device *in,
167 const struct net_device *out,
168 int (*okfn)(struct sk_buff *))
169{
170#ifdef CONFIG_XFRM
171 const struct nf_conn *ct;
172 enum ip_conntrack_info ctinfo;
173#endif
174 unsigned int ret;
175
176 /* root is playing with raw sockets. */
177 if (skb->len < sizeof(struct ipv6hdr))
178 return NF_ACCEPT;
179
180 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
181#ifdef CONFIG_XFRM
182 if (ret != NF_DROP && ret != NF_STOLEN &&
183 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
184 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
185 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
186
187 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
188 &ct->tuplehash[!dir].tuple.dst.u3) ||
189 (ct->tuplehash[dir].tuple.src.u.all !=
190 ct->tuplehash[!dir].tuple.dst.u.all))
191 if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
192 ret = NF_DROP;
193 }
194#endif
195 return ret;
196}
197
198static unsigned int
199nf_nat_ipv6_local_fn(unsigned int hooknum,
200 struct sk_buff *skb,
201 const struct net_device *in,
202 const struct net_device *out,
203 int (*okfn)(struct sk_buff *))
204{
205 const struct nf_conn *ct;
206 enum ip_conntrack_info ctinfo;
207 unsigned int ret;
208
209 /* root is playing with raw sockets. */
210 if (skb->len < sizeof(struct ipv6hdr))
211 return NF_ACCEPT;
212
213 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
214 if (ret != NF_DROP && ret != NF_STOLEN &&
215 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
216 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
217
218 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
219 &ct->tuplehash[!dir].tuple.src.u3)) {
220 if (ip6_route_me_harder(skb))
221 ret = NF_DROP;
222 }
223#ifdef CONFIG_XFRM
224 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
225 ct->tuplehash[dir].tuple.dst.u.all !=
226 ct->tuplehash[!dir].tuple.src.u.all)
227 if (nf_xfrm_me_harder(skb, AF_INET6))
228 ret = NF_DROP;
229#endif
230 }
231 return ret;
232}
233
234static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
235 /* Before packet filtering, change destination */
236 {
237 .hook = nf_nat_ipv6_in,
238 .owner = THIS_MODULE,
239 .pf = NFPROTO_IPV6,
240 .hooknum = NF_INET_PRE_ROUTING,
241 .priority = NF_IP6_PRI_NAT_DST,
242 },
243 /* After packet filtering, change source */
244 {
245 .hook = nf_nat_ipv6_out,
246 .owner = THIS_MODULE,
247 .pf = NFPROTO_IPV6,
248 .hooknum = NF_INET_POST_ROUTING,
249 .priority = NF_IP6_PRI_NAT_SRC,
250 },
251 /* Before packet filtering, change destination */
252 {
253 .hook = nf_nat_ipv6_local_fn,
254 .owner = THIS_MODULE,
255 .pf = NFPROTO_IPV6,
256 .hooknum = NF_INET_LOCAL_OUT,
257 .priority = NF_IP6_PRI_NAT_DST,
258 },
259 /* After packet filtering, change source */
260 {
261 .hook = nf_nat_ipv6_fn,
262 .owner = THIS_MODULE,
263 .pf = NFPROTO_IPV6,
264 .hooknum = NF_INET_LOCAL_IN,
265 .priority = NF_IP6_PRI_NAT_SRC,
266 },
267};
268
269static int __net_init ip6table_nat_net_init(struct net *net)
270{
271 struct ip6t_replace *repl;
272
273 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
274 if (repl == NULL)
275 return -ENOMEM;
276 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
277 kfree(repl);
278 if (IS_ERR(net->ipv6.ip6table_nat))
279 return PTR_ERR(net->ipv6.ip6table_nat);
280 return 0;
281}
282
283static void __net_exit ip6table_nat_net_exit(struct net *net)
284{
285 ip6t_unregister_table(net, net->ipv6.ip6table_nat);
286}
287
288static struct pernet_operations ip6table_nat_net_ops = {
289 .init = ip6table_nat_net_init,
290 .exit = ip6table_nat_net_exit,
291};
292
293static int __init ip6table_nat_init(void)
294{
295 int err;
296
297 err = register_pernet_subsys(&ip6table_nat_net_ops);
298 if (err < 0)
299 goto err1;
300
301 err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
302 if (err < 0)
303 goto err2;
304 return 0;
305
306err2:
307 unregister_pernet_subsys(&ip6table_nat_net_ops);
308err1:
309 return err;
310}
311
312static void __exit ip6table_nat_exit(void)
313{
314 nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
315 unregister_pernet_subsys(&ip6table_nat_net_ops);
316}
317
318module_init(ip6table_nat_init);
319module_exit(ip6table_nat_exit);
320
321MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5b9926a011bd..60d1bddff7a0 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
40 net->ipv6.ip6table_raw = 40 net->ipv6.ip6table_raw =
41 ip6t_register_table(net, &packet_raw, repl); 41 ip6t_register_table(net, &packet_raw, repl);
42 kfree(repl); 42 kfree(repl);
43 if (IS_ERR(net->ipv6.ip6table_raw)) 43 return PTR_RET(net->ipv6.ip6table_raw);
44 return PTR_ERR(net->ipv6.ip6table_raw);
45 return 0;
46} 44}
47 45
48static void __net_exit ip6table_raw_net_exit(struct net *net) 46static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 91aa2b4d83c9..db155351339c 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
58 net->ipv6.ip6table_security = 58 net->ipv6.ip6table_security =
59 ip6t_register_table(net, &security_table, repl); 59 ip6t_register_table(net, &security_table, repl);
60 kfree(repl); 60 kfree(repl);
61 if (IS_ERR(net->ipv6.ip6table_security)) 61 return PTR_RET(net->ipv6.ip6table_security);
62 return PTR_ERR(net->ipv6.ip6table_security);
63
64 return 0;
65} 62}
66 63
67static void __net_exit ip6table_security_net_exit(struct net *net) 64static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4794f96cf2e0..8860d23e61cf 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
28#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
29#include <net/netfilter/nf_conntrack_zones.h> 29#include <net/netfilter/nf_conntrack_zones.h>
30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31#include <net/netfilter/nf_nat_helper.h>
31#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 32#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
32#include <net/netfilter/nf_log.h> 33#include <net/netfilter/nf_log.h>
33 34
@@ -64,82 +65,31 @@ static int ipv6_print_tuple(struct seq_file *s,
64 tuple->src.u3.ip6, tuple->dst.u3.ip6); 65 tuple->src.u3.ip6, tuple->dst.u3.ip6);
65} 66}
66 67
67/*
68 * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
69 *
70 * This function parses (probably truncated) exthdr set "hdr"
71 * of length "len". "nexthdrp" initially points to some place,
72 * where type of the first header can be found.
73 *
74 * It skips all well-known exthdrs, and returns pointer to the start
75 * of unparsable area i.e. the first header with unknown type.
76 * if success, *nexthdr is updated by type/protocol of this header.
77 *
78 * NOTES: - it may return pointer pointing beyond end of packet,
79 * if the last recognized header is truncated in the middle.
80 * - if packet is truncated, so that all parsed headers are skipped,
81 * it returns -1.
82 * - if packet is fragmented, return pointer of the fragment header.
83 * - ESP is unparsable for now and considered like
84 * normal payload protocol.
85 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
86 */
87
88static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
89 u8 *nexthdrp, int len)
90{
91 u8 nexthdr = *nexthdrp;
92
93 while (ipv6_ext_hdr(nexthdr)) {
94 struct ipv6_opt_hdr hdr;
95 int hdrlen;
96
97 if (len < (int)sizeof(struct ipv6_opt_hdr))
98 return -1;
99 if (nexthdr == NEXTHDR_NONE)
100 break;
101 if (nexthdr == NEXTHDR_FRAGMENT)
102 break;
103 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
104 BUG();
105 if (nexthdr == NEXTHDR_AUTH)
106 hdrlen = (hdr.hdrlen+2)<<2;
107 else
108 hdrlen = ipv6_optlen(&hdr);
109
110 nexthdr = hdr.nexthdr;
111 len -= hdrlen;
112 start += hdrlen;
113 }
114
115 *nexthdrp = nexthdr;
116 return start;
117}
118
119static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 68static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
120 unsigned int *dataoff, u_int8_t *protonum) 69 unsigned int *dataoff, u_int8_t *protonum)
121{ 70{
122 unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 71 unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
123 unsigned char pnum; 72 __be16 frag_off;
124 int protoff; 73 int protoff;
74 u8 nexthdr;
125 75
126 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 76 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
127 &pnum, sizeof(pnum)) != 0) { 77 &nexthdr, sizeof(nexthdr)) != 0) {
128 pr_debug("ip6_conntrack_core: can't get nexthdr\n"); 78 pr_debug("ip6_conntrack_core: can't get nexthdr\n");
129 return -NF_ACCEPT; 79 return -NF_ACCEPT;
130 } 80 }
131 protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); 81 protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
132 /* 82 /*
133 * (protoff == skb->len) mean that the packet doesn't have no data 83 * (protoff == skb->len) mean that the packet doesn't have no data
134 * except of IPv6 & ext headers. but it's tracked anyway. - YK 84 * except of IPv6 & ext headers. but it's tracked anyway. - YK
135 */ 85 */
136 if ((protoff < 0) || (protoff > skb->len)) { 86 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
137 pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); 87 pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
138 return -NF_ACCEPT; 88 return -NF_ACCEPT;
139 } 89 }
140 90
141 *dataoff = protoff; 91 *dataoff = protoff;
142 *protonum = pnum; 92 *protonum = nexthdr;
143 return NF_ACCEPT; 93 return NF_ACCEPT;
144} 94}
145 95
@@ -153,10 +103,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
153 const struct nf_conn_help *help; 103 const struct nf_conn_help *help;
154 const struct nf_conntrack_helper *helper; 104 const struct nf_conntrack_helper *helper;
155 enum ip_conntrack_info ctinfo; 105 enum ip_conntrack_info ctinfo;
156 unsigned int ret, protoff; 106 unsigned int ret;
157 unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; 107 __be16 frag_off;
158 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 108 int protoff;
159 109 u8 nexthdr;
160 110
161 /* This is where we call the helper: as the packet goes out. */ 111 /* This is where we call the helper: as the packet goes out. */
162 ct = nf_ct_get(skb, &ctinfo); 112 ct = nf_ct_get(skb, &ctinfo);
@@ -171,9 +121,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
171 if (!helper) 121 if (!helper)
172 return NF_ACCEPT; 122 return NF_ACCEPT;
173 123
174 protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, 124 nexthdr = ipv6_hdr(skb)->nexthdr;
175 skb->len - extoff); 125 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
176 if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { 126 &frag_off);
127 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
177 pr_debug("proto header not found\n"); 128 pr_debug("proto header not found\n");
178 return NF_ACCEPT; 129 return NF_ACCEPT;
179 } 130 }
@@ -192,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
192 const struct net_device *out, 143 const struct net_device *out,
193 int (*okfn)(struct sk_buff *)) 144 int (*okfn)(struct sk_buff *))
194{ 145{
146 struct nf_conn *ct;
147 enum ip_conntrack_info ctinfo;
148 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
149 int protoff;
150 __be16 frag_off;
151
152 ct = nf_ct_get(skb, &ctinfo);
153 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
154 goto out;
155
156 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
157 &frag_off);
158 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
159 pr_debug("proto header not found\n");
160 goto out;
161 }
162
163 /* adjust seqs for loopback traffic only in outgoing direction */
164 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
165 !nf_is_loopback_packet(skb)) {
166 typeof(nf_nat_seq_adjust_hook) seq_adjust;
167
168 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
169 if (!seq_adjust ||
170 !seq_adjust(skb, ct, ctinfo, protoff)) {
171 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
172 return NF_DROP;
173 }
174 }
175out:
195 /* We've seen it coming out the other side: confirm it */ 176 /* We've seen it coming out the other side: confirm it */
196 return nf_conntrack_confirm(skb); 177 return nf_conntrack_confirm(skb);
197} 178}
@@ -199,9 +180,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
199static unsigned int __ipv6_conntrack_in(struct net *net, 180static unsigned int __ipv6_conntrack_in(struct net *net,
200 unsigned int hooknum, 181 unsigned int hooknum,
201 struct sk_buff *skb, 182 struct sk_buff *skb,
183 const struct net_device *in,
184 const struct net_device *out,
202 int (*okfn)(struct sk_buff *)) 185 int (*okfn)(struct sk_buff *))
203{ 186{
204 struct sk_buff *reasm = skb->nfct_reasm; 187 struct sk_buff *reasm = skb->nfct_reasm;
188 const struct nf_conn_help *help;
189 struct nf_conn *ct;
190 enum ip_conntrack_info ctinfo;
205 191
206 /* This packet is fragmented and has reassembled packet. */ 192 /* This packet is fragmented and has reassembled packet. */
207 if (reasm) { 193 if (reasm) {
@@ -213,6 +199,25 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
213 if (ret != NF_ACCEPT) 199 if (ret != NF_ACCEPT)
214 return ret; 200 return ret;
215 } 201 }
202
203 /* Conntrack helpers need the entire reassembled packet in the
204 * POST_ROUTING hook. In case of unconfirmed connections NAT
205 * might reassign a helper, so the entire packet is also
206 * required.
207 */
208 ct = nf_ct_get(reasm, &ctinfo);
209 if (ct != NULL && !nf_ct_is_untracked(ct)) {
210 help = nfct_help(ct);
211 if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
212 nf_conntrack_get_reasm(skb);
213 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
214 (struct net_device *)in,
215 (struct net_device *)out,
216 okfn, NF_IP6_PRI_CONNTRACK + 1);
217 return NF_DROP_ERR(-ECANCELED);
218 }
219 }
220
216 nf_conntrack_get(reasm->nfct); 221 nf_conntrack_get(reasm->nfct);
217 skb->nfct = reasm->nfct; 222 skb->nfct = reasm->nfct;
218 skb->nfctinfo = reasm->nfctinfo; 223 skb->nfctinfo = reasm->nfctinfo;
@@ -228,7 +233,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
228 const struct net_device *out, 233 const struct net_device *out,
229 int (*okfn)(struct sk_buff *)) 234 int (*okfn)(struct sk_buff *))
230{ 235{
231 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); 236 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
232} 237}
233 238
234static unsigned int ipv6_conntrack_local(unsigned int hooknum, 239static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +247,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
242 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); 247 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
243 return NF_ACCEPT; 248 return NF_ACCEPT;
244 } 249 }
245 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); 250 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
246} 251}
247 252
248static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 253static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c9c78c2e666b..18bd9bbbd1c6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -57,41 +57,27 @@ struct nf_ct_frag6_skb_cb
57 57
58#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) 58#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
59 59
60struct nf_ct_frag6_queue
61{
62 struct inet_frag_queue q;
63
64 __be32 id; /* fragment id */
65 u32 user;
66 struct in6_addr saddr;
67 struct in6_addr daddr;
68
69 unsigned int csum;
70 __u16 nhoffset;
71};
72
73static struct inet_frags nf_frags; 60static struct inet_frags nf_frags;
74static struct netns_frags nf_init_frags;
75 61
76#ifdef CONFIG_SYSCTL 62#ifdef CONFIG_SYSCTL
77static struct ctl_table nf_ct_frag6_sysctl_table[] = { 63static struct ctl_table nf_ct_frag6_sysctl_table[] = {
78 { 64 {
79 .procname = "nf_conntrack_frag6_timeout", 65 .procname = "nf_conntrack_frag6_timeout",
80 .data = &nf_init_frags.timeout, 66 .data = &init_net.nf_frag.frags.timeout,
81 .maxlen = sizeof(unsigned int), 67 .maxlen = sizeof(unsigned int),
82 .mode = 0644, 68 .mode = 0644,
83 .proc_handler = proc_dointvec_jiffies, 69 .proc_handler = proc_dointvec_jiffies,
84 }, 70 },
85 { 71 {
86 .procname = "nf_conntrack_frag6_low_thresh", 72 .procname = "nf_conntrack_frag6_low_thresh",
87 .data = &nf_init_frags.low_thresh, 73 .data = &init_net.nf_frag.frags.low_thresh,
88 .maxlen = sizeof(unsigned int), 74 .maxlen = sizeof(unsigned int),
89 .mode = 0644, 75 .mode = 0644,
90 .proc_handler = proc_dointvec, 76 .proc_handler = proc_dointvec,
91 }, 77 },
92 { 78 {
93 .procname = "nf_conntrack_frag6_high_thresh", 79 .procname = "nf_conntrack_frag6_high_thresh",
94 .data = &nf_init_frags.high_thresh, 80 .data = &init_net.nf_frag.frags.high_thresh,
95 .maxlen = sizeof(unsigned int), 81 .maxlen = sizeof(unsigned int),
96 .mode = 0644, 82 .mode = 0644,
97 .proc_handler = proc_dointvec, 83 .proc_handler = proc_dointvec,
@@ -99,68 +85,86 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
99 { } 85 { }
100}; 86};
101 87
102static struct ctl_table_header *nf_ct_frag6_sysctl_header; 88static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
103#endif
104
105static unsigned int nf_hashfn(struct inet_frag_queue *q)
106{ 89{
107 const struct nf_ct_frag6_queue *nq; 90 struct ctl_table *table;
91 struct ctl_table_header *hdr;
92
93 table = nf_ct_frag6_sysctl_table;
94 if (!net_eq(net, &init_net)) {
95 table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
96 GFP_KERNEL);
97 if (table == NULL)
98 goto err_alloc;
99
100 table[0].data = &net->ipv6.frags.high_thresh;
101 table[1].data = &net->ipv6.frags.low_thresh;
102 table[2].data = &net->ipv6.frags.timeout;
103 }
108 104
109 nq = container_of(q, struct nf_ct_frag6_queue, q); 105 hdr = register_net_sysctl(net, "net/netfilter", table);
110 return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); 106 if (hdr == NULL)
107 goto err_reg;
108
109 net->nf_frag.sysctl.frags_hdr = hdr;
110 return 0;
111
112err_reg:
113 if (!net_eq(net, &init_net))
114 kfree(table);
115err_alloc:
116 return -ENOMEM;
111} 117}
112 118
113static void nf_skb_free(struct sk_buff *skb) 119static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
114{ 120{
115 if (NFCT_FRAG6_CB(skb)->orig) 121 struct ctl_table *table;
116 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
117}
118 122
119/* Destruction primitives. */ 123 table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
124 unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
125 if (!net_eq(net, &init_net))
126 kfree(table);
127}
120 128
121static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) 129#else
130static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
122{ 131{
123 inet_frag_put(&fq->q, &nf_frags); 132 return 0;
124} 133}
134static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
135{
136}
137#endif
125 138
126/* Kill fq entry. It is not destroyed immediately, 139static unsigned int nf_hashfn(struct inet_frag_queue *q)
127 * because caller (and someone more) holds reference count.
128 */
129static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
130{ 140{
131 inet_frag_kill(&fq->q, &nf_frags); 141 const struct frag_queue *nq;
142
143 nq = container_of(q, struct frag_queue, q);
144 return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
132} 145}
133 146
134static void nf_ct_frag6_evictor(void) 147static void nf_skb_free(struct sk_buff *skb)
135{ 148{
136 local_bh_disable(); 149 if (NFCT_FRAG6_CB(skb)->orig)
137 inet_frag_evictor(&nf_init_frags, &nf_frags); 150 kfree_skb(NFCT_FRAG6_CB(skb)->orig);
138 local_bh_enable();
139} 151}
140 152
141static void nf_ct_frag6_expire(unsigned long data) 153static void nf_ct_frag6_expire(unsigned long data)
142{ 154{
143 struct nf_ct_frag6_queue *fq; 155 struct frag_queue *fq;
144 156 struct net *net;
145 fq = container_of((struct inet_frag_queue *)data,
146 struct nf_ct_frag6_queue, q);
147
148 spin_lock(&fq->q.lock);
149 157
150 if (fq->q.last_in & INET_FRAG_COMPLETE) 158 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
151 goto out; 159 net = container_of(fq->q.net, struct net, nf_frag.frags);
152 160
153 fq_kill(fq); 161 ip6_expire_frag_queue(net, fq, &nf_frags);
154
155out:
156 spin_unlock(&fq->q.lock);
157 fq_put(fq);
158} 162}
159 163
160/* Creation primitives. */ 164/* Creation primitives. */
161 165static inline struct frag_queue *fq_find(struct net *net, __be32 id,
162static __inline__ struct nf_ct_frag6_queue * 166 u32 user, struct in6_addr *src,
163fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) 167 struct in6_addr *dst)
164{ 168{
165 struct inet_frag_queue *q; 169 struct inet_frag_queue *q;
166 struct ip6_create_arg arg; 170 struct ip6_create_arg arg;
@@ -174,22 +178,23 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
174 read_lock_bh(&nf_frags.lock); 178 read_lock_bh(&nf_frags.lock);
175 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); 179 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
176 180
177 q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); 181 q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
178 local_bh_enable(); 182 local_bh_enable();
179 if (q == NULL) 183 if (q == NULL)
180 goto oom; 184 goto oom;
181 185
182 return container_of(q, struct nf_ct_frag6_queue, q); 186 return container_of(q, struct frag_queue, q);
183 187
184oom: 188oom:
185 return NULL; 189 return NULL;
186} 190}
187 191
188 192
189static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, 193static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
190 const struct frag_hdr *fhdr, int nhoff) 194 const struct frag_hdr *fhdr, int nhoff)
191{ 195{
192 struct sk_buff *prev, *next; 196 struct sk_buff *prev, *next;
197 unsigned int payload_len;
193 int offset, end; 198 int offset, end;
194 199
195 if (fq->q.last_in & INET_FRAG_COMPLETE) { 200 if (fq->q.last_in & INET_FRAG_COMPLETE) {
@@ -197,8 +202,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
197 goto err; 202 goto err;
198 } 203 }
199 204
205 payload_len = ntohs(ipv6_hdr(skb)->payload_len);
206
200 offset = ntohs(fhdr->frag_off) & ~0x7; 207 offset = ntohs(fhdr->frag_off) & ~0x7;
201 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - 208 end = offset + (payload_len -
202 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); 209 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
203 210
204 if ((unsigned int)end > IPV6_MAXPLEN) { 211 if ((unsigned int)end > IPV6_MAXPLEN) {
@@ -307,7 +314,9 @@ found:
307 skb->dev = NULL; 314 skb->dev = NULL;
308 fq->q.stamp = skb->tstamp; 315 fq->q.stamp = skb->tstamp;
309 fq->q.meat += skb->len; 316 fq->q.meat += skb->len;
310 atomic_add(skb->truesize, &nf_init_frags.mem); 317 if (payload_len > fq->q.max_size)
318 fq->q.max_size = payload_len;
319 atomic_add(skb->truesize, &fq->q.net->mem);
311 320
312 /* The first fragment. 321 /* The first fragment.
313 * nhoffset is obtained from the first fragment, of course. 322 * nhoffset is obtained from the first fragment, of course.
@@ -317,12 +326,12 @@ found:
317 fq->q.last_in |= INET_FRAG_FIRST_IN; 326 fq->q.last_in |= INET_FRAG_FIRST_IN;
318 } 327 }
319 write_lock(&nf_frags.lock); 328 write_lock(&nf_frags.lock);
320 list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); 329 list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
321 write_unlock(&nf_frags.lock); 330 write_unlock(&nf_frags.lock);
322 return 0; 331 return 0;
323 332
324discard_fq: 333discard_fq:
325 fq_kill(fq); 334 inet_frag_kill(&fq->q, &nf_frags);
326err: 335err:
327 return -1; 336 return -1;
328} 337}
@@ -337,12 +346,12 @@ err:
337 * the last and the first frames arrived and all the bits are here. 346 * the last and the first frames arrived and all the bits are here.
338 */ 347 */
339static struct sk_buff * 348static struct sk_buff *
340nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) 349nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
341{ 350{
342 struct sk_buff *fp, *op, *head = fq->q.fragments; 351 struct sk_buff *fp, *op, *head = fq->q.fragments;
343 int payload_len; 352 int payload_len;
344 353
345 fq_kill(fq); 354 inet_frag_kill(&fq->q, &nf_frags);
346 355
347 WARN_ON(head == NULL); 356 WARN_ON(head == NULL);
348 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); 357 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -386,7 +395,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
386 clone->ip_summed = head->ip_summed; 395 clone->ip_summed = head->ip_summed;
387 396
388 NFCT_FRAG6_CB(clone)->orig = NULL; 397 NFCT_FRAG6_CB(clone)->orig = NULL;
389 atomic_add(clone->truesize, &nf_init_frags.mem); 398 atomic_add(clone->truesize, &fq->q.net->mem);
390 } 399 }
391 400
392 /* We have to remove fragment header from datagram and to relocate 401 /* We have to remove fragment header from datagram and to relocate
@@ -410,12 +419,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
410 head->csum = csum_add(head->csum, fp->csum); 419 head->csum = csum_add(head->csum, fp->csum);
411 head->truesize += fp->truesize; 420 head->truesize += fp->truesize;
412 } 421 }
413 atomic_sub(head->truesize, &nf_init_frags.mem); 422 atomic_sub(head->truesize, &fq->q.net->mem);
414 423
424 head->local_df = 1;
415 head->next = NULL; 425 head->next = NULL;
416 head->dev = dev; 426 head->dev = dev;
417 head->tstamp = fq->q.stamp; 427 head->tstamp = fq->q.stamp;
418 ipv6_hdr(head)->payload_len = htons(payload_len); 428 ipv6_hdr(head)->payload_len = htons(payload_len);
429 IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
419 430
420 /* Yes, and fold redundant checksum back. 8) */ 431 /* Yes, and fold redundant checksum back. 8) */
421 if (head->ip_summed == CHECKSUM_COMPLETE) 432 if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -520,8 +531,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
520{ 531{
521 struct sk_buff *clone; 532 struct sk_buff *clone;
522 struct net_device *dev = skb->dev; 533 struct net_device *dev = skb->dev;
534 struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
535 : dev_net(skb->dev);
523 struct frag_hdr *fhdr; 536 struct frag_hdr *fhdr;
524 struct nf_ct_frag6_queue *fq; 537 struct frag_queue *fq;
525 struct ipv6hdr *hdr; 538 struct ipv6hdr *hdr;
526 int fhoff, nhoff; 539 int fhoff, nhoff;
527 u8 prevhdr; 540 u8 prevhdr;
@@ -553,10 +566,11 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
553 hdr = ipv6_hdr(clone); 566 hdr = ipv6_hdr(clone);
554 fhdr = (struct frag_hdr *)skb_transport_header(clone); 567 fhdr = (struct frag_hdr *)skb_transport_header(clone);
555 568
556 if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) 569 local_bh_disable();
557 nf_ct_frag6_evictor(); 570 inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
571 local_bh_enable();
558 572
559 fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); 573 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
560 if (fq == NULL) { 574 if (fq == NULL) {
561 pr_debug("Can't find and can't create new queue\n"); 575 pr_debug("Can't find and can't create new queue\n");
562 goto ret_orig; 576 goto ret_orig;
@@ -567,7 +581,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
567 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { 581 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
568 spin_unlock_bh(&fq->q.lock); 582 spin_unlock_bh(&fq->q.lock);
569 pr_debug("Can't insert skb to queue\n"); 583 pr_debug("Can't insert skb to queue\n");
570 fq_put(fq); 584 inet_frag_put(&fq->q, &nf_frags);
571 goto ret_orig; 585 goto ret_orig;
572 } 586 }
573 587
@@ -579,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
579 } 593 }
580 spin_unlock_bh(&fq->q.lock); 594 spin_unlock_bh(&fq->q.lock);
581 595
582 fq_put(fq); 596 inet_frag_put(&fq->q, &nf_frags);
583 return ret_skb; 597 return ret_skb;
584 598
585ret_orig: 599ret_orig:
@@ -592,6 +606,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
592 int (*okfn)(struct sk_buff *)) 606 int (*okfn)(struct sk_buff *))
593{ 607{
594 struct sk_buff *s, *s2; 608 struct sk_buff *s, *s2;
609 unsigned int ret = 0;
595 610
596 for (s = NFCT_FRAG6_CB(skb)->orig; s;) { 611 for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
597 nf_conntrack_put_reasm(s->nfct_reasm); 612 nf_conntrack_put_reasm(s->nfct_reasm);
@@ -601,49 +616,62 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
601 s2 = s->next; 616 s2 = s->next;
602 s->next = NULL; 617 s->next = NULL;
603 618
604 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, 619 if (ret != -ECANCELED)
605 NF_IP6_PRI_CONNTRACK_DEFRAG + 1); 620 ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
621 in, out, okfn,
622 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
623 else
624 kfree_skb(s);
625
606 s = s2; 626 s = s2;
607 } 627 }
608 nf_conntrack_put_reasm(skb); 628 nf_conntrack_put_reasm(skb);
609} 629}
610 630
631static int nf_ct_net_init(struct net *net)
632{
633 net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
634 net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
635 net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
636 inet_frags_init_net(&net->nf_frag.frags);
637
638 return nf_ct_frag6_sysctl_register(net);
639}
640
641static void nf_ct_net_exit(struct net *net)
642{
643 nf_ct_frags6_sysctl_unregister(net);
644 inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
645}
646
647static struct pernet_operations nf_ct_net_ops = {
648 .init = nf_ct_net_init,
649 .exit = nf_ct_net_exit,
650};
651
611int nf_ct_frag6_init(void) 652int nf_ct_frag6_init(void)
612{ 653{
654 int ret = 0;
655
613 nf_frags.hashfn = nf_hashfn; 656 nf_frags.hashfn = nf_hashfn;
614 nf_frags.constructor = ip6_frag_init; 657 nf_frags.constructor = ip6_frag_init;
615 nf_frags.destructor = NULL; 658 nf_frags.destructor = NULL;
616 nf_frags.skb_free = nf_skb_free; 659 nf_frags.skb_free = nf_skb_free;
617 nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); 660 nf_frags.qsize = sizeof(struct frag_queue);
618 nf_frags.match = ip6_frag_match; 661 nf_frags.match = ip6_frag_match;
619 nf_frags.frag_expire = nf_ct_frag6_expire; 662 nf_frags.frag_expire = nf_ct_frag6_expire;
620 nf_frags.secret_interval = 10 * 60 * HZ; 663 nf_frags.secret_interval = 10 * 60 * HZ;
621 nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
622 nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
623 nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
624 inet_frags_init_net(&nf_init_frags);
625 inet_frags_init(&nf_frags); 664 inet_frags_init(&nf_frags);
626 665
627#ifdef CONFIG_SYSCTL 666 ret = register_pernet_subsys(&nf_ct_net_ops);
628 nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", 667 if (ret)
629 nf_ct_frag6_sysctl_table);
630 if (!nf_ct_frag6_sysctl_header) {
631 inet_frags_fini(&nf_frags); 668 inet_frags_fini(&nf_frags);
632 return -ENOMEM;
633 }
634#endif
635 669
636 return 0; 670 return ret;
637} 671}
638 672
639void nf_ct_frag6_cleanup(void) 673void nf_ct_frag6_cleanup(void)
640{ 674{
641#ifdef CONFIG_SYSCTL 675 unregister_pernet_subsys(&nf_ct_net_ops);
642 unregister_net_sysctl_table(nf_ct_frag6_sysctl_header);
643 nf_ct_frag6_sysctl_header = NULL;
644#endif
645 inet_frags_fini(&nf_frags); 676 inet_frags_fini(&nf_frags);
646
647 nf_init_frags.low_thresh = 0;
648 nf_ct_frag6_evictor();
649} 677}
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
new file mode 100644
index 000000000000..abfe75a2e316
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -0,0 +1,288 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of IPv6 NAT funded by Astaro.
9 */
10#include <linux/types.h>
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/ipv6.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv6.h>
16#include <net/secure_seq.h>
17#include <net/checksum.h>
18#include <net/ip6_checksum.h>
19#include <net/ip6_route.h>
20#include <net/ipv6.h>
21
22#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_nat_core.h>
25#include <net/netfilter/nf_nat_l3proto.h>
26#include <net/netfilter/nf_nat_l4proto.h>
27
28static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
29
30#ifdef CONFIG_XFRM
31static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
32 const struct nf_conn *ct,
33 enum ip_conntrack_dir dir,
34 unsigned long statusbit,
35 struct flowi *fl)
36{
37 const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
38 struct flowi6 *fl6 = &fl->u.ip6;
39
40 if (ct->status & statusbit) {
41 fl6->daddr = t->dst.u3.in6;
42 if (t->dst.protonum == IPPROTO_TCP ||
43 t->dst.protonum == IPPROTO_UDP ||
44 t->dst.protonum == IPPROTO_UDPLITE ||
45 t->dst.protonum == IPPROTO_DCCP ||
46 t->dst.protonum == IPPROTO_SCTP)
47 fl6->fl6_dport = t->dst.u.all;
48 }
49
50 statusbit ^= IPS_NAT_MASK;
51
52 if (ct->status & statusbit) {
53 fl6->saddr = t->src.u3.in6;
54 if (t->dst.protonum == IPPROTO_TCP ||
55 t->dst.protonum == IPPROTO_UDP ||
56 t->dst.protonum == IPPROTO_UDPLITE ||
57 t->dst.protonum == IPPROTO_DCCP ||
58 t->dst.protonum == IPPROTO_SCTP)
59 fl6->fl6_sport = t->src.u.all;
60 }
61}
62#endif
63
64static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
65 const struct nf_nat_range *range)
66{
67 return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
68 ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
69}
70
71static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
72 __be16 dport)
73{
74 return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
75}
76
77static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
78 unsigned int iphdroff,
79 const struct nf_nat_l4proto *l4proto,
80 const struct nf_conntrack_tuple *target,
81 enum nf_nat_manip_type maniptype)
82{
83 struct ipv6hdr *ipv6h;
84 __be16 frag_off;
85 int hdroff;
86 u8 nexthdr;
87
88 if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
89 return false;
90
91 ipv6h = (void *)skb->data + iphdroff;
92 nexthdr = ipv6h->nexthdr;
93 hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
94 &nexthdr, &frag_off);
95 if (hdroff < 0)
96 goto manip_addr;
97
98 if ((frag_off & htons(~0x7)) == 0 &&
99 !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
100 target, maniptype))
101 return false;
102manip_addr:
103 if (maniptype == NF_NAT_MANIP_SRC)
104 ipv6h->saddr = target->src.u3.in6;
105 else
106 ipv6h->daddr = target->dst.u3.in6;
107
108 return true;
109}
110
111static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
112 unsigned int iphdroff, __sum16 *check,
113 const struct nf_conntrack_tuple *t,
114 enum nf_nat_manip_type maniptype)
115{
116 const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
117 const struct in6_addr *oldip, *newip;
118
119 if (maniptype == NF_NAT_MANIP_SRC) {
120 oldip = &ipv6h->saddr;
121 newip = &t->src.u3.in6;
122 } else {
123 oldip = &ipv6h->daddr;
124 newip = &t->dst.u3.in6;
125 }
126 inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
127 newip->s6_addr32, 1);
128}
129
130static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
131 u8 proto, void *data, __sum16 *check,
132 int datalen, int oldlen)
133{
134 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
135 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
136
137 if (skb->ip_summed != CHECKSUM_PARTIAL) {
138 if (!(rt->rt6i_flags & RTF_LOCAL) &&
139 (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
140 skb->ip_summed = CHECKSUM_PARTIAL;
141 skb->csum_start = skb_headroom(skb) +
142 skb_network_offset(skb) +
143 (data - (void *)skb->data);
144 skb->csum_offset = (void *)check - data;
145 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
146 datalen, proto, 0);
147 } else {
148 *check = 0;
149 *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
150 datalen, proto,
151 csum_partial(data, datalen,
152 0));
153 if (proto == IPPROTO_UDP && !*check)
154 *check = CSUM_MANGLED_0;
155 }
156 } else
157 inet_proto_csum_replace2(check, skb,
158 htons(oldlen), htons(datalen), 1);
159}
160
161static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
162 struct nf_nat_range *range)
163{
164 if (tb[CTA_NAT_V6_MINIP]) {
165 nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
166 sizeof(struct in6_addr));
167 range->flags |= NF_NAT_RANGE_MAP_IPS;
168 }
169
170 if (tb[CTA_NAT_V6_MAXIP])
171 nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
172 sizeof(struct in6_addr));
173 else
174 range->max_addr = range->min_addr;
175
176 return 0;
177}
178
179static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
180 .l3proto = NFPROTO_IPV6,
181 .secure_port = nf_nat_ipv6_secure_port,
182 .in_range = nf_nat_ipv6_in_range,
183 .manip_pkt = nf_nat_ipv6_manip_pkt,
184 .csum_update = nf_nat_ipv6_csum_update,
185 .csum_recalc = nf_nat_ipv6_csum_recalc,
186 .nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
187#ifdef CONFIG_XFRM
188 .decode_session = nf_nat_ipv6_decode_session,
189#endif
190};
191
192int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
193 struct nf_conn *ct,
194 enum ip_conntrack_info ctinfo,
195 unsigned int hooknum,
196 unsigned int hdrlen)
197{
198 struct {
199 struct icmp6hdr icmp6;
200 struct ipv6hdr ip6;
201 } *inside;
202 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
203 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
204 const struct nf_nat_l4proto *l4proto;
205 struct nf_conntrack_tuple target;
206 unsigned long statusbit;
207
208 NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
209
210 if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
211 return 0;
212 if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
213 return 0;
214
215 inside = (void *)skb->data + hdrlen;
216 if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
217 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
218 return 0;
219 if (ct->status & IPS_NAT_MASK)
220 return 0;
221 }
222
223 if (manip == NF_NAT_MANIP_SRC)
224 statusbit = IPS_SRC_NAT;
225 else
226 statusbit = IPS_DST_NAT;
227
228 /* Invert if this is reply direction */
229 if (dir == IP_CT_DIR_REPLY)
230 statusbit ^= IPS_NAT_MASK;
231
232 if (!(ct->status & statusbit))
233 return 1;
234
235 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
236 if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
237 l4proto, &ct->tuplehash[!dir].tuple, !manip))
238 return 0;
239
240 if (skb->ip_summed != CHECKSUM_PARTIAL) {
241 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
242 inside = (void *)skb->data + hdrlen;
243 inside->icmp6.icmp6_cksum = 0;
244 inside->icmp6.icmp6_cksum =
245 csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
246 skb->len - hdrlen, IPPROTO_ICMPV6,
247 csum_partial(&inside->icmp6,
248 skb->len - hdrlen, 0));
249 }
250
251 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
252 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
253 if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
254 return 0;
255
256 return 1;
257}
258EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
259
260static int __init nf_nat_l3proto_ipv6_init(void)
261{
262 int err;
263
264 err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
265 if (err < 0)
266 goto err1;
267 err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
268 if (err < 0)
269 goto err2;
270 return err;
271
272err2:
273 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
274err1:
275 return err;
276}
277
278static void __exit nf_nat_l3proto_ipv6_exit(void)
279{
280 nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
281 nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
282}
283
284MODULE_LICENSE("GPL");
285MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
286
287module_init(nf_nat_l3proto_ipv6_init);
288module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
new file mode 100644
index 000000000000..5d6da784305b
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -0,0 +1,90 @@
1/*
2 * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
9 * NAT funded by Astaro.
10 */
11
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/icmpv6.h>
15
16#include <linux/netfilter.h>
17#include <net/netfilter/nf_nat.h>
18#include <net/netfilter/nf_nat_core.h>
19#include <net/netfilter/nf_nat_l3proto.h>
20#include <net/netfilter/nf_nat_l4proto.h>
21
22static bool
23icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
24 enum nf_nat_manip_type maniptype,
25 const union nf_conntrack_man_proto *min,
26 const union nf_conntrack_man_proto *max)
27{
28 return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
29 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
30}
31
32static void
33icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
34 struct nf_conntrack_tuple *tuple,
35 const struct nf_nat_range *range,
36 enum nf_nat_manip_type maniptype,
37 const struct nf_conn *ct)
38{
39 static u16 id;
40 unsigned int range_size;
41 unsigned int i;
42
43 range_size = ntohs(range->max_proto.icmp.id) -
44 ntohs(range->min_proto.icmp.id) + 1;
45
46 if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
47 range_size = 0xffff;
48
49 for (i = 0; ; ++id) {
50 tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
51 (id % range_size));
52 if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
53 return;
54 }
55}
56
57static bool
58icmpv6_manip_pkt(struct sk_buff *skb,
59 const struct nf_nat_l3proto *l3proto,
60 unsigned int iphdroff, unsigned int hdroff,
61 const struct nf_conntrack_tuple *tuple,
62 enum nf_nat_manip_type maniptype)
63{
64 struct icmp6hdr *hdr;
65
66 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
67 return false;
68
69 hdr = (struct icmp6hdr *)(skb->data + hdroff);
70 l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
71 tuple, maniptype);
72 if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
73 hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
74 inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
75 hdr->icmp6_identifier,
76 tuple->src.u.icmp.id, 0);
77 hdr->icmp6_identifier = tuple->src.u.icmp.id;
78 }
79 return true;
80}
81
82const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
83 .l4proto = IPPROTO_ICMPV6,
84 .manip_pkt = icmpv6_manip_pkt,
85 .in_range = icmpv6_in_range,
86 .unique_tuple = icmpv6_unique_tuple,
87#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
88 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
89#endif
90};
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4ff9af628e72..da8a4e301b1b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -65,36 +65,8 @@ struct ip6frag_skb_cb
65#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) 65#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
66 66
67 67
68/*
69 * Equivalent of ipv4 struct ipq
70 */
71
72struct frag_queue
73{
74 struct inet_frag_queue q;
75
76 __be32 id; /* fragment id */
77 u32 user;
78 struct in6_addr saddr;
79 struct in6_addr daddr;
80
81 int iif;
82 unsigned int csum;
83 __u16 nhoffset;
84};
85
86static struct inet_frags ip6_frags; 68static struct inet_frags ip6_frags;
87 69
88int ip6_frag_nqueues(struct net *net)
89{
90 return net->ipv6.frags.nqueues;
91}
92
93int ip6_frag_mem(struct net *net)
94{
95 return atomic_read(&net->ipv6.frags.mem);
96}
97
98static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, 70static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
99 struct net_device *dev); 71 struct net_device *dev);
100 72
@@ -159,46 +131,18 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
159} 131}
160EXPORT_SYMBOL(ip6_frag_init); 132EXPORT_SYMBOL(ip6_frag_init);
161 133
162/* Destruction primitives. */ 134void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
163 135 struct inet_frags *frags)
164static __inline__ void fq_put(struct frag_queue *fq)
165{
166 inet_frag_put(&fq->q, &ip6_frags);
167}
168
169/* Kill fq entry. It is not destroyed immediately,
170 * because caller (and someone more) holds reference count.
171 */
172static __inline__ void fq_kill(struct frag_queue *fq)
173{
174 inet_frag_kill(&fq->q, &ip6_frags);
175}
176
177static void ip6_evictor(struct net *net, struct inet6_dev *idev)
178{ 136{
179 int evicted;
180
181 evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
182 if (evicted)
183 IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
184}
185
186static void ip6_frag_expire(unsigned long data)
187{
188 struct frag_queue *fq;
189 struct net_device *dev = NULL; 137 struct net_device *dev = NULL;
190 struct net *net;
191
192 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
193 138
194 spin_lock(&fq->q.lock); 139 spin_lock(&fq->q.lock);
195 140
196 if (fq->q.last_in & INET_FRAG_COMPLETE) 141 if (fq->q.last_in & INET_FRAG_COMPLETE)
197 goto out; 142 goto out;
198 143
199 fq_kill(fq); 144 inet_frag_kill(&fq->q, frags);
200 145
201 net = container_of(fq->q.net, struct net, ipv6.frags);
202 rcu_read_lock(); 146 rcu_read_lock();
203 dev = dev_get_by_index_rcu(net, fq->iif); 147 dev = dev_get_by_index_rcu(net, fq->iif);
204 if (!dev) 148 if (!dev)
@@ -222,7 +166,19 @@ out_rcu_unlock:
222 rcu_read_unlock(); 166 rcu_read_unlock();
223out: 167out:
224 spin_unlock(&fq->q.lock); 168 spin_unlock(&fq->q.lock);
225 fq_put(fq); 169 inet_frag_put(&fq->q, frags);
170}
171EXPORT_SYMBOL(ip6_expire_frag_queue);
172
173static void ip6_frag_expire(unsigned long data)
174{
175 struct frag_queue *fq;
176 struct net *net;
177
178 fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
179 net = container_of(fq->q.net, struct net, ipv6.frags);
180
181 ip6_expire_frag_queue(net, fq, &ip6_frags);
226} 182}
227 183
228static __inline__ struct frag_queue * 184static __inline__ struct frag_queue *
@@ -391,7 +347,7 @@ found:
391 return -1; 347 return -1;
392 348
393discard_fq: 349discard_fq:
394 fq_kill(fq); 350 inet_frag_kill(&fq->q, &ip6_frags);
395err: 351err:
396 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 352 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
397 IPSTATS_MIB_REASMFAILS); 353 IPSTATS_MIB_REASMFAILS);
@@ -417,7 +373,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
417 unsigned int nhoff; 373 unsigned int nhoff;
418 int sum_truesize; 374 int sum_truesize;
419 375
420 fq_kill(fq); 376 inet_frag_kill(&fq->q, &ip6_frags);
421 377
422 /* Make the one we just received the head. */ 378 /* Make the one we just received the head. */
423 if (prev) { 379 if (prev) {
@@ -550,6 +506,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
550 struct frag_queue *fq; 506 struct frag_queue *fq;
551 const struct ipv6hdr *hdr = ipv6_hdr(skb); 507 const struct ipv6hdr *hdr = ipv6_hdr(skb);
552 struct net *net = dev_net(skb_dst(skb)->dev); 508 struct net *net = dev_net(skb_dst(skb)->dev);
509 int evicted;
553 510
554 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); 511 IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
555 512
@@ -574,8 +531,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
574 return 1; 531 return 1;
575 } 532 }
576 533
577 if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) 534 evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
578 ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); 535 if (evicted)
536 IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
537 IPSTATS_MIB_REASMFAILS, evicted);
579 538
580 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); 539 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
581 if (fq != NULL) { 540 if (fq != NULL) {
@@ -586,7 +545,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
586 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); 545 ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
587 546
588 spin_unlock(&fq->q.lock); 547 spin_unlock(&fq->q.lock);
589 fq_put(fq); 548 inet_frag_put(&fq->q, &ip6_frags);
590 return ret; 549 return ret;
591 } 550 }
592 551
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 854e4018d205..d1ddbc6ddac5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -222,7 +222,7 @@ static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255, 222 [RTAX_HOPLIMIT - 1] = 255,
223}; 223};
224 224
225static struct rt6_info ip6_null_entry_template = { 225static const struct rt6_info ip6_null_entry_template = {
226 .dst = { 226 .dst = {
227 .__refcnt = ATOMIC_INIT(1), 227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1, 228 .__use = 1,
@@ -242,7 +242,7 @@ static struct rt6_info ip6_null_entry_template = {
242static int ip6_pkt_prohibit(struct sk_buff *skb); 242static int ip6_pkt_prohibit(struct sk_buff *skb);
243static int ip6_pkt_prohibit_out(struct sk_buff *skb); 243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244 244
245static struct rt6_info ip6_prohibit_entry_template = { 245static const struct rt6_info ip6_prohibit_entry_template = {
246 .dst = { 246 .dst = {
247 .__refcnt = ATOMIC_INIT(1), 247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1, 248 .__use = 1,
@@ -257,7 +257,7 @@ static struct rt6_info ip6_prohibit_entry_template = {
257 .rt6i_ref = ATOMIC_INIT(1), 257 .rt6i_ref = ATOMIC_INIT(1),
258}; 258};
259 259
260static struct rt6_info ip6_blk_hole_entry_template = { 260static const struct rt6_info ip6_blk_hole_entry_template = {
261 .dst = { 261 .dst = {
262 .__refcnt = ATOMIC_INIT(1), 262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1, 263 .__use = 1,
@@ -370,15 +370,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
370 370
371static bool rt6_check_expired(const struct rt6_info *rt) 371static bool rt6_check_expired(const struct rt6_info *rt)
372{ 372{
373 struct rt6_info *ort = NULL;
374
375 if (rt->rt6i_flags & RTF_EXPIRES) { 373 if (rt->rt6i_flags & RTF_EXPIRES) {
376 if (time_after(jiffies, rt->dst.expires)) 374 if (time_after(jiffies, rt->dst.expires))
377 return true; 375 return true;
378 } else if (rt->dst.from) { 376 } else if (rt->dst.from) {
379 ort = (struct rt6_info *) rt->dst.from; 377 return rt6_check_expired((struct rt6_info *) rt->dst.from);
380 return (ort->rt6i_flags & RTF_EXPIRES) &&
381 time_after(jiffies, ort->dst.expires);
382 } 378 }
383 return false; 379 return false;
384} 380}
@@ -452,10 +448,9 @@ static void rt6_probe(struct rt6_info *rt)
452 * Router Reachability Probe MUST be rate-limited 448 * Router Reachability Probe MUST be rate-limited
453 * to no more than one per minute. 449 * to no more than one per minute.
454 */ 450 */
455 rcu_read_lock();
456 neigh = rt ? rt->n : NULL; 451 neigh = rt ? rt->n : NULL;
457 if (!neigh || (neigh->nud_state & NUD_VALID)) 452 if (!neigh || (neigh->nud_state & NUD_VALID))
458 goto out; 453 return;
459 read_lock_bh(&neigh->lock); 454 read_lock_bh(&neigh->lock);
460 if (!(neigh->nud_state & NUD_VALID) && 455 if (!(neigh->nud_state & NUD_VALID) &&
461 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 456 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
@@ -471,8 +466,6 @@ static void rt6_probe(struct rt6_info *rt)
471 } else { 466 } else {
472 read_unlock_bh(&neigh->lock); 467 read_unlock_bh(&neigh->lock);
473 } 468 }
474out:
475 rcu_read_unlock();
476} 469}
477#else 470#else
478static inline void rt6_probe(struct rt6_info *rt) 471static inline void rt6_probe(struct rt6_info *rt)
@@ -499,7 +492,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
499 struct neighbour *neigh; 492 struct neighbour *neigh;
500 int m; 493 int m;
501 494
502 rcu_read_lock();
503 neigh = rt->n; 495 neigh = rt->n;
504 if (rt->rt6i_flags & RTF_NONEXTHOP || 496 if (rt->rt6i_flags & RTF_NONEXTHOP ||
505 !(rt->rt6i_flags & RTF_GATEWAY)) 497 !(rt->rt6i_flags & RTF_GATEWAY))
@@ -517,7 +509,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
517 read_unlock_bh(&neigh->lock); 509 read_unlock_bh(&neigh->lock);
518 } else 510 } else
519 m = 0; 511 m = 0;
520 rcu_read_unlock();
521 return m; 512 return m;
522} 513}
523 514
@@ -966,7 +957,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
966{ 957{
967 int flags = 0; 958 int flags = 0;
968 959
969 fl6->flowi6_iif = net->loopback_dev->ifindex; 960 fl6->flowi6_iif = LOOPBACK_IFINDEX;
970 961
971 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 962 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
972 flags |= RT6_LOOKUP_F_IFACE; 963 flags |= RT6_LOOKUP_F_IFACE;
@@ -1469,8 +1460,21 @@ int ip6_route_add(struct fib6_config *cfg)
1469 } 1460 }
1470 rt->dst.output = ip6_pkt_discard_out; 1461 rt->dst.output = ip6_pkt_discard_out;
1471 rt->dst.input = ip6_pkt_discard; 1462 rt->dst.input = ip6_pkt_discard;
1472 rt->dst.error = -ENETUNREACH;
1473 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1463 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1464 switch (cfg->fc_type) {
1465 case RTN_BLACKHOLE:
1466 rt->dst.error = -EINVAL;
1467 break;
1468 case RTN_PROHIBIT:
1469 rt->dst.error = -EACCES;
1470 break;
1471 case RTN_THROW:
1472 rt->dst.error = -EAGAIN;
1473 break;
1474 default:
1475 rt->dst.error = -ENETUNREACH;
1476 break;
1477 }
1474 goto install_route; 1478 goto install_route;
1475 } 1479 }
1476 1480
@@ -1835,7 +1839,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
1835 if (!table) 1839 if (!table)
1836 return NULL; 1840 return NULL;
1837 1841
1838 write_lock_bh(&table->tb6_lock); 1842 read_lock_bh(&table->tb6_lock);
1839 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1843 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1840 if (!fn) 1844 if (!fn)
1841 goto out; 1845 goto out;
@@ -1851,7 +1855,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
1851 break; 1855 break;
1852 } 1856 }
1853out: 1857out:
1854 write_unlock_bh(&table->tb6_lock); 1858 read_unlock_bh(&table->tb6_lock);
1855 return rt; 1859 return rt;
1856} 1860}
1857 1861
@@ -1867,7 +1871,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
1867 .fc_dst_len = prefixlen, 1871 .fc_dst_len = prefixlen,
1868 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1872 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1869 RTF_UP | RTF_PREF(pref), 1873 RTF_UP | RTF_PREF(pref),
1870 .fc_nlinfo.pid = 0, 1874 .fc_nlinfo.portid = 0,
1871 .fc_nlinfo.nlh = NULL, 1875 .fc_nlinfo.nlh = NULL,
1872 .fc_nlinfo.nl_net = net, 1876 .fc_nlinfo.nl_net = net,
1873 }; 1877 };
@@ -1894,7 +1898,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
1894 if (!table) 1898 if (!table)
1895 return NULL; 1899 return NULL;
1896 1900
1897 write_lock_bh(&table->tb6_lock); 1901 read_lock_bh(&table->tb6_lock);
1898 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1902 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1899 if (dev == rt->dst.dev && 1903 if (dev == rt->dst.dev &&
1900 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1904 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -1903,7 +1907,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
1903 } 1907 }
1904 if (rt) 1908 if (rt)
1905 dst_hold(&rt->dst); 1909 dst_hold(&rt->dst);
1906 write_unlock_bh(&table->tb6_lock); 1910 read_unlock_bh(&table->tb6_lock);
1907 return rt; 1911 return rt;
1908} 1912}
1909 1913
@@ -1917,7 +1921,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1917 .fc_ifindex = dev->ifindex, 1921 .fc_ifindex = dev->ifindex,
1918 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1922 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1919 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1923 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1920 .fc_nlinfo.pid = 0, 1924 .fc_nlinfo.portid = 0,
1921 .fc_nlinfo.nlh = NULL, 1925 .fc_nlinfo.nlh = NULL,
1922 .fc_nlinfo.nl_net = dev_net(dev), 1926 .fc_nlinfo.nl_net = dev_net(dev),
1923 }; 1927 };
@@ -2266,14 +2270,18 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2266 cfg->fc_src_len = rtm->rtm_src_len; 2270 cfg->fc_src_len = rtm->rtm_src_len;
2267 cfg->fc_flags = RTF_UP; 2271 cfg->fc_flags = RTF_UP;
2268 cfg->fc_protocol = rtm->rtm_protocol; 2272 cfg->fc_protocol = rtm->rtm_protocol;
2273 cfg->fc_type = rtm->rtm_type;
2269 2274
2270 if (rtm->rtm_type == RTN_UNREACHABLE) 2275 if (rtm->rtm_type == RTN_UNREACHABLE ||
2276 rtm->rtm_type == RTN_BLACKHOLE ||
2277 rtm->rtm_type == RTN_PROHIBIT ||
2278 rtm->rtm_type == RTN_THROW)
2271 cfg->fc_flags |= RTF_REJECT; 2279 cfg->fc_flags |= RTF_REJECT;
2272 2280
2273 if (rtm->rtm_type == RTN_LOCAL) 2281 if (rtm->rtm_type == RTN_LOCAL)
2274 cfg->fc_flags |= RTF_LOCAL; 2282 cfg->fc_flags |= RTF_LOCAL;
2275 2283
2276 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2284 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2277 cfg->fc_nlinfo.nlh = nlh; 2285 cfg->fc_nlinfo.nlh = nlh;
2278 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2286 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2279 2287
@@ -2364,7 +2372,7 @@ static inline size_t rt6_nlmsg_size(void)
2364static int rt6_fill_node(struct net *net, 2372static int rt6_fill_node(struct net *net,
2365 struct sk_buff *skb, struct rt6_info *rt, 2373 struct sk_buff *skb, struct rt6_info *rt,
2366 struct in6_addr *dst, struct in6_addr *src, 2374 struct in6_addr *dst, struct in6_addr *src,
2367 int iif, int type, u32 pid, u32 seq, 2375 int iif, int type, u32 portid, u32 seq,
2368 int prefix, int nowait, unsigned int flags) 2376 int prefix, int nowait, unsigned int flags)
2369{ 2377{
2370 struct rtmsg *rtm; 2378 struct rtmsg *rtm;
@@ -2380,7 +2388,7 @@ static int rt6_fill_node(struct net *net,
2380 } 2388 }
2381 } 2389 }
2382 2390
2383 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2391 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2384 if (!nlh) 2392 if (!nlh)
2385 return -EMSGSIZE; 2393 return -EMSGSIZE;
2386 2394
@@ -2396,8 +2404,22 @@ static int rt6_fill_node(struct net *net,
2396 rtm->rtm_table = table; 2404 rtm->rtm_table = table;
2397 if (nla_put_u32(skb, RTA_TABLE, table)) 2405 if (nla_put_u32(skb, RTA_TABLE, table))
2398 goto nla_put_failure; 2406 goto nla_put_failure;
2399 if (rt->rt6i_flags & RTF_REJECT) 2407 if (rt->rt6i_flags & RTF_REJECT) {
2400 rtm->rtm_type = RTN_UNREACHABLE; 2408 switch (rt->dst.error) {
2409 case -EINVAL:
2410 rtm->rtm_type = RTN_BLACKHOLE;
2411 break;
2412 case -EACCES:
2413 rtm->rtm_type = RTN_PROHIBIT;
2414 break;
2415 case -EAGAIN:
2416 rtm->rtm_type = RTN_THROW;
2417 break;
2418 default:
2419 rtm->rtm_type = RTN_UNREACHABLE;
2420 break;
2421 }
2422 }
2401 else if (rt->rt6i_flags & RTF_LOCAL) 2423 else if (rt->rt6i_flags & RTF_LOCAL)
2402 rtm->rtm_type = RTN_LOCAL; 2424 rtm->rtm_type = RTN_LOCAL;
2403 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 2425 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
@@ -2470,15 +2492,11 @@ static int rt6_fill_node(struct net *net,
2470 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2492 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2471 goto nla_put_failure; 2493 goto nla_put_failure;
2472 2494
2473 rcu_read_lock();
2474 n = rt->n; 2495 n = rt->n;
2475 if (n) { 2496 if (n) {
2476 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { 2497 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2477 rcu_read_unlock();
2478 goto nla_put_failure; 2498 goto nla_put_failure;
2479 }
2480 } 2499 }
2481 rcu_read_unlock();
2482 2500
2483 if (rt->dst.dev && 2501 if (rt->dst.dev &&
2484 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2502 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@ -2511,7 +2529,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2511 2529
2512 return rt6_fill_node(arg->net, 2530 return rt6_fill_node(arg->net,
2513 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2531 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2514 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2532 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2515 prefix, 0, NLM_F_MULTI); 2533 prefix, 0, NLM_F_MULTI);
2516} 2534}
2517 2535
@@ -2591,14 +2609,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2591 skb_dst_set(skb, &rt->dst); 2609 skb_dst_set(skb, &rt->dst);
2592 2610
2593 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2611 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2594 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2612 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2595 nlh->nlmsg_seq, 0, 0, 0); 2613 nlh->nlmsg_seq, 0, 0, 0);
2596 if (err < 0) { 2614 if (err < 0) {
2597 kfree_skb(skb); 2615 kfree_skb(skb);
2598 goto errout; 2616 goto errout;
2599 } 2617 }
2600 2618
2601 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2619 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2602errout: 2620errout:
2603 return err; 2621 return err;
2604} 2622}
@@ -2618,14 +2636,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2618 goto errout; 2636 goto errout;
2619 2637
2620 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2638 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2621 event, info->pid, seq, 0, 0, 0); 2639 event, info->portid, seq, 0, 0, 0);
2622 if (err < 0) { 2640 if (err < 0) {
2623 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2641 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2624 WARN_ON(err == -EMSGSIZE); 2642 WARN_ON(err == -EMSGSIZE);
2625 kfree_skb(skb); 2643 kfree_skb(skb);
2626 goto errout; 2644 goto errout;
2627 } 2645 }
2628 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2646 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2629 info->nlh, gfp_any()); 2647 info->nlh, gfp_any());
2630 return; 2648 return;
2631errout: 2649errout:
@@ -2680,14 +2698,12 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2680#else 2698#else
2681 seq_puts(m, "00000000000000000000000000000000 00 "); 2699 seq_puts(m, "00000000000000000000000000000000 00 ");
2682#endif 2700#endif
2683 rcu_read_lock();
2684 n = rt->n; 2701 n = rt->n;
2685 if (n) { 2702 if (n) {
2686 seq_printf(m, "%pi6", n->primary_key); 2703 seq_printf(m, "%pi6", n->primary_key);
2687 } else { 2704 } else {
2688 seq_puts(m, "00000000000000000000000000000000"); 2705 seq_puts(m, "00000000000000000000000000000000");
2689 } 2706 }
2690 rcu_read_unlock();
2691 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2707 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2692 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2708 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2693 rt->dst.__use, rt->rt6i_flags, 2709 rt->dst.__use, rt->rt6i_flags,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3bd1bfc01f85..3ed54ffd8d50 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -545,7 +545,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
545 545
546 err = -ENOENT; 546 err = -ENOENT;
547 547
548 rcu_read_lock();
549 t = ipip6_tunnel_lookup(dev_net(skb->dev), 548 t = ipip6_tunnel_lookup(dev_net(skb->dev),
550 skb->dev, 549 skb->dev,
551 iph->daddr, 550 iph->daddr,
@@ -579,7 +578,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
579 t->err_count = 1; 578 t->err_count = 1;
580 t->err_time = jiffies; 579 t->err_time = jiffies;
581out: 580out:
582 rcu_read_unlock();
583 return err; 581 return err;
584} 582}
585 583
@@ -599,7 +597,6 @@ static int ipip6_rcv(struct sk_buff *skb)
599 597
600 iph = ip_hdr(skb); 598 iph = ip_hdr(skb);
601 599
602 rcu_read_lock();
603 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 600 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
604 iph->saddr, iph->daddr); 601 iph->saddr, iph->daddr);
605 if (tunnel != NULL) { 602 if (tunnel != NULL) {
@@ -615,7 +612,6 @@ static int ipip6_rcv(struct sk_buff *skb)
615 if ((tunnel->dev->priv_flags & IFF_ISATAP) && 612 if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
616 !isatap_chksrc(skb, iph, tunnel)) { 613 !isatap_chksrc(skb, iph, tunnel)) {
617 tunnel->dev->stats.rx_errors++; 614 tunnel->dev->stats.rx_errors++;
618 rcu_read_unlock();
619 kfree_skb(skb); 615 kfree_skb(skb);
620 return 0; 616 return 0;
621 } 617 }
@@ -630,12 +626,10 @@ static int ipip6_rcv(struct sk_buff *skb)
630 626
631 netif_rx(skb); 627 netif_rx(skb);
632 628
633 rcu_read_unlock();
634 return 0; 629 return 0;
635 } 630 }
636 631
637 /* no tunnel matched, let upstream know, ipsec may handle it */ 632 /* no tunnel matched, let upstream know, ipsec may handle it */
638 rcu_read_unlock();
639 return 1; 633 return 1;
640out: 634out:
641 kfree_skb(skb); 635 kfree_skb(skb);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb46061c813a..182ab9a85d6c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
190 ireq = inet_rsk(req); 190 ireq = inet_rsk(req);
191 ireq6 = inet6_rsk(req); 191 ireq6 = inet6_rsk(req);
192 treq = tcp_rsk(req); 192 treq = tcp_rsk(req);
193 treq->listener = NULL;
193 194
194 if (security_inet_conn_request(sk, skb, req)) 195 if (security_inet_conn_request(sk, skb, req))
195 goto out_free; 196 goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 342ec62cdbde..49c890386ce9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -476,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
476 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) 476 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
477 goto done; 477 goto done;
478 478
479 skb = tcp_make_synack(sk, dst, req, rvp); 479 skb = tcp_make_synack(sk, dst, req, rvp, NULL);
480 480
481 if (skb) { 481 if (skb) {
482 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); 482 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -763,6 +763,8 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
763 struct sk_buff *skb) 763 struct sk_buff *skb)
764{ 764{
765 const struct ipv6hdr *iph = skb_gro_network_header(skb); 765 const struct ipv6hdr *iph = skb_gro_network_header(skb);
766 __wsum wsum;
767 __sum16 sum;
766 768
767 switch (skb->ip_summed) { 769 switch (skb->ip_summed) {
768 case CHECKSUM_COMPLETE: 770 case CHECKSUM_COMPLETE:
@@ -771,11 +773,23 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
771 skb->ip_summed = CHECKSUM_UNNECESSARY; 773 skb->ip_summed = CHECKSUM_UNNECESSARY;
772 break; 774 break;
773 } 775 }
774 776flush:
775 /* fall through */
776 case CHECKSUM_NONE:
777 NAPI_GRO_CB(skb)->flush = 1; 777 NAPI_GRO_CB(skb)->flush = 1;
778 return NULL; 778 return NULL;
779
780 case CHECKSUM_NONE:
781 wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
782 skb_gro_len(skb),
783 IPPROTO_TCP, 0));
784 sum = csum_fold(skb_checksum(skb,
785 skb_gro_offset(skb),
786 skb_gro_len(skb),
787 wsum));
788 if (sum)
789 goto flush;
790
791 skb->ip_summed = CHECKSUM_UNNECESSARY;
792 break;
779 } 793 }
780 794
781 return tcp_gro_receive(head, skb); 795 return tcp_gro_receive(head, skb);
@@ -988,7 +1002,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
988 &ipv6_hdr(skb)->saddr, 1002 &ipv6_hdr(skb)->saddr,
989 &ipv6_hdr(skb)->daddr, inet6_iif(skb)); 1003 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
990 if (req) 1004 if (req)
991 return tcp_check_req(sk, skb, req, prev); 1005 return tcp_check_req(sk, skb, req, prev, false);
992 1006
993 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 1007 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
994 &ipv6_hdr(skb)->saddr, th->source, 1008 &ipv6_hdr(skb)->saddr, th->source,
@@ -1169,7 +1183,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1169 } 1183 }
1170have_isn: 1184have_isn:
1171 tcp_rsk(req)->snt_isn = isn; 1185 tcp_rsk(req)->snt_isn = isn;
1172 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1173 1186
1174 if (security_inet_conn_request(sk, skb, req)) 1187 if (security_inet_conn_request(sk, skb, req))
1175 goto drop_and_release; 1188 goto drop_and_release;
@@ -1180,6 +1193,8 @@ have_isn:
1180 want_cookie) 1193 want_cookie)
1181 goto drop_and_free; 1194 goto drop_and_free;
1182 1195
1196 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1197 tcp_rsk(req)->listener = NULL;
1183 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 1198 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1184 return 0; 1199 return 0;
1185 1200
@@ -1347,9 +1362,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1347 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1362 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1348 1363
1349 tcp_initialize_rcv_mss(newsk); 1364 tcp_initialize_rcv_mss(newsk);
1350 if (tcp_rsk(req)->snt_synack) 1365 tcp_synack_rtt_meas(newsk, req);
1351 tcp_valid_rtt_meas(newsk,
1352 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1353 newtp->total_retrans = req->retrans; 1366 newtp->total_retrans = req->retrans;
1354 1367
1355 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1368 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
@@ -1901,7 +1914,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1901 tp->write_seq-tp->snd_una, 1914 tp->write_seq-tp->snd_una,
1902 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), 1915 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1903 timer_active, 1916 timer_active,
1904 jiffies_to_clock_t(timer_expires - jiffies), 1917 jiffies_delta_to_clock_t(timer_expires - jiffies),
1905 icsk->icsk_retransmits, 1918 icsk->icsk_retransmits,
1906 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), 1919 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1907 icsk->icsk_probes_out, 1920 icsk->icsk_probes_out,
@@ -1921,10 +1934,7 @@ static void get_timewait6_sock(struct seq_file *seq,
1921 const struct in6_addr *dest, *src; 1934 const struct in6_addr *dest, *src;
1922 __u16 destp, srcp; 1935 __u16 destp, srcp;
1923 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); 1936 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1924 int ttd = tw->tw_ttd - jiffies; 1937 long delta = tw->tw_ttd - jiffies;
1925
1926 if (ttd < 0)
1927 ttd = 0;
1928 1938
1929 dest = &tw6->tw_v6_daddr; 1939 dest = &tw6->tw_v6_daddr;
1930 src = &tw6->tw_v6_rcv_saddr; 1940 src = &tw6->tw_v6_rcv_saddr;
@@ -1940,7 +1950,7 @@ static void get_timewait6_sock(struct seq_file *seq,
1940 dest->s6_addr32[0], dest->s6_addr32[1], 1950 dest->s6_addr32[0], dest->s6_addr32[1],
1941 dest->s6_addr32[2], dest->s6_addr32[3], destp, 1951 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1942 tw->tw_substate, 0, 0, 1952 tw->tw_substate, 0, 0,
1943 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 1953 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1944 atomic_read(&tw->tw_refcnt), tw); 1954 atomic_read(&tw->tw_refcnt), tw);
1945} 1955}
1946 1956