diff options
Diffstat (limited to 'net/ipv4/ipip.c')
-rw-r--r-- | net/ipv4/ipip.c | 271 |
1 files changed, 219 insertions, 52 deletions
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e15b45297c09..191fc24a745a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -138,22 +138,7 @@ struct ipip_net { | |||
138 | static int ipip_tunnel_init(struct net_device *dev); | 138 | static int ipip_tunnel_init(struct net_device *dev); |
139 | static void ipip_tunnel_setup(struct net_device *dev); | 139 | static void ipip_tunnel_setup(struct net_device *dev); |
140 | static void ipip_dev_free(struct net_device *dev); | 140 | static void ipip_dev_free(struct net_device *dev); |
141 | 141 | static struct rtnl_link_ops ipip_link_ops __read_mostly; | |
142 | /* | ||
143 | * Locking : hash tables are protected by RCU and RTNL | ||
144 | */ | ||
145 | |||
146 | #define for_each_ip_tunnel_rcu(start) \ | ||
147 | for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) | ||
148 | |||
149 | /* often modified stats are per cpu, other are shared (netdev->stats) */ | ||
150 | struct pcpu_tstats { | ||
151 | u64 rx_packets; | ||
152 | u64 rx_bytes; | ||
153 | u64 tx_packets; | ||
154 | u64 tx_bytes; | ||
155 | struct u64_stats_sync syncp; | ||
156 | }; | ||
157 | 142 | ||
158 | static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, | 143 | static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, |
159 | struct rtnl_link_stats64 *tot) | 144 | struct rtnl_link_stats64 *tot) |
@@ -197,16 +182,16 @@ static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, | |||
197 | struct ip_tunnel *t; | 182 | struct ip_tunnel *t; |
198 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 183 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
199 | 184 | ||
200 | for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) | 185 | for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1]) |
201 | if (local == t->parms.iph.saddr && | 186 | if (local == t->parms.iph.saddr && |
202 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 187 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
203 | return t; | 188 | return t; |
204 | 189 | ||
205 | for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) | 190 | for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0]) |
206 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 191 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
207 | return t; | 192 | return t; |
208 | 193 | ||
209 | for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) | 194 | for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1]) |
210 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) | 195 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) |
211 | return t; | 196 | return t; |
212 | 197 | ||
@@ -264,6 +249,32 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | |||
264 | rcu_assign_pointer(*tp, t); | 249 | rcu_assign_pointer(*tp, t); |
265 | } | 250 | } |
266 | 251 | ||
252 | static int ipip_tunnel_create(struct net_device *dev) | ||
253 | { | ||
254 | struct ip_tunnel *t = netdev_priv(dev); | ||
255 | struct net *net = dev_net(dev); | ||
256 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
257 | int err; | ||
258 | |||
259 | err = ipip_tunnel_init(dev); | ||
260 | if (err < 0) | ||
261 | goto out; | ||
262 | |||
263 | err = register_netdevice(dev); | ||
264 | if (err < 0) | ||
265 | goto out; | ||
266 | |||
267 | strcpy(t->parms.name, dev->name); | ||
268 | dev->rtnl_link_ops = &ipip_link_ops; | ||
269 | |||
270 | dev_hold(dev); | ||
271 | ipip_tunnel_link(ipn, t); | ||
272 | return 0; | ||
273 | |||
274 | out: | ||
275 | return err; | ||
276 | } | ||
277 | |||
267 | static struct ip_tunnel *ipip_tunnel_locate(struct net *net, | 278 | static struct ip_tunnel *ipip_tunnel_locate(struct net *net, |
268 | struct ip_tunnel_parm *parms, int create) | 279 | struct ip_tunnel_parm *parms, int create) |
269 | { | 280 | { |
@@ -298,16 +309,9 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net, | |||
298 | nt = netdev_priv(dev); | 309 | nt = netdev_priv(dev); |
299 | nt->parms = *parms; | 310 | nt->parms = *parms; |
300 | 311 | ||
301 | if (ipip_tunnel_init(dev) < 0) | 312 | if (ipip_tunnel_create(dev) < 0) |
302 | goto failed_free; | 313 | goto failed_free; |
303 | 314 | ||
304 | if (register_netdevice(dev) < 0) | ||
305 | goto failed_free; | ||
306 | |||
307 | strcpy(nt->parms.name, dev->name); | ||
308 | |||
309 | dev_hold(dev); | ||
310 | ipip_tunnel_link(ipn, nt); | ||
311 | return nt; | 315 | return nt; |
312 | 316 | ||
313 | failed_free: | 317 | failed_free: |
@@ -463,7 +467,6 @@ drop: | |||
463 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 467 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
464 | { | 468 | { |
465 | struct ip_tunnel *tunnel = netdev_priv(dev); | 469 | struct ip_tunnel *tunnel = netdev_priv(dev); |
466 | struct pcpu_tstats *tstats; | ||
467 | const struct iphdr *tiph = &tunnel->parms.iph; | 470 | const struct iphdr *tiph = &tunnel->parms.iph; |
468 | u8 tos = tunnel->parms.iph.tos; | 471 | u8 tos = tunnel->parms.iph.tos; |
469 | __be16 df = tiph->frag_off; | 472 | __be16 df = tiph->frag_off; |
@@ -479,6 +482,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
479 | if (skb->protocol != htons(ETH_P_IP)) | 482 | if (skb->protocol != htons(ETH_P_IP)) |
480 | goto tx_error; | 483 | goto tx_error; |
481 | 484 | ||
485 | if (skb->ip_summed == CHECKSUM_PARTIAL && | ||
486 | skb_checksum_help(skb)) | ||
487 | goto tx_error; | ||
488 | |||
482 | if (tos & 1) | 489 | if (tos & 1) |
483 | tos = old_iph->tos; | 490 | tos = old_iph->tos; |
484 | 491 | ||
@@ -586,9 +593,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
586 | if ((iph->ttl = tiph->ttl) == 0) | 593 | if ((iph->ttl = tiph->ttl) == 0) |
587 | iph->ttl = old_iph->ttl; | 594 | iph->ttl = old_iph->ttl; |
588 | 595 | ||
589 | nf_reset(skb); | 596 | iptunnel_xmit(skb, dev); |
590 | tstats = this_cpu_ptr(dev->tstats); | ||
591 | __IPTUNNEL_XMIT(tstats, &dev->stats); | ||
592 | return NETDEV_TX_OK; | 597 | return NETDEV_TX_OK; |
593 | 598 | ||
594 | tx_error_icmp: | 599 | tx_error_icmp: |
@@ -635,6 +640,28 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
635 | dev->iflink = tunnel->parms.link; | 640 | dev->iflink = tunnel->parms.link; |
636 | } | 641 | } |
637 | 642 | ||
643 | static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) | ||
644 | { | ||
645 | struct net *net = dev_net(t->dev); | ||
646 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
647 | |||
648 | ipip_tunnel_unlink(ipn, t); | ||
649 | synchronize_net(); | ||
650 | t->parms.iph.saddr = p->iph.saddr; | ||
651 | t->parms.iph.daddr = p->iph.daddr; | ||
652 | memcpy(t->dev->dev_addr, &p->iph.saddr, 4); | ||
653 | memcpy(t->dev->broadcast, &p->iph.daddr, 4); | ||
654 | ipip_tunnel_link(ipn, t); | ||
655 | t->parms.iph.ttl = p->iph.ttl; | ||
656 | t->parms.iph.tos = p->iph.tos; | ||
657 | t->parms.iph.frag_off = p->iph.frag_off; | ||
658 | if (t->parms.link != p->link) { | ||
659 | t->parms.link = p->link; | ||
660 | ipip_tunnel_bind_dev(t->dev); | ||
661 | } | ||
662 | netdev_state_change(t->dev); | ||
663 | } | ||
664 | |||
638 | static int | 665 | static int |
639 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 666 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) |
640 | { | 667 | { |
@@ -664,7 +691,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
664 | case SIOCADDTUNNEL: | 691 | case SIOCADDTUNNEL: |
665 | case SIOCCHGTUNNEL: | 692 | case SIOCCHGTUNNEL: |
666 | err = -EPERM; | 693 | err = -EPERM; |
667 | if (!capable(CAP_NET_ADMIN)) | 694 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
668 | goto done; | 695 | goto done; |
669 | 696 | ||
670 | err = -EFAULT; | 697 | err = -EFAULT; |
@@ -693,29 +720,13 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
693 | break; | 720 | break; |
694 | } | 721 | } |
695 | t = netdev_priv(dev); | 722 | t = netdev_priv(dev); |
696 | ipip_tunnel_unlink(ipn, t); | ||
697 | synchronize_net(); | ||
698 | t->parms.iph.saddr = p.iph.saddr; | ||
699 | t->parms.iph.daddr = p.iph.daddr; | ||
700 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | ||
701 | memcpy(dev->broadcast, &p.iph.daddr, 4); | ||
702 | ipip_tunnel_link(ipn, t); | ||
703 | netdev_state_change(dev); | ||
704 | } | 723 | } |
724 | |||
725 | ipip_tunnel_update(t, &p); | ||
705 | } | 726 | } |
706 | 727 | ||
707 | if (t) { | 728 | if (t) { |
708 | err = 0; | 729 | err = 0; |
709 | if (cmd == SIOCCHGTUNNEL) { | ||
710 | t->parms.iph.ttl = p.iph.ttl; | ||
711 | t->parms.iph.tos = p.iph.tos; | ||
712 | t->parms.iph.frag_off = p.iph.frag_off; | ||
713 | if (t->parms.link != p.link) { | ||
714 | t->parms.link = p.link; | ||
715 | ipip_tunnel_bind_dev(dev); | ||
716 | netdev_state_change(dev); | ||
717 | } | ||
718 | } | ||
719 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | 730 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) |
720 | err = -EFAULT; | 731 | err = -EFAULT; |
721 | } else | 732 | } else |
@@ -724,7 +735,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
724 | 735 | ||
725 | case SIOCDELTUNNEL: | 736 | case SIOCDELTUNNEL: |
726 | err = -EPERM; | 737 | err = -EPERM; |
727 | if (!capable(CAP_NET_ADMIN)) | 738 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) |
728 | goto done; | 739 | goto done; |
729 | 740 | ||
730 | if (dev == ipn->fb_tunnel_dev) { | 741 | if (dev == ipn->fb_tunnel_dev) { |
@@ -773,6 +784,11 @@ static void ipip_dev_free(struct net_device *dev) | |||
773 | free_netdev(dev); | 784 | free_netdev(dev); |
774 | } | 785 | } |
775 | 786 | ||
787 | #define IPIP_FEATURES (NETIF_F_SG | \ | ||
788 | NETIF_F_FRAGLIST | \ | ||
789 | NETIF_F_HIGHDMA | \ | ||
790 | NETIF_F_HW_CSUM) | ||
791 | |||
776 | static void ipip_tunnel_setup(struct net_device *dev) | 792 | static void ipip_tunnel_setup(struct net_device *dev) |
777 | { | 793 | { |
778 | dev->netdev_ops = &ipip_netdev_ops; | 794 | dev->netdev_ops = &ipip_netdev_ops; |
@@ -787,6 +803,9 @@ static void ipip_tunnel_setup(struct net_device *dev) | |||
787 | dev->features |= NETIF_F_NETNS_LOCAL; | 803 | dev->features |= NETIF_F_NETNS_LOCAL; |
788 | dev->features |= NETIF_F_LLTX; | 804 | dev->features |= NETIF_F_LLTX; |
789 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; | 805 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
806 | |||
807 | dev->features |= IPIP_FEATURES; | ||
808 | dev->hw_features |= IPIP_FEATURES; | ||
790 | } | 809 | } |
791 | 810 | ||
792 | static int ipip_tunnel_init(struct net_device *dev) | 811 | static int ipip_tunnel_init(struct net_device *dev) |
@@ -829,6 +848,142 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) | |||
829 | return 0; | 848 | return 0; |
830 | } | 849 | } |
831 | 850 | ||
851 | static void ipip_netlink_parms(struct nlattr *data[], | ||
852 | struct ip_tunnel_parm *parms) | ||
853 | { | ||
854 | memset(parms, 0, sizeof(*parms)); | ||
855 | |||
856 | parms->iph.version = 4; | ||
857 | parms->iph.protocol = IPPROTO_IPIP; | ||
858 | parms->iph.ihl = 5; | ||
859 | |||
860 | if (!data) | ||
861 | return; | ||
862 | |||
863 | if (data[IFLA_IPTUN_LINK]) | ||
864 | parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); | ||
865 | |||
866 | if (data[IFLA_IPTUN_LOCAL]) | ||
867 | parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); | ||
868 | |||
869 | if (data[IFLA_IPTUN_REMOTE]) | ||
870 | parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); | ||
871 | |||
872 | if (data[IFLA_IPTUN_TTL]) { | ||
873 | parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); | ||
874 | if (parms->iph.ttl) | ||
875 | parms->iph.frag_off = htons(IP_DF); | ||
876 | } | ||
877 | |||
878 | if (data[IFLA_IPTUN_TOS]) | ||
879 | parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); | ||
880 | |||
881 | if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) | ||
882 | parms->iph.frag_off = htons(IP_DF); | ||
883 | } | ||
884 | |||
885 | static int ipip_newlink(struct net *src_net, struct net_device *dev, | ||
886 | struct nlattr *tb[], struct nlattr *data[]) | ||
887 | { | ||
888 | struct net *net = dev_net(dev); | ||
889 | struct ip_tunnel *nt; | ||
890 | |||
891 | nt = netdev_priv(dev); | ||
892 | ipip_netlink_parms(data, &nt->parms); | ||
893 | |||
894 | if (ipip_tunnel_locate(net, &nt->parms, 0)) | ||
895 | return -EEXIST; | ||
896 | |||
897 | return ipip_tunnel_create(dev); | ||
898 | } | ||
899 | |||
900 | static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], | ||
901 | struct nlattr *data[]) | ||
902 | { | ||
903 | struct ip_tunnel *t; | ||
904 | struct ip_tunnel_parm p; | ||
905 | struct net *net = dev_net(dev); | ||
906 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
907 | |||
908 | if (dev == ipn->fb_tunnel_dev) | ||
909 | return -EINVAL; | ||
910 | |||
911 | ipip_netlink_parms(data, &p); | ||
912 | |||
913 | if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || | ||
914 | (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) | ||
915 | return -EINVAL; | ||
916 | |||
917 | t = ipip_tunnel_locate(net, &p, 0); | ||
918 | |||
919 | if (t) { | ||
920 | if (t->dev != dev) | ||
921 | return -EEXIST; | ||
922 | } else | ||
923 | t = netdev_priv(dev); | ||
924 | |||
925 | ipip_tunnel_update(t, &p); | ||
926 | return 0; | ||
927 | } | ||
928 | |||
929 | static size_t ipip_get_size(const struct net_device *dev) | ||
930 | { | ||
931 | return | ||
932 | /* IFLA_IPTUN_LINK */ | ||
933 | nla_total_size(4) + | ||
934 | /* IFLA_IPTUN_LOCAL */ | ||
935 | nla_total_size(4) + | ||
936 | /* IFLA_IPTUN_REMOTE */ | ||
937 | nla_total_size(4) + | ||
938 | /* IFLA_IPTUN_TTL */ | ||
939 | nla_total_size(1) + | ||
940 | /* IFLA_IPTUN_TOS */ | ||
941 | nla_total_size(1) + | ||
942 | /* IFLA_IPTUN_PMTUDISC */ | ||
943 | nla_total_size(1) + | ||
944 | 0; | ||
945 | } | ||
946 | |||
947 | static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) | ||
948 | { | ||
949 | struct ip_tunnel *tunnel = netdev_priv(dev); | ||
950 | struct ip_tunnel_parm *parm = &tunnel->parms; | ||
951 | |||
952 | if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || | ||
953 | nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || | ||
954 | nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || | ||
955 | nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || | ||
956 | nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || | ||
957 | nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, | ||
958 | !!(parm->iph.frag_off & htons(IP_DF)))) | ||
959 | goto nla_put_failure; | ||
960 | return 0; | ||
961 | |||
962 | nla_put_failure: | ||
963 | return -EMSGSIZE; | ||
964 | } | ||
965 | |||
966 | static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { | ||
967 | [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, | ||
968 | [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 }, | ||
969 | [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 }, | ||
970 | [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, | ||
971 | [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, | ||
972 | [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, | ||
973 | }; | ||
974 | |||
975 | static struct rtnl_link_ops ipip_link_ops __read_mostly = { | ||
976 | .kind = "ipip", | ||
977 | .maxtype = IFLA_IPTUN_MAX, | ||
978 | .policy = ipip_policy, | ||
979 | .priv_size = sizeof(struct ip_tunnel), | ||
980 | .setup = ipip_tunnel_setup, | ||
981 | .newlink = ipip_newlink, | ||
982 | .changelink = ipip_changelink, | ||
983 | .get_size = ipip_get_size, | ||
984 | .fill_info = ipip_fill_info, | ||
985 | }; | ||
986 | |||
832 | static struct xfrm_tunnel ipip_handler __read_mostly = { | 987 | static struct xfrm_tunnel ipip_handler __read_mostly = { |
833 | .handler = ipip_rcv, | 988 | .handler = ipip_rcv, |
834 | .err_handler = ipip_err, | 989 | .err_handler = ipip_err, |
@@ -925,14 +1080,26 @@ static int __init ipip_init(void) | |||
925 | return err; | 1080 | return err; |
926 | err = xfrm4_tunnel_register(&ipip_handler, AF_INET); | 1081 | err = xfrm4_tunnel_register(&ipip_handler, AF_INET); |
927 | if (err < 0) { | 1082 | if (err < 0) { |
928 | unregister_pernet_device(&ipip_net_ops); | ||
929 | pr_info("%s: can't register tunnel\n", __func__); | 1083 | pr_info("%s: can't register tunnel\n", __func__); |
1084 | goto xfrm_tunnel_failed; | ||
930 | } | 1085 | } |
1086 | err = rtnl_link_register(&ipip_link_ops); | ||
1087 | if (err < 0) | ||
1088 | goto rtnl_link_failed; | ||
1089 | |||
1090 | out: | ||
931 | return err; | 1091 | return err; |
1092 | |||
1093 | rtnl_link_failed: | ||
1094 | xfrm4_tunnel_deregister(&ipip_handler, AF_INET); | ||
1095 | xfrm_tunnel_failed: | ||
1096 | unregister_pernet_device(&ipip_net_ops); | ||
1097 | goto out; | ||
932 | } | 1098 | } |
933 | 1099 | ||
934 | static void __exit ipip_fini(void) | 1100 | static void __exit ipip_fini(void) |
935 | { | 1101 | { |
1102 | rtnl_link_unregister(&ipip_link_ops); | ||
936 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) | 1103 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) |
937 | pr_info("%s: can't deregister tunnel\n", __func__); | 1104 | pr_info("%s: can't deregister tunnel\n", __func__); |
938 | 1105 | ||