aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPravin B Shelar <pshelar@nicira.com>2013-03-25 10:49:35 -0400
committerDavid S. Miller <davem@davemloft.net>2013-03-26 12:27:18 -0400
commitc54419321455631079c7d6e60bc732dd0c5914c5 (patch)
treea0ad703b72313b98e70f4166bcea9a328d09e937
parenteaac5f3d3ad33547b299935e6db0cfc7be9a576a (diff)
GRE: Refactor GRE tunneling code.
The following patch refactors the GRE code into generic IP tunneling code and GRE-specific code. Common tunneling code is moved to the ip_tunnel module. The ip_tunnel module is written as a generic library which can be used by different tunneling implementations. The ip_tunnel module contains the following components: - generic packet xmit and rcv code. The xmit flow looks like (gre_xmit/ipip_xmit)->ip_tunnel_xmit->ip_local_out. - hash table of all devices. - lookup for tunnel devices. - control plane operations like device create, destroy, ioctl, netlink operations code. - registration for tunneling modules, like gre, ipip etc. - define single pcpu_tstats dev->tstats. - struct tnl_ptk_info added to pass parsed tunnel packet parameters. The ipip.h header is renamed to ip_tunnels.h. Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/vxlan.c2
-rw-r--r--include/net/gre.h51
-rw-r--r--include/net/ip6_tunnel.h1
-rw-r--r--include/net/ip_tunnels.h177
-rw-r--r--include/net/ipip.h84
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/gre.c5
-rw-r--r--net/ipv4/ip_gre.c1504
-rw-r--r--net/ipv4/ip_tunnel.c1035
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ip6_gre.c1
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/sit.c2
18 files changed, 1594 insertions, 1283 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 33427fd62515..fe9ea7d14951 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -33,7 +33,7 @@
33#include <net/arp.h> 33#include <net/arp.h>
34#include <net/ndisc.h> 34#include <net/ndisc.h>
35#include <net/ip.h> 35#include <net/ip.h>
36#include <net/ipip.h> 36#include <net/ip_tunnels.h>
37#include <net/icmp.h> 37#include <net/icmp.h>
38#include <net/udp.h> 38#include <net/udp.h>
39#include <net/rtnetlink.h> 39#include <net/rtnetlink.h>
diff --git a/include/net/gre.h b/include/net/gre.h
index 82665474bcb7..9f03a390c826 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -2,6 +2,7 @@
2#define __LINUX_GRE_H 2#define __LINUX_GRE_H
3 3
4#include <linux/skbuff.h> 4#include <linux/skbuff.h>
5#include <net/ip_tunnels.h>
5 6
6#define GREPROTO_CISCO 0 7#define GREPROTO_CISCO 0
7#define GREPROTO_PPTP 1 8#define GREPROTO_PPTP 1
@@ -12,7 +13,57 @@ struct gre_protocol {
12 void (*err_handler)(struct sk_buff *skb, u32 info); 13 void (*err_handler)(struct sk_buff *skb, u32 info);
13}; 14};
14 15
16struct gre_base_hdr {
17 __be16 flags;
18 __be16 protocol;
19};
20#define GRE_HEADER_SECTION 4
21
15int gre_add_protocol(const struct gre_protocol *proto, u8 version); 22int gre_add_protocol(const struct gre_protocol *proto, u8 version);
16int gre_del_protocol(const struct gre_protocol *proto, u8 version); 23int gre_del_protocol(const struct gre_protocol *proto, u8 version);
17 24
25static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
26{
27 __be16 tflags = 0;
28
29 if (flags & GRE_CSUM)
30 tflags |= TUNNEL_CSUM;
31 if (flags & GRE_ROUTING)
32 tflags |= TUNNEL_ROUTING;
33 if (flags & GRE_KEY)
34 tflags |= TUNNEL_KEY;
35 if (flags & GRE_SEQ)
36 tflags |= TUNNEL_SEQ;
37 if (flags & GRE_STRICT)
38 tflags |= TUNNEL_STRICT;
39 if (flags & GRE_REC)
40 tflags |= TUNNEL_REC;
41 if (flags & GRE_VERSION)
42 tflags |= TUNNEL_VERSION;
43
44 return tflags;
45}
46
47static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
48{
49 __be16 flags = 0;
50
51 if (tflags & TUNNEL_CSUM)
52 flags |= GRE_CSUM;
53 if (tflags & TUNNEL_ROUTING)
54 flags |= GRE_ROUTING;
55 if (tflags & TUNNEL_KEY)
56 flags |= GRE_KEY;
57 if (tflags & TUNNEL_SEQ)
58 flags |= GRE_SEQ;
59 if (tflags & TUNNEL_STRICT)
60 flags |= GRE_STRICT;
61 if (tflags & TUNNEL_REC)
62 flags |= GRE_REC;
63 if (tflags & TUNNEL_VERSION)
64 flags |= GRE_VERSION;
65
66 return flags;
67}
68
18#endif 69#endif
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index ebdef7f60862..4da5de10d1d4 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/ipv6.h> 4#include <linux/ipv6.h>
5#include <linux/netdevice.h> 5#include <linux/netdevice.h>
6#include <linux/if_tunnel.h>
6#include <linux/ip6_tunnel.h> 7#include <linux/ip6_tunnel.h>
7 8
8#define IP6TUNNEL_ERR_TIMEO (30*HZ) 9#define IP6TUNNEL_ERR_TIMEO (30*HZ)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
new file mode 100644
index 000000000000..4b6f0b28f41f
--- /dev/null
+++ b/include/net/ip_tunnels.h
@@ -0,0 +1,177 @@
1#ifndef __NET_IP_TUNNELS_H
2#define __NET_IP_TUNNELS_H 1
3
4#include <linux/if_tunnel.h>
5#include <linux/netdevice.h>
6#include <linux/skbuff.h>
7#include <linux/types.h>
8#include <linux/u64_stats_sync.h>
9#include <net/dsfield.h>
10#include <net/gro_cells.h>
11#include <net/inet_ecn.h>
12#include <net/ip.h>
13#include <net/rtnetlink.h>
14
15#if IS_ENABLED(CONFIG_IPV6)
16#include <net/ipv6.h>
17#include <net/ip6_fib.h>
18#include <net/ip6_route.h>
19#endif
20
21/* Keep error state on tunnel for 30 sec */
22#define IPTUNNEL_ERR_TIMEO (30*HZ)
23
24/* 6rd prefix/relay information */
25#ifdef CONFIG_IPV6_SIT_6RD
26struct ip_tunnel_6rd_parm {
27 struct in6_addr prefix;
28 __be32 relay_prefix;
29 u16 prefixlen;
30 u16 relay_prefixlen;
31};
32#endif
33
34struct ip_tunnel_prl_entry {
35 struct ip_tunnel_prl_entry __rcu *next;
36 __be32 addr;
37 u16 flags;
38 struct rcu_head rcu_head;
39};
40
41struct ip_tunnel {
42 struct ip_tunnel __rcu *next;
43 struct hlist_node hash_node;
44 struct net_device *dev;
45
46 int err_count; /* Number of arrived ICMP errors */
47 unsigned long err_time; /* Time when the last ICMP error
48 * arrived */
49
50 /* These four fields used only by GRE */
51 __u32 i_seqno; /* The last seen seqno */
52 __u32 o_seqno; /* The last output seqno */
53 int hlen; /* Precalculated header length */
54 int mlink;
55
56 struct ip_tunnel_parm parms;
57
58 /* for SIT */
59#ifdef CONFIG_IPV6_SIT_6RD
60 struct ip_tunnel_6rd_parm ip6rd;
61#endif
62 struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
63 unsigned int prl_count; /* # of entries in PRL */
64 int ip_tnl_net_id;
65 struct gro_cells gro_cells;
66};
67
68#define TUNNEL_CSUM __cpu_to_be16(0x01)
69#define TUNNEL_ROUTING __cpu_to_be16(0x02)
70#define TUNNEL_KEY __cpu_to_be16(0x04)
71#define TUNNEL_SEQ __cpu_to_be16(0x08)
72#define TUNNEL_STRICT __cpu_to_be16(0x10)
73#define TUNNEL_REC __cpu_to_be16(0x20)
74#define TUNNEL_VERSION __cpu_to_be16(0x40)
75#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
76
77struct tnl_ptk_info {
78 __be16 flags;
79 __be16 proto;
80 __be32 key;
81 __be32 seq;
82};
83
84#define PACKET_RCVD 0
85#define PACKET_REJECT 1
86
87#define IP_TNL_HASH_BITS 10
88#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS)
89
90struct ip_tunnel_net {
91 struct hlist_head *tunnels;
92 struct net_device *fb_tunnel_dev;
93};
94
95int ip_tunnel_init(struct net_device *dev);
96void ip_tunnel_uninit(struct net_device *dev);
97void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
98int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
99 struct rtnl_link_ops *ops, char *devname);
100
101void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn);
102
103void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
104 const struct iphdr *tnl_params);
105int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
106int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
107
108struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
109 struct rtnl_link_stats64 *tot);
110struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
111 int link, __be16 flags,
112 __be32 remote, __be32 local,
113 __be32 key);
114
115int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
116 const struct tnl_ptk_info *tpi, bool log_ecn_error);
117int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
118 struct ip_tunnel_parm *p);
119int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
120 struct ip_tunnel_parm *p);
121void ip_tunnel_setup(struct net_device *dev, int net_id);
122
123/* Extract dsfield from inner protocol */
124static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
125 const struct sk_buff *skb)
126{
127 if (skb->protocol == htons(ETH_P_IP))
128 return iph->tos;
129 else if (skb->protocol == htons(ETH_P_IPV6))
130 return ipv6_get_dsfield((const struct ipv6hdr *)iph);
131 else
132 return 0;
133}
134
135/* Propagate ECN bits out */
136static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
137 const struct sk_buff *skb)
138{
139 u8 inner = ip_tunnel_get_dsfield(iph, skb);
140
141 return INET_ECN_encapsulate(tos, inner);
142}
143
144static inline void tunnel_ip_select_ident(struct sk_buff *skb,
145 const struct iphdr *old_iph,
146 struct dst_entry *dst)
147{
148 struct iphdr *iph = ip_hdr(skb);
149
150 /* Use inner packet iph-id if possible. */
151 if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
152 iph->id = old_iph->id;
153 else
154 __ip_select_ident(iph, dst,
155 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
156}
157
158static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
159{
160 int err;
161 int pkt_len = skb->len - skb_transport_offset(skb);
162 struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
163
164 nf_reset(skb);
165
166 err = ip_local_out(skb);
167 if (likely(net_xmit_eval(err) == 0)) {
168 u64_stats_update_begin(&tstats->syncp);
169 tstats->tx_bytes += pkt_len;
170 tstats->tx_packets++;
171 u64_stats_update_end(&tstats->syncp);
172 } else {
173 dev->stats.tx_errors++;
174 dev->stats.tx_aborted_errors++;
175 }
176}
177#endif /* __NET_IP_TUNNELS_H */
diff --git a/include/net/ipip.h b/include/net/ipip.h
deleted file mode 100644
index 483b91a10bb2..000000000000
--- a/include/net/ipip.h
+++ /dev/null
@@ -1,84 +0,0 @@
1#ifndef __NET_IPIP_H
2#define __NET_IPIP_H 1
3
4#include <linux/if_tunnel.h>
5#include <net/gro_cells.h>
6#include <net/ip.h>
7
8/* Keep error state on tunnel for 30 sec */
9#define IPTUNNEL_ERR_TIMEO (30*HZ)
10
11/* 6rd prefix/relay information */
12struct ip_tunnel_6rd_parm {
13 struct in6_addr prefix;
14 __be32 relay_prefix;
15 u16 prefixlen;
16 u16 relay_prefixlen;
17};
18
19struct ip_tunnel {
20 struct ip_tunnel __rcu *next;
21 struct net_device *dev;
22
23 int err_count; /* Number of arrived ICMP errors */
24 unsigned long err_time; /* Time when the last ICMP error arrived */
25
26 /* These four fields used only by GRE */
27 __u32 i_seqno; /* The last seen seqno */
28 __u32 o_seqno; /* The last output seqno */
29 int hlen; /* Precalculated GRE header length */
30 int mlink;
31
32 struct ip_tunnel_parm parms;
33
34 /* for SIT */
35#ifdef CONFIG_IPV6_SIT_6RD
36 struct ip_tunnel_6rd_parm ip6rd;
37#endif
38 struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
39 unsigned int prl_count; /* # of entries in PRL */
40
41 struct gro_cells gro_cells;
42};
43
44struct ip_tunnel_prl_entry {
45 struct ip_tunnel_prl_entry __rcu *next;
46 __be32 addr;
47 u16 flags;
48 struct rcu_head rcu_head;
49};
50
51static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
52{
53 int err;
54 int pkt_len = skb->len - skb_transport_offset(skb);
55 struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
56
57 nf_reset(skb);
58
59 err = ip_local_out(skb);
60 if (likely(net_xmit_eval(err) == 0)) {
61 u64_stats_update_begin(&tstats->syncp);
62 tstats->tx_bytes += pkt_len;
63 tstats->tx_packets++;
64 u64_stats_update_end(&tstats->syncp);
65 } else {
66 dev->stats.tx_errors++;
67 dev->stats.tx_aborted_errors++;
68 }
69}
70
71static inline void tunnel_ip_select_ident(struct sk_buff *skb,
72 const struct iphdr *old_iph,
73 struct dst_entry *dst)
74{
75 struct iphdr *iph = ip_hdr(skb);
76
77 /* Use inner packet iph-id if possible. */
78 if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
79 iph->id = old_iph->id;
80 else
81 __ip_select_ident(iph, dst,
82 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
83}
84#endif
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7944df768454..2073226a8a63 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -186,9 +186,14 @@ config NET_IPGRE_DEMUX
186 This is helper module to demultiplex GRE packets on GRE version field criteria. 186 This is helper module to demultiplex GRE packets on GRE version field criteria.
187 Required by ip_gre and pptp modules. 187 Required by ip_gre and pptp modules.
188 188
189config NET_IP_TUNNEL
190 tristate
191 default n
192
189config NET_IPGRE 193config NET_IPGRE
190 tristate "IP: GRE tunnels over IP" 194 tristate "IP: GRE tunnels over IP"
191 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX 195 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
196 select NET_IP_TUNNEL
192 help 197 help
193 Tunneling means encapsulating data of one protocol type within 198 Tunneling means encapsulating data of one protocol type within
194 another protocol and sending it over a channel that understands the 199 another protocol and sending it over a channel that understands the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 15ca63ec604e..089cb9f36387 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \
13 fib_frontend.o fib_semantics.o fib_trie.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o ping.o 14 inet_fragment.o ping.o
15 15
16obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 17obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
17obj-$(CONFIG_PROC_FS) += proc.o 18obj-$(CONFIG_PROC_FS) += proc.o
18obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 19obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70b2d4cf5801..93824c57b108 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -111,7 +111,6 @@
111#include <net/sock.h> 111#include <net/sock.h>
112#include <net/raw.h> 112#include <net/raw.h>
113#include <net/icmp.h> 113#include <net/icmp.h>
114#include <net/ipip.h>
115#include <net/inet_common.h> 114#include <net/inet_common.h>
116#include <net/xfrm.h> 115#include <net/xfrm.h>
117#include <net/net_namespace.h> 116#include <net/net_namespace.h>
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 7a4c710c4cdd..d2d5a99fba09 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -27,11 +27,6 @@
27 27
28static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; 28static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
29static DEFINE_SPINLOCK(gre_proto_lock); 29static DEFINE_SPINLOCK(gre_proto_lock);
30struct gre_base_hdr {
31 __be16 flags;
32 __be16 protocol;
33};
34#define GRE_HEADER_SECTION 4
35 30
36int gre_add_protocol(const struct gre_protocol *proto, u8 version) 31int gre_add_protocol(const struct gre_protocol *proto, u8 version)
37{ 32{
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2e94289a17e8..ad662e906f7e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -37,7 +37,7 @@
37#include <net/ip.h> 37#include <net/ip.h>
38#include <net/icmp.h> 38#include <net/icmp.h>
39#include <net/protocol.h> 39#include <net/protocol.h>
40#include <net/ipip.h> 40#include <net/ip_tunnels.h>
41#include <net/arp.h> 41#include <net/arp.h>
42#include <net/checksum.h> 42#include <net/checksum.h>
43#include <net/dsfield.h> 43#include <net/dsfield.h>
@@ -108,15 +108,6 @@
108 fatal route to network, even if it were you who configured 108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-) 109 fatal static route: you are innocent. :-)
110 110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov. 111 Alexey Kuznetsov.
121 */ 112 */
122 113
@@ -126,400 +117,135 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 117
127static struct rtnl_link_ops ipgre_link_ops __read_mostly; 118static struct rtnl_link_ops ipgre_link_ops __read_mostly;
128static int ipgre_tunnel_init(struct net_device *dev); 119static int ipgre_tunnel_init(struct net_device *dev);
129static void ipgre_tunnel_setup(struct net_device *dev);
130static int ipgre_tunnel_bind_dev(struct net_device *dev);
131
132/* Fallback tunnel: no source, no destination, no key, no options */
133
134#define HASH_SIZE 16
135 120
136static int ipgre_net_id __read_mostly; 121static int ipgre_net_id __read_mostly;
137struct ipgre_net { 122static int gre_tap_net_id __read_mostly;
138 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
139
140 struct net_device *fb_tunnel_dev;
141};
142
143/* Tunnel hash table */
144
145/*
146 4 hash tables:
147
148 3: (remote,local)
149 2: (remote,*)
150 1: (*,local)
151 0: (*,*)
152 123
153 We require exact key match i.e. if a key is present in packet 124static __sum16 check_checksum(struct sk_buff *skb)
154 it will match only tunnel with the same key; if it is not present, 125{
155 it will match only keyless tunnel. 126 __sum16 csum = 0;
156
157 All keysless packets, if not matched configured keyless tunnels
158 will match fallback tunnel.
159 */
160 127
161#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
162 131
163#define tunnels_r_l tunnels[3] 132 if (!csum)
164#define tunnels_r tunnels[2] 133 break;
165#define tunnels_l tunnels[1] 134 /* Fall through. */
166#define tunnels_wc tunnels[0]
167 135
168static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, 136 case CHECKSUM_NONE:
169 struct rtnl_link_stats64 *tot) 137 skb->csum = 0;
170{ 138 csum = __skb_checksum_complete(skb);
171 int i; 139 skb->ip_summed = CHECKSUM_COMPLETE;
172 140 break;
173 for_each_possible_cpu(i) {
174 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
175 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
176 unsigned int start;
177
178 do {
179 start = u64_stats_fetch_begin_bh(&tstats->syncp);
180 rx_packets = tstats->rx_packets;
181 tx_packets = tstats->tx_packets;
182 rx_bytes = tstats->rx_bytes;
183 tx_bytes = tstats->tx_bytes;
184 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
185
186 tot->rx_packets += rx_packets;
187 tot->tx_packets += tx_packets;
188 tot->rx_bytes += rx_bytes;
189 tot->tx_bytes += tx_bytes;
190 } 141 }
191 142
192 tot->multicast = dev->stats.multicast; 143 return csum;
193 tot->rx_crc_errors = dev->stats.rx_crc_errors;
194 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
195 tot->rx_length_errors = dev->stats.rx_length_errors;
196 tot->rx_frame_errors = dev->stats.rx_frame_errors;
197 tot->rx_errors = dev->stats.rx_errors;
198
199 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
200 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
201 tot->tx_dropped = dev->stats.tx_dropped;
202 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
203 tot->tx_errors = dev->stats.tx_errors;
204
205 return tot;
206} 144}
207 145
208/* Does key in tunnel parameters match packet */ 146static int ip_gre_calc_hlen(__be16 o_flags)
209static bool ipgre_key_match(const struct ip_tunnel_parm *p,
210 __be16 flags, __be32 key)
211{ 147{
212 if (p->i_flags & GRE_KEY) { 148 int addend = 4;
213 if (flags & GRE_KEY)
214 return key == p->i_key;
215 else
216 return false; /* key expected, none present */
217 } else
218 return !(flags & GRE_KEY);
219}
220 149
221/* Given src, dst and key, find appropriate for input tunnel. */ 150 if (o_flags&TUNNEL_CSUM)
151 addend += 4;
152 if (o_flags&TUNNEL_KEY)
153 addend += 4;
154 if (o_flags&TUNNEL_SEQ)
155 addend += 4;
156 return addend;
157}
222 158
223static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, 159static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
224 __be32 remote, __be32 local, 160 bool *csum_err, int *hdr_len)
225 __be16 flags, __be32 key,
226 __be16 gre_proto)
227{ 161{
228 struct net *net = dev_net(dev); 162 struct iphdr *iph = ip_hdr(skb);
229 int link = dev->ifindex; 163 struct gre_base_hdr *greh;
230 unsigned int h0 = HASH(remote); 164 __be32 *options;
231 unsigned int h1 = HASH(key);
232 struct ip_tunnel *t, *cand = NULL;
233 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
234 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
235 ARPHRD_ETHER : ARPHRD_IPGRE;
236 int score, cand_score = 4;
237
238 for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
239 if (local != t->parms.iph.saddr ||
240 remote != t->parms.iph.daddr ||
241 !(t->dev->flags & IFF_UP))
242 continue;
243
244 if (!ipgre_key_match(&t->parms, flags, key))
245 continue;
246
247 if (t->dev->type != ARPHRD_IPGRE &&
248 t->dev->type != dev_type)
249 continue;
250
251 score = 0;
252 if (t->parms.link != link)
253 score |= 1;
254 if (t->dev->type != dev_type)
255 score |= 2;
256 if (score == 0)
257 return t;
258
259 if (score < cand_score) {
260 cand = t;
261 cand_score = score;
262 }
263 }
264
265 for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
266 if (remote != t->parms.iph.daddr ||
267 !(t->dev->flags & IFF_UP))
268 continue;
269
270 if (!ipgre_key_match(&t->parms, flags, key))
271 continue;
272
273 if (t->dev->type != ARPHRD_IPGRE &&
274 t->dev->type != dev_type)
275 continue;
276
277 score = 0;
278 if (t->parms.link != link)
279 score |= 1;
280 if (t->dev->type != dev_type)
281 score |= 2;
282 if (score == 0)
283 return t;
284
285 if (score < cand_score) {
286 cand = t;
287 cand_score = score;
288 }
289 }
290 165
291 for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { 166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
292 if ((local != t->parms.iph.saddr && 167 return -EINVAL;
293 (local != t->parms.iph.daddr ||
294 !ipv4_is_multicast(local))) ||
295 !(t->dev->flags & IFF_UP))
296 continue;
297
298 if (!ipgre_key_match(&t->parms, flags, key))
299 continue;
300
301 if (t->dev->type != ARPHRD_IPGRE &&
302 t->dev->type != dev_type)
303 continue;
304
305 score = 0;
306 if (t->parms.link != link)
307 score |= 1;
308 if (t->dev->type != dev_type)
309 score |= 2;
310 if (score == 0)
311 return t;
312
313 if (score < cand_score) {
314 cand = t;
315 cand_score = score;
316 }
317 }
318 168
319 for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { 169 greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
320 if (t->parms.i_key != key || 170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
321 !(t->dev->flags & IFF_UP)) 171 return -EINVAL;
322 continue;
323
324 if (t->dev->type != ARPHRD_IPGRE &&
325 t->dev->type != dev_type)
326 continue;
327
328 score = 0;
329 if (t->parms.link != link)
330 score |= 1;
331 if (t->dev->type != dev_type)
332 score |= 2;
333 if (score == 0)
334 return t;
335
336 if (score < cand_score) {
337 cand = t;
338 cand_score = score;
339 }
340 }
341 172
342 if (cand != NULL) 173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
343 return cand; 174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
344 175
345 dev = ign->fb_tunnel_dev; 176 if (!pskb_may_pull(skb, *hdr_len))
346 if (dev->flags & IFF_UP) 177 return -EINVAL;
347 return netdev_priv(dev);
348 178
349 return NULL; 179 tpi->proto = greh->protocol;
350}
351 180
352static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, 181 options = (__be32 *)(greh + 1);
353 struct ip_tunnel_parm *parms) 182 if (greh->flags & GRE_CSUM) {
354{ 183 if (check_checksum(skb)) {
355 __be32 remote = parms->iph.daddr; 184 *csum_err = true;
356 __be32 local = parms->iph.saddr; 185 return -EINVAL;
357 __be32 key = parms->i_key; 186 }
358 unsigned int h = HASH(key); 187 options++;
359 int prio = 0;
360
361 if (local)
362 prio |= 1;
363 if (remote && !ipv4_is_multicast(remote)) {
364 prio |= 2;
365 h ^= HASH(remote);
366 } 188 }
367 189
368 return &ign->tunnels[prio][h]; 190 if (greh->flags & GRE_KEY) {
369} 191 tpi->key = *options;
370 192 options++;
371static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, 193 } else
372 struct ip_tunnel *t) 194 tpi->key = 0;
373{
374 return __ipgre_bucket(ign, &t->parms);
375}
376
377static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
378{
379 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
380 195
381 rcu_assign_pointer(t->next, rtnl_dereference(*tp)); 196 if (unlikely(greh->flags & GRE_SEQ)) {
382 rcu_assign_pointer(*tp, t); 197 tpi->seq = *options;
383} 198 options++;
199 } else
200 tpi->seq = 0;
384 201
385static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 202 /* WCCP version 1 and 2 protocol decoding.
386{ 203 * - Change protocol to IP
387 struct ip_tunnel __rcu **tp; 204 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
388 struct ip_tunnel *iter; 205 */
389 206 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
390 for (tp = ipgre_bucket(ign, t); 207 tpi->proto = htons(ETH_P_IP);
391 (iter = rtnl_dereference(*tp)) != NULL; 208 if ((*(u8 *)options & 0xF0) != 0x40) {
392 tp = &iter->next) { 209 *hdr_len += 4;
393 if (t == iter) { 210 if (!pskb_may_pull(skb, *hdr_len))
394 rcu_assign_pointer(*tp, t->next); 211 return -EINVAL;
395 break;
396 } 212 }
397 } 213 }
398}
399
400static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
401 struct ip_tunnel_parm *parms,
402 int type)
403{
404 __be32 remote = parms->iph.daddr;
405 __be32 local = parms->iph.saddr;
406 __be32 key = parms->i_key;
407 int link = parms->link;
408 struct ip_tunnel *t;
409 struct ip_tunnel __rcu **tp;
410 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
411
412 for (tp = __ipgre_bucket(ign, parms);
413 (t = rtnl_dereference(*tp)) != NULL;
414 tp = &t->next)
415 if (local == t->parms.iph.saddr &&
416 remote == t->parms.iph.daddr &&
417 key == t->parms.i_key &&
418 link == t->parms.link &&
419 type == t->dev->type)
420 break;
421
422 return t;
423}
424
425static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
426 struct ip_tunnel_parm *parms, int create)
427{
428 struct ip_tunnel *t, *nt;
429 struct net_device *dev;
430 char name[IFNAMSIZ];
431 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
432
433 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
434 if (t || !create)
435 return t;
436
437 if (parms->name[0])
438 strlcpy(name, parms->name, IFNAMSIZ);
439 else
440 strcpy(name, "gre%d");
441
442 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
443 if (!dev)
444 return NULL;
445
446 dev_net_set(dev, net);
447
448 nt = netdev_priv(dev);
449 nt->parms = *parms;
450 dev->rtnl_link_ops = &ipgre_link_ops;
451 214
452 dev->mtu = ipgre_tunnel_bind_dev(dev); 215 return 0;
453
454 if (register_netdevice(dev) < 0)
455 goto failed_free;
456
457 /* Can use a lockless transmit, unless we generate output sequences */
458 if (!(nt->parms.o_flags & GRE_SEQ))
459 dev->features |= NETIF_F_LLTX;
460
461 dev_hold(dev);
462 ipgre_tunnel_link(ign, nt);
463 return nt;
464
465failed_free:
466 free_netdev(dev);
467 return NULL;
468}
469
470static void ipgre_tunnel_uninit(struct net_device *dev)
471{
472 struct net *net = dev_net(dev);
473 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
474
475 ipgre_tunnel_unlink(ign, netdev_priv(dev));
476 dev_put(dev);
477} 216}
478 217
479
480static void ipgre_err(struct sk_buff *skb, u32 info) 218static void ipgre_err(struct sk_buff *skb, u32 info)
481{ 219{
482 220
483/* All the routers (except for Linux) return only 221 /* All the routers (except for Linux) return only
484 8 bytes of packet payload. It means, that precise relaying of 222 8 bytes of packet payload. It means, that precise relaying of
485 ICMP in the real Internet is absolutely infeasible. 223 ICMP in the real Internet is absolutely infeasible.
486 224
487 Moreover, Cisco "wise men" put GRE key to the third word 225 Moreover, Cisco "wise men" put GRE key to the third word
488 in GRE header. It makes impossible maintaining even soft state for keyed 226 in GRE header. It makes impossible maintaining even soft
489 GRE tunnels with enabled checksum. Tell them "thank you". 227 state for keyed GRE tunnels with enabled checksum. Tell
490 228 them "thank you".
491 Well, I wonder, rfc1812 was written by Cisco employee,
492 what the hell these idiots break standards established
493 by themselves???
494 */
495 229
230 Well, I wonder, rfc1812 was written by Cisco employee,
231 what the hell these idiots break standards established
232 by themselves???
233 */
234 struct net *net = dev_net(skb->dev);
235 struct ip_tunnel_net *itn;
496 const struct iphdr *iph = (const struct iphdr *)skb->data; 236 const struct iphdr *iph = (const struct iphdr *)skb->data;
497 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
498 int grehlen = (iph->ihl<<2) + 4;
499 const int type = icmp_hdr(skb)->type; 237 const int type = icmp_hdr(skb)->type;
500 const int code = icmp_hdr(skb)->code; 238 const int code = icmp_hdr(skb)->code;
501 struct ip_tunnel *t; 239 struct ip_tunnel *t;
502 __be16 flags; 240 struct tnl_ptk_info tpi;
503 __be32 key = 0; 241 int hdr_len;
242 bool csum_err = false;
504 243
505 flags = p[0]; 244 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
506 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 245 if (!csum_err) /* ignore csum errors. */
507 if (flags&(GRE_VERSION|GRE_ROUTING))
508 return; 246 return;
509 if (flags&GRE_KEY) {
510 grehlen += 4;
511 if (flags&GRE_CSUM)
512 grehlen += 4;
513 }
514 } 247 }
515 248
516 /* If only 8 bytes returned, keyed message will be dropped here */
517 if (skb_headlen(skb) < grehlen)
518 return;
519
520 if (flags & GRE_KEY)
521 key = *(((__be32 *)p) + (grehlen / 4) - 1);
522
523 switch (type) { 249 switch (type) {
524 default: 250 default:
525 case ICMP_PARAMETERPROB: 251 case ICMP_PARAMETERPROB:
@@ -548,8 +274,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
548 break; 274 break;
549 } 275 }
550 276
551 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, 277 if (tpi.proto == htons(ETH_P_TEB))
552 flags, key, p[1]); 278 itn = net_generic(net, gre_tap_net_id);
279 else
280 itn = net_generic(net, ipgre_net_id);
281
282 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
283 iph->daddr, iph->saddr, tpi.key);
553 284
554 if (t == NULL) 285 if (t == NULL)
555 return; 286 return;
@@ -578,158 +309,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
578 t->err_time = jiffies; 309 t->err_time = jiffies;
579} 310}
580 311
581static inline u8
582ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
583{
584 u8 inner = 0;
585 if (skb->protocol == htons(ETH_P_IP))
586 inner = old_iph->tos;
587 else if (skb->protocol == htons(ETH_P_IPV6))
588 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
589 return INET_ECN_encapsulate(tos, inner);
590}
591
592static int ipgre_rcv(struct sk_buff *skb) 312static int ipgre_rcv(struct sk_buff *skb)
593{ 313{
314 struct net *net = dev_net(skb->dev);
315 struct ip_tunnel_net *itn;
594 const struct iphdr *iph; 316 const struct iphdr *iph;
595 u8 *h;
596 __be16 flags;
597 __sum16 csum = 0;
598 __be32 key = 0;
599 u32 seqno = 0;
600 struct ip_tunnel *tunnel; 317 struct ip_tunnel *tunnel;
601 int offset = 4; 318 struct tnl_ptk_info tpi;
602 __be16 gre_proto; 319 int hdr_len;
603 int err; 320 bool csum_err = false;
604 321
605 if (!pskb_may_pull(skb, 16)) 322 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
606 goto drop; 323 goto drop;
607 324
608 iph = ip_hdr(skb); 325 if (tpi.proto == htons(ETH_P_TEB))
609 h = skb->data; 326 itn = net_generic(net, gre_tap_net_id);
610 flags = *(__be16 *)h; 327 else
611 328 itn = net_generic(net, ipgre_net_id);
612 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
613 /* - Version must be 0.
614 - We do not support routing headers.
615 */
616 if (flags&(GRE_VERSION|GRE_ROUTING))
617 goto drop;
618
619 if (flags&GRE_CSUM) {
620 switch (skb->ip_summed) {
621 case CHECKSUM_COMPLETE:
622 csum = csum_fold(skb->csum);
623 if (!csum)
624 break;
625 /* fall through */
626 case CHECKSUM_NONE:
627 skb->csum = 0;
628 csum = __skb_checksum_complete(skb);
629 skb->ip_summed = CHECKSUM_COMPLETE;
630 }
631 offset += 4;
632 }
633 if (flags&GRE_KEY) {
634 key = *(__be32 *)(h + offset);
635 offset += 4;
636 }
637 if (flags&GRE_SEQ) {
638 seqno = ntohl(*(__be32 *)(h + offset));
639 offset += 4;
640 }
641 }
642 329
643 gre_proto = *(__be16 *)(h + 2); 330 iph = ip_hdr(skb);
331 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
332 iph->saddr, iph->daddr, tpi.key);
644 333
645 tunnel = ipgre_tunnel_lookup(skb->dev,
646 iph->saddr, iph->daddr, flags, key,
647 gre_proto);
648 if (tunnel) { 334 if (tunnel) {
649 struct pcpu_tstats *tstats; 335 ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
650
651 secpath_reset(skb);
652
653 skb->protocol = gre_proto;
654 /* WCCP version 1 and 2 protocol decoding.
655 * - Change protocol to IP
656 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
657 */
658 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
659 skb->protocol = htons(ETH_P_IP);
660 if ((*(h + offset) & 0xF0) != 0x40)
661 offset += 4;
662 }
663
664 skb->mac_header = skb->network_header;
665 __pskb_pull(skb, offset);
666 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
667 skb->pkt_type = PACKET_HOST;
668#ifdef CONFIG_NET_IPGRE_BROADCAST
669 if (ipv4_is_multicast(iph->daddr)) {
670 /* Looped back packet, drop it! */
671 if (rt_is_output_route(skb_rtable(skb)))
672 goto drop;
673 tunnel->dev->stats.multicast++;
674 skb->pkt_type = PACKET_BROADCAST;
675 }
676#endif
677
678 if (((flags&GRE_CSUM) && csum) ||
679 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
680 tunnel->dev->stats.rx_crc_errors++;
681 tunnel->dev->stats.rx_errors++;
682 goto drop;
683 }
684 if (tunnel->parms.i_flags&GRE_SEQ) {
685 if (!(flags&GRE_SEQ) ||
686 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
687 tunnel->dev->stats.rx_fifo_errors++;
688 tunnel->dev->stats.rx_errors++;
689 goto drop;
690 }
691 tunnel->i_seqno = seqno + 1;
692 }
693
694 /* Warning: All skb pointers will be invalidated! */
695 if (tunnel->dev->type == ARPHRD_ETHER) {
696 if (!pskb_may_pull(skb, ETH_HLEN)) {
697 tunnel->dev->stats.rx_length_errors++;
698 tunnel->dev->stats.rx_errors++;
699 goto drop;
700 }
701
702 iph = ip_hdr(skb);
703 skb->protocol = eth_type_trans(skb, tunnel->dev);
704 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
705 }
706
707 __skb_tunnel_rx(skb, tunnel->dev);
708
709 skb_reset_network_header(skb);
710 err = IP_ECN_decapsulate(iph, skb);
711 if (unlikely(err)) {
712 if (log_ecn_error)
713 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
714 &iph->saddr, iph->tos);
715 if (err > 1) {
716 ++tunnel->dev->stats.rx_frame_errors;
717 ++tunnel->dev->stats.rx_errors;
718 goto drop;
719 }
720 }
721
722 tstats = this_cpu_ptr(tunnel->dev->tstats);
723 u64_stats_update_begin(&tstats->syncp);
724 tstats->rx_packets++;
725 tstats->rx_bytes += skb->len;
726 u64_stats_update_end(&tstats->syncp);
727
728 gro_cells_receive(&tunnel->gro_cells, skb);
729 return 0; 336 return 0;
730 } 337 }
731 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 338 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
732
733drop: 339drop:
734 kfree_skb(skb); 340 kfree_skb(skb);
735 return 0; 341 return 0;
@@ -746,7 +352,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff
746 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; 352 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
747 return skb; 353 return skb;
748 } else if (skb->ip_summed == CHECKSUM_PARTIAL && 354 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
749 tunnel->parms.o_flags&GRE_CSUM) { 355 tunnel->parms.o_flags&TUNNEL_CSUM) {
750 err = skb_checksum_help(skb); 356 err = skb_checksum_help(skb);
751 if (unlikely(err)) 357 if (unlikely(err))
752 goto error; 358 goto error;
@@ -760,480 +366,157 @@ error:
760 return ERR_PTR(err); 366 return ERR_PTR(err);
761} 367}
762 368
763static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 369static struct sk_buff *gre_build_header(struct sk_buff *skb,
370 const struct tnl_ptk_info *tpi,
371 int hdr_len)
764{ 372{
765 struct ip_tunnel *tunnel = netdev_priv(dev); 373 struct gre_base_hdr *greh;
766 const struct iphdr *old_iph;
767 const struct iphdr *tiph;
768 struct flowi4 fl4;
769 u8 tos;
770 __be16 df;
771 struct rtable *rt; /* Route to the other host */
772 struct net_device *tdev; /* Device to other host */
773 struct iphdr *iph; /* Our new IP header */
774 unsigned int max_headroom; /* The extra header space needed */
775 int gre_hlen;
776 __be32 dst;
777 int mtu;
778 u8 ttl;
779 int err;
780
781 skb = handle_offloads(tunnel, skb);
782 if (IS_ERR(skb)) {
783 dev->stats.tx_dropped++;
784 return NETDEV_TX_OK;
785 }
786 374
787 if (!skb->encapsulation) { 375 skb_push(skb, hdr_len);
788 skb_reset_inner_headers(skb);
789 skb->encapsulation = 1;
790 }
791 376
792 old_iph = ip_hdr(skb); 377 greh = (struct gre_base_hdr *)skb->data;
378 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
379 greh->protocol = tpi->proto;
793 380
794 if (dev->type == ARPHRD_ETHER) 381 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
795 IPCB(skb)->flags = 0; 382 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
796 383
797 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 384 if (tpi->flags&TUNNEL_SEQ) {
798 gre_hlen = 0; 385 *ptr = tpi->seq;
799 tiph = (const struct iphdr *)skb->data; 386 ptr--;
800 } else {
801 gre_hlen = tunnel->hlen;
802 tiph = &tunnel->parms.iph;
803 }
804
805 if ((dst = tiph->daddr) == 0) {
806 /* NBMA tunnel */
807
808 if (skb_dst(skb) == NULL) {
809 dev->stats.tx_fifo_errors++;
810 goto tx_error;
811 } 387 }
812 388 if (tpi->flags&TUNNEL_KEY) {
813 if (skb->protocol == htons(ETH_P_IP)) { 389 *ptr = tpi->key;
814 rt = skb_rtable(skb); 390 ptr--;
815 dst = rt_nexthop(rt, old_iph->daddr);
816 } 391 }
817#if IS_ENABLED(CONFIG_IPV6) 392 if (tpi->flags&TUNNEL_CSUM &&
818 else if (skb->protocol == htons(ETH_P_IPV6)) { 393 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
819 const struct in6_addr *addr6; 394 *(__sum16 *)ptr = 0;
820 struct neighbour *neigh; 395 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
821 bool do_tx_error_icmp; 396 skb->len, 0));
822 int addr_type;
823
824 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
825 if (neigh == NULL)
826 goto tx_error;
827
828 addr6 = (const struct in6_addr *)&neigh->primary_key;
829 addr_type = ipv6_addr_type(addr6);
830
831 if (addr_type == IPV6_ADDR_ANY) {
832 addr6 = &ipv6_hdr(skb)->daddr;
833 addr_type = ipv6_addr_type(addr6);
834 }
835
836 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
837 do_tx_error_icmp = true;
838 else {
839 do_tx_error_icmp = false;
840 dst = addr6->s6_addr32[3];
841 }
842 neigh_release(neigh);
843 if (do_tx_error_icmp)
844 goto tx_error_icmp;
845 } 397 }
846#endif
847 else
848 goto tx_error;
849 } 398 }
850 399
851 ttl = tiph->ttl; 400 return skb;
852 tos = tiph->tos; 401}
853 if (tos & 0x1) {
854 tos &= ~0x1;
855 if (skb->protocol == htons(ETH_P_IP))
856 tos = old_iph->tos;
857 else if (skb->protocol == htons(ETH_P_IPV6))
858 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
859 }
860 402
861 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, 403static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
862 tunnel->parms.o_key, RT_TOS(tos), 404 const struct iphdr *tnl_params,
863 tunnel->parms.link); 405 __be16 proto)
864 if (IS_ERR(rt)) { 406{
865 dev->stats.tx_carrier_errors++; 407 struct ip_tunnel *tunnel = netdev_priv(dev);
866 goto tx_error; 408 struct tnl_ptk_info tpi;
867 }
868 tdev = rt->dst.dev;
869 409
870 if (tdev == dev) { 410 if (likely(!skb->encapsulation)) {
871 ip_rt_put(rt); 411 skb_reset_inner_headers(skb);
872 dev->stats.collisions++; 412 skb->encapsulation = 1;
873 goto tx_error;
874 } 413 }
875 414
876 df = tiph->frag_off; 415 tpi.flags = tunnel->parms.o_flags;
877 if (df) 416 tpi.proto = proto;
878 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; 417 tpi.key = tunnel->parms.o_key;
879 else 418 if (tunnel->parms.o_flags & TUNNEL_SEQ)
880 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 419 tunnel->o_seqno++;
881 420 tpi.seq = htonl(tunnel->o_seqno);
882 if (skb_dst(skb))
883 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
884
885 if (skb->protocol == htons(ETH_P_IP)) {
886 df |= (old_iph->frag_off&htons(IP_DF));
887 421
888 if (!skb_is_gso(skb) && 422 /* Push GRE header. */
889 (old_iph->frag_off&htons(IP_DF)) && 423 skb = gre_build_header(skb, &tpi, tunnel->hlen);
890 mtu < ntohs(old_iph->tot_len)) { 424 if (unlikely(!skb)) {
891 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 425 dev->stats.tx_dropped++;
892 ip_rt_put(rt); 426 return;
893 goto tx_error;
894 }
895 } 427 }
896#if IS_ENABLED(CONFIG_IPV6)
897 else if (skb->protocol == htons(ETH_P_IPV6)) {
898 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
899
900 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
901 if ((tunnel->parms.iph.daddr &&
902 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
903 rt6->rt6i_dst.plen == 128) {
904 rt6->rt6i_flags |= RTF_MODIFIED;
905 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
906 }
907 }
908 428
909 if (!skb_is_gso(skb) && 429 ip_tunnel_xmit(skb, dev, tnl_params);
910 mtu >= IPV6_MIN_MTU && 430}
911 mtu < skb->len - tunnel->hlen + gre_hlen) {
912 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
913 ip_rt_put(rt);
914 goto tx_error;
915 }
916 }
917#endif
918 431
919 if (tunnel->err_count > 0) { 432static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
920 if (time_before(jiffies, 433 struct net_device *dev)
921 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 434{
922 tunnel->err_count--; 435 struct ip_tunnel *tunnel = netdev_priv(dev);
436 const struct iphdr *tnl_params;
923 437
924 dst_link_failure(skb); 438 skb = handle_offloads(tunnel, skb);
925 } else 439 if (IS_ERR(skb))
926 tunnel->err_count = 0; 440 goto out;
927 }
928 441
929 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; 442 if (dev->header_ops) {
930 443 /* Need space for new headers */
931 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 444 if (skb_cow_head(skb, dev->needed_headroom -
932 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 445 (tunnel->hlen + sizeof(struct iphdr))));
933 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 446 goto free_skb;
934 if (max_headroom > dev->needed_headroom)
935 dev->needed_headroom = max_headroom;
936 if (!new_skb) {
937 ip_rt_put(rt);
938 dev->stats.tx_dropped++;
939 dev_kfree_skb(skb);
940 return NETDEV_TX_OK;
941 }
942 if (skb->sk)
943 skb_set_owner_w(new_skb, skb->sk);
944 dev_kfree_skb(skb);
945 skb = new_skb;
946 old_iph = ip_hdr(skb);
947 /* Warning : tiph value might point to freed memory */
948 }
949 447
950 skb_push(skb, gre_hlen); 448 tnl_params = (const struct iphdr *)skb->data;
951 skb_reset_network_header(skb);
952 skb_set_transport_header(skb, sizeof(*iph));
953 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
954 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
955 IPSKB_REROUTED);
956 skb_dst_drop(skb);
957 skb_dst_set(skb, &rt->dst);
958
959 /*
960 * Push down and install the IPIP header.
961 */
962 449
963 iph = ip_hdr(skb); 450 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
964 iph->version = 4; 451 * to gre header.
965 iph->ihl = sizeof(struct iphdr) >> 2; 452 */
966 iph->frag_off = df; 453 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
967 iph->protocol = IPPROTO_GRE; 454 } else {
968 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 455 if (skb_cow_head(skb, dev->needed_headroom))
969 iph->daddr = fl4.daddr; 456 goto free_skb;
970 iph->saddr = fl4.saddr;
971 iph->ttl = ttl;
972
973 tunnel_ip_select_ident(skb, old_iph, &rt->dst);
974
975 if (ttl == 0) {
976 if (skb->protocol == htons(ETH_P_IP))
977 iph->ttl = old_iph->ttl;
978#if IS_ENABLED(CONFIG_IPV6)
979 else if (skb->protocol == htons(ETH_P_IPV6))
980 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
981#endif
982 else
983 iph->ttl = ip4_dst_hoplimit(&rt->dst);
984 }
985
986 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
987 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
988 htons(ETH_P_TEB) : skb->protocol;
989
990 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
991 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
992 457
993 if (tunnel->parms.o_flags&GRE_SEQ) { 458 tnl_params = &tunnel->parms.iph;
994 ++tunnel->o_seqno;
995 *ptr = htonl(tunnel->o_seqno);
996 ptr--;
997 }
998 if (tunnel->parms.o_flags&GRE_KEY) {
999 *ptr = tunnel->parms.o_key;
1000 ptr--;
1001 }
1002 /* Skip GRE checksum if skb is getting offloaded. */
1003 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
1004 (tunnel->parms.o_flags&GRE_CSUM)) {
1005 int offset = skb_transport_offset(skb);
1006
1007 if (skb_has_shared_frag(skb)) {
1008 err = __skb_linearize(skb);
1009 if (err)
1010 goto tx_error;
1011 }
1012
1013 *ptr = 0;
1014 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
1015 skb->len - offset,
1016 0));
1017 }
1018 } 459 }
1019 460
1020 iptunnel_xmit(skb, dev); 461 __gre_xmit(skb, dev, tnl_params, skb->protocol);
462
1021 return NETDEV_TX_OK; 463 return NETDEV_TX_OK;
1022 464
1023#if IS_ENABLED(CONFIG_IPV6) 465free_skb:
1024tx_error_icmp:
1025 dst_link_failure(skb);
1026#endif
1027tx_error:
1028 dev->stats.tx_errors++;
1029 dev_kfree_skb(skb); 466 dev_kfree_skb(skb);
467out:
468 dev->stats.tx_dropped++;
1030 return NETDEV_TX_OK; 469 return NETDEV_TX_OK;
1031} 470}
1032 471
1033static int ipgre_tunnel_bind_dev(struct net_device *dev) 472static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
473 struct net_device *dev)
1034{ 474{
1035 struct net_device *tdev = NULL; 475 struct ip_tunnel *tunnel = netdev_priv(dev);
1036 struct ip_tunnel *tunnel;
1037 const struct iphdr *iph;
1038 int hlen = LL_MAX_HEADER;
1039 int mtu = ETH_DATA_LEN;
1040 int addend = sizeof(struct iphdr) + 4;
1041
1042 tunnel = netdev_priv(dev);
1043 iph = &tunnel->parms.iph;
1044
1045 /* Guess output device to choose reasonable mtu and needed_headroom */
1046
1047 if (iph->daddr) {
1048 struct flowi4 fl4;
1049 struct rtable *rt;
1050
1051 rt = ip_route_output_gre(dev_net(dev), &fl4,
1052 iph->daddr, iph->saddr,
1053 tunnel->parms.o_key,
1054 RT_TOS(iph->tos),
1055 tunnel->parms.link);
1056 if (!IS_ERR(rt)) {
1057 tdev = rt->dst.dev;
1058 ip_rt_put(rt);
1059 }
1060
1061 if (dev->type != ARPHRD_ETHER)
1062 dev->flags |= IFF_POINTOPOINT;
1063 }
1064 476
1065 if (!tdev && tunnel->parms.link) 477 skb = handle_offloads(tunnel, skb);
1066 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 478 if (IS_ERR(skb))
479 goto out;
1067 480
1068 if (tdev) { 481 if (skb_cow_head(skb, dev->needed_headroom))
1069 hlen = tdev->hard_header_len + tdev->needed_headroom; 482 goto free_skb;
1070 mtu = tdev->mtu;
1071 }
1072 dev->iflink = tunnel->parms.link;
1073
1074 /* Precalculate GRE options length */
1075 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1076 if (tunnel->parms.o_flags&GRE_CSUM)
1077 addend += 4;
1078 if (tunnel->parms.o_flags&GRE_KEY)
1079 addend += 4;
1080 if (tunnel->parms.o_flags&GRE_SEQ)
1081 addend += 4;
1082 }
1083 dev->needed_headroom = addend + hlen;
1084 mtu -= dev->hard_header_len + addend;
1085 483
1086 if (mtu < 68) 484 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
1087 mtu = 68;
1088 485
1089 tunnel->hlen = addend; 486 return NETDEV_TX_OK;
1090 /* TCP offload with GRE SEQ is not supported. */
1091 if (!(tunnel->parms.o_flags & GRE_SEQ)) {
1092 dev->features |= NETIF_F_GSO_SOFTWARE;
1093 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1094 }
1095 487
1096 return mtu; 488free_skb:
489 dev_kfree_skb(skb);
490out:
491 dev->stats.tx_dropped++;
492 return NETDEV_TX_OK;
1097} 493}
1098 494
1099static int 495static int ipgre_tunnel_ioctl(struct net_device *dev,
1100ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 496 struct ifreq *ifr, int cmd)
1101{ 497{
1102 int err = 0; 498 int err = 0;
1103 struct ip_tunnel_parm p; 499 struct ip_tunnel_parm p;
1104 struct ip_tunnel *t;
1105 struct net *net = dev_net(dev);
1106 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1107
1108 switch (cmd) {
1109 case SIOCGETTUNNEL:
1110 t = NULL;
1111 if (dev == ign->fb_tunnel_dev) {
1112 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1113 err = -EFAULT;
1114 break;
1115 }
1116 t = ipgre_tunnel_locate(net, &p, 0);
1117 }
1118 if (t == NULL)
1119 t = netdev_priv(dev);
1120 memcpy(&p, &t->parms, sizeof(p));
1121 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1122 err = -EFAULT;
1123 break;
1124
1125 case SIOCADDTUNNEL:
1126 case SIOCCHGTUNNEL:
1127 err = -EPERM;
1128 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1129 goto done;
1130
1131 err = -EFAULT;
1132 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1133 goto done;
1134
1135 err = -EINVAL;
1136 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1137 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1138 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1139 goto done;
1140 if (p.iph.ttl)
1141 p.iph.frag_off |= htons(IP_DF);
1142
1143 if (!(p.i_flags&GRE_KEY))
1144 p.i_key = 0;
1145 if (!(p.o_flags&GRE_KEY))
1146 p.o_key = 0;
1147
1148 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1149
1150 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1151 if (t != NULL) {
1152 if (t->dev != dev) {
1153 err = -EEXIST;
1154 break;
1155 }
1156 } else {
1157 unsigned int nflags = 0;
1158
1159 t = netdev_priv(dev);
1160
1161 if (ipv4_is_multicast(p.iph.daddr))
1162 nflags = IFF_BROADCAST;
1163 else if (p.iph.daddr)
1164 nflags = IFF_POINTOPOINT;
1165
1166 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1167 err = -EINVAL;
1168 break;
1169 }
1170 ipgre_tunnel_unlink(ign, t);
1171 synchronize_net();
1172 t->parms.iph.saddr = p.iph.saddr;
1173 t->parms.iph.daddr = p.iph.daddr;
1174 t->parms.i_key = p.i_key;
1175 t->parms.o_key = p.o_key;
1176 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1177 memcpy(dev->broadcast, &p.iph.daddr, 4);
1178 ipgre_tunnel_link(ign, t);
1179 netdev_state_change(dev);
1180 }
1181 }
1182 500
1183 if (t) { 501 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1184 err = 0; 502 return -EFAULT;
1185 if (cmd == SIOCCHGTUNNEL) { 503 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1186 t->parms.iph.ttl = p.iph.ttl; 504 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1187 t->parms.iph.tos = p.iph.tos; 505 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
1188 t->parms.iph.frag_off = p.iph.frag_off; 506 return -EINVAL;
1189 if (t->parms.link != p.link) {
1190 t->parms.link = p.link;
1191 dev->mtu = ipgre_tunnel_bind_dev(dev);
1192 netdev_state_change(dev);
1193 }
1194 }
1195 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1196 err = -EFAULT;
1197 } else
1198 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1199 break;
1200
1201 case SIOCDELTUNNEL:
1202 err = -EPERM;
1203 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1204 goto done;
1205
1206 if (dev == ign->fb_tunnel_dev) {
1207 err = -EFAULT;
1208 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1209 goto done;
1210 err = -ENOENT;
1211 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1212 goto done;
1213 err = -EPERM;
1214 if (t == netdev_priv(ign->fb_tunnel_dev))
1215 goto done;
1216 dev = t->dev;
1217 }
1218 unregister_netdevice(dev);
1219 err = 0;
1220 break;
1221
1222 default:
1223 err = -EINVAL;
1224 } 507 }
508 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
509 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1225 510
1226done: 511 err = ip_tunnel_ioctl(dev, &p, cmd);
1227 return err; 512 if (err)
1228} 513 return err;
1229 514
1230static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 515 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
1231{ 516 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
1232 struct ip_tunnel *tunnel = netdev_priv(dev); 517
1233 if (new_mtu < 68 || 518 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1234 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) 519 return -EFAULT;
1235 return -EINVAL;
1236 dev->mtu = new_mtu;
1237 return 0; 520 return 0;
1238} 521}
1239 522
@@ -1263,25 +546,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1263 ... 546 ...
1264 ftp fec0:6666:6666::193.233.7.65 547 ftp fec0:6666:6666::193.233.7.65
1265 ... 548 ...
1266
1267 */ 549 */
1268
1269static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 550static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1270 unsigned short type, 551 unsigned short type,
1271 const void *daddr, const void *saddr, unsigned int len) 552 const void *daddr, const void *saddr, unsigned int len)
1272{ 553{
1273 struct ip_tunnel *t = netdev_priv(dev); 554 struct ip_tunnel *t = netdev_priv(dev);
1274 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 555 struct iphdr *iph;
1275 __be16 *p = (__be16 *)(iph+1); 556 struct gre_base_hdr *greh;
1276 557
1277 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 558 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
1278 p[0] = t->parms.o_flags; 559 greh = (struct gre_base_hdr *)(iph+1);
1279 p[1] = htons(type); 560 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
561 greh->protocol = htons(type);
1280 562
1281 /* 563 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1282 * Set the source hardware address.
1283 */
1284 564
565 /* Set the source hardware address. */
1285 if (saddr) 566 if (saddr)
1286 memcpy(&iph->saddr, saddr, 4); 567 memcpy(&iph->saddr, saddr, 4);
1287 if (daddr) 568 if (daddr)
@@ -1289,7 +570,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1289 if (iph->daddr) 570 if (iph->daddr)
1290 return t->hlen; 571 return t->hlen;
1291 572
1292 return -t->hlen; 573 return -(t->hlen + sizeof(*iph));
1293} 574}
1294 575
1295static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 576static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
@@ -1343,31 +624,21 @@ static int ipgre_close(struct net_device *dev)
1343 } 624 }
1344 return 0; 625 return 0;
1345} 626}
1346
1347#endif 627#endif
1348 628
1349static const struct net_device_ops ipgre_netdev_ops = { 629static const struct net_device_ops ipgre_netdev_ops = {
1350 .ndo_init = ipgre_tunnel_init, 630 .ndo_init = ipgre_tunnel_init,
1351 .ndo_uninit = ipgre_tunnel_uninit, 631 .ndo_uninit = ip_tunnel_uninit,
1352#ifdef CONFIG_NET_IPGRE_BROADCAST 632#ifdef CONFIG_NET_IPGRE_BROADCAST
1353 .ndo_open = ipgre_open, 633 .ndo_open = ipgre_open,
1354 .ndo_stop = ipgre_close, 634 .ndo_stop = ipgre_close,
1355#endif 635#endif
1356 .ndo_start_xmit = ipgre_tunnel_xmit, 636 .ndo_start_xmit = ipgre_xmit,
1357 .ndo_do_ioctl = ipgre_tunnel_ioctl, 637 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1358 .ndo_change_mtu = ipgre_tunnel_change_mtu, 638 .ndo_change_mtu = ip_tunnel_change_mtu,
1359 .ndo_get_stats64 = ipgre_get_stats64, 639 .ndo_get_stats64 = ip_tunnel_get_stats64,
1360}; 640};
1361 641
1362static void ipgre_dev_free(struct net_device *dev)
1363{
1364 struct ip_tunnel *tunnel = netdev_priv(dev);
1365
1366 gro_cells_destroy(&tunnel->gro_cells);
1367 free_percpu(dev->tstats);
1368 free_netdev(dev);
1369}
1370
1371#define GRE_FEATURES (NETIF_F_SG | \ 642#define GRE_FEATURES (NETIF_F_SG | \
1372 NETIF_F_FRAGLIST | \ 643 NETIF_F_FRAGLIST | \
1373 NETIF_F_HIGHDMA | \ 644 NETIF_F_HIGHDMA | \
@@ -1376,35 +647,49 @@ static void ipgre_dev_free(struct net_device *dev)
1376static void ipgre_tunnel_setup(struct net_device *dev) 647static void ipgre_tunnel_setup(struct net_device *dev)
1377{ 648{
1378 dev->netdev_ops = &ipgre_netdev_ops; 649 dev->netdev_ops = &ipgre_netdev_ops;
1379 dev->destructor = ipgre_dev_free; 650 ip_tunnel_setup(dev, ipgre_net_id);
651}
1380 652
1381 dev->type = ARPHRD_IPGRE; 653static void __gre_tunnel_init(struct net_device *dev)
1382 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 654{
655 struct ip_tunnel *tunnel;
656
657 tunnel = netdev_priv(dev);
658 tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
659 tunnel->parms.iph.protocol = IPPROTO_GRE;
660
661 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1383 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; 662 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1384 dev->flags = IFF_NOARP;
1385 dev->iflink = 0; 663 dev->iflink = 0;
1386 dev->addr_len = 4;
1387 dev->features |= NETIF_F_NETNS_LOCAL;
1388 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1389 664
1390 dev->features |= GRE_FEATURES; 665 dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
1391 dev->hw_features |= GRE_FEATURES; 666 dev->hw_features |= GRE_FEATURES;
667
668 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
669 /* TCP offload with GRE SEQ is not supported. */
670 dev->features |= NETIF_F_GSO_SOFTWARE;
671 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
672 /* Can use a lockless transmit, unless we generate
673 * output sequences
674 */
675 dev->features |= NETIF_F_LLTX;
676 }
1392} 677}
1393 678
1394static int ipgre_tunnel_init(struct net_device *dev) 679static int ipgre_tunnel_init(struct net_device *dev)
1395{ 680{
1396 struct ip_tunnel *tunnel; 681 struct ip_tunnel *tunnel = netdev_priv(dev);
1397 struct iphdr *iph; 682 struct iphdr *iph = &tunnel->parms.iph;
1398 int err;
1399 683
1400 tunnel = netdev_priv(dev); 684 __gre_tunnel_init(dev);
1401 iph = &tunnel->parms.iph;
1402 685
1403 tunnel->dev = dev; 686 memcpy(dev->dev_addr, &iph->saddr, 4);
1404 strcpy(tunnel->parms.name, dev->name); 687 memcpy(dev->broadcast, &iph->daddr, 4);
1405 688
1406 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 689 dev->type = ARPHRD_IPGRE;
1407 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 690 dev->flags = IFF_NOARP;
691 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
692 dev->addr_len = 4;
1408 693
1409 if (iph->daddr) { 694 if (iph->daddr) {
1410#ifdef CONFIG_NET_IPGRE_BROADCAST 695#ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -1418,106 +703,30 @@ static int ipgre_tunnel_init(struct net_device *dev)
1418 } else 703 } else
1419 dev->header_ops = &ipgre_header_ops; 704 dev->header_ops = &ipgre_header_ops;
1420 705
1421 dev->tstats = alloc_percpu(struct pcpu_tstats); 706 return ip_tunnel_init(dev);
1422 if (!dev->tstats)
1423 return -ENOMEM;
1424
1425 err = gro_cells_init(&tunnel->gro_cells, dev);
1426 if (err) {
1427 free_percpu(dev->tstats);
1428 return err;
1429 }
1430
1431 return 0;
1432} 707}
1433 708
1434static void ipgre_fb_tunnel_init(struct net_device *dev)
1435{
1436 struct ip_tunnel *tunnel = netdev_priv(dev);
1437 struct iphdr *iph = &tunnel->parms.iph;
1438
1439 tunnel->dev = dev;
1440 strcpy(tunnel->parms.name, dev->name);
1441
1442 iph->version = 4;
1443 iph->protocol = IPPROTO_GRE;
1444 iph->ihl = 5;
1445 tunnel->hlen = sizeof(struct iphdr) + 4;
1446
1447 dev_hold(dev);
1448}
1449
1450
1451static const struct gre_protocol ipgre_protocol = { 709static const struct gre_protocol ipgre_protocol = {
1452 .handler = ipgre_rcv, 710 .handler = ipgre_rcv,
1453 .err_handler = ipgre_err, 711 .err_handler = ipgre_err,
1454}; 712};
1455 713
1456static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1457{
1458 int prio;
1459
1460 for (prio = 0; prio < 4; prio++) {
1461 int h;
1462 for (h = 0; h < HASH_SIZE; h++) {
1463 struct ip_tunnel *t;
1464
1465 t = rtnl_dereference(ign->tunnels[prio][h]);
1466
1467 while (t != NULL) {
1468 unregister_netdevice_queue(t->dev, head);
1469 t = rtnl_dereference(t->next);
1470 }
1471 }
1472 }
1473}
1474
1475static int __net_init ipgre_init_net(struct net *net) 714static int __net_init ipgre_init_net(struct net *net)
1476{ 715{
1477 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 716 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1478 int err;
1479
1480 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1481 ipgre_tunnel_setup);
1482 if (!ign->fb_tunnel_dev) {
1483 err = -ENOMEM;
1484 goto err_alloc_dev;
1485 }
1486 dev_net_set(ign->fb_tunnel_dev, net);
1487
1488 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1489 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1490
1491 if ((err = register_netdev(ign->fb_tunnel_dev)))
1492 goto err_reg_dev;
1493
1494 rcu_assign_pointer(ign->tunnels_wc[0],
1495 netdev_priv(ign->fb_tunnel_dev));
1496 return 0;
1497
1498err_reg_dev:
1499 ipgre_dev_free(ign->fb_tunnel_dev);
1500err_alloc_dev:
1501 return err;
1502} 717}
1503 718
1504static void __net_exit ipgre_exit_net(struct net *net) 719static void __net_exit ipgre_exit_net(struct net *net)
1505{ 720{
1506 struct ipgre_net *ign; 721 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
1507 LIST_HEAD(list); 722 ip_tunnel_delete_net(itn);
1508
1509 ign = net_generic(net, ipgre_net_id);
1510 rtnl_lock();
1511 ipgre_destroy_tunnels(ign, &list);
1512 unregister_netdevice_many(&list);
1513 rtnl_unlock();
1514} 723}
1515 724
1516static struct pernet_operations ipgre_net_ops = { 725static struct pernet_operations ipgre_net_ops = {
1517 .init = ipgre_init_net, 726 .init = ipgre_init_net,
1518 .exit = ipgre_exit_net, 727 .exit = ipgre_exit_net,
1519 .id = &ipgre_net_id, 728 .id = &ipgre_net_id,
1520 .size = sizeof(struct ipgre_net), 729 .size = sizeof(struct ip_tunnel_net),
1521}; 730};
1522 731
1523static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 732static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1562,8 +771,8 @@ out:
1562 return ipgre_tunnel_validate(tb, data); 771 return ipgre_tunnel_validate(tb, data);
1563} 772}
1564 773
1565static void ipgre_netlink_parms(struct nlattr *data[], 774static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
1566 struct ip_tunnel_parm *parms) 775 struct ip_tunnel_parm *parms)
1567{ 776{
1568 memset(parms, 0, sizeof(*parms)); 777 memset(parms, 0, sizeof(*parms));
1569 778
@@ -1576,10 +785,10 @@ static void ipgre_netlink_parms(struct nlattr *data[],
1576 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 785 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1577 786
1578 if (data[IFLA_GRE_IFLAGS]) 787 if (data[IFLA_GRE_IFLAGS])
1579 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); 788 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1580 789
1581 if (data[IFLA_GRE_OFLAGS]) 790 if (data[IFLA_GRE_OFLAGS])
1582 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); 791 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1583 792
1584 if (data[IFLA_GRE_IKEY]) 793 if (data[IFLA_GRE_IKEY])
1585 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 794 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1603,148 +812,46 @@ static void ipgre_netlink_parms(struct nlattr *data[],
1603 parms->iph.frag_off = htons(IP_DF); 812 parms->iph.frag_off = htons(IP_DF);
1604} 813}
1605 814
1606static int ipgre_tap_init(struct net_device *dev) 815static int gre_tap_init(struct net_device *dev)
1607{ 816{
1608 struct ip_tunnel *tunnel; 817 __gre_tunnel_init(dev);
1609
1610 tunnel = netdev_priv(dev);
1611
1612 tunnel->dev = dev;
1613 strcpy(tunnel->parms.name, dev->name);
1614
1615 ipgre_tunnel_bind_dev(dev);
1616 818
1617 dev->tstats = alloc_percpu(struct pcpu_tstats); 819 return ip_tunnel_init(dev);
1618 if (!dev->tstats)
1619 return -ENOMEM;
1620
1621 return 0;
1622} 820}
1623 821
1624static const struct net_device_ops ipgre_tap_netdev_ops = { 822static const struct net_device_ops gre_tap_netdev_ops = {
1625 .ndo_init = ipgre_tap_init, 823 .ndo_init = gre_tap_init,
1626 .ndo_uninit = ipgre_tunnel_uninit, 824 .ndo_uninit = ip_tunnel_uninit,
1627 .ndo_start_xmit = ipgre_tunnel_xmit, 825 .ndo_start_xmit = gre_tap_xmit,
1628 .ndo_set_mac_address = eth_mac_addr, 826 .ndo_set_mac_address = eth_mac_addr,
1629 .ndo_validate_addr = eth_validate_addr, 827 .ndo_validate_addr = eth_validate_addr,
1630 .ndo_change_mtu = ipgre_tunnel_change_mtu, 828 .ndo_change_mtu = ip_tunnel_change_mtu,
1631 .ndo_get_stats64 = ipgre_get_stats64, 829 .ndo_get_stats64 = ip_tunnel_get_stats64,
1632}; 830};
1633 831
1634static void ipgre_tap_setup(struct net_device *dev) 832static void ipgre_tap_setup(struct net_device *dev)
1635{ 833{
1636
1637 ether_setup(dev); 834 ether_setup(dev);
1638 835 dev->netdev_ops = &gre_tap_netdev_ops;
1639 dev->netdev_ops = &ipgre_tap_netdev_ops; 836 ip_tunnel_setup(dev, gre_tap_net_id);
1640 dev->destructor = ipgre_dev_free;
1641
1642 dev->iflink = 0;
1643 dev->features |= NETIF_F_NETNS_LOCAL;
1644
1645 dev->features |= GRE_FEATURES;
1646 dev->hw_features |= GRE_FEATURES;
1647} 837}
1648 838
1649static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], 839static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1650 struct nlattr *data[]) 840 struct nlattr *tb[], struct nlattr *data[])
1651{ 841{
1652 struct ip_tunnel *nt; 842 struct ip_tunnel_parm p;
1653 struct net *net = dev_net(dev);
1654 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1655 int mtu;
1656 int err;
1657
1658 nt = netdev_priv(dev);
1659 ipgre_netlink_parms(data, &nt->parms);
1660
1661 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1662 return -EEXIST;
1663
1664 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1665 eth_hw_addr_random(dev);
1666
1667 mtu = ipgre_tunnel_bind_dev(dev);
1668 if (!tb[IFLA_MTU])
1669 dev->mtu = mtu;
1670
1671 /* Can use a lockless transmit, unless we generate output sequences */
1672 if (!(nt->parms.o_flags & GRE_SEQ))
1673 dev->features |= NETIF_F_LLTX;
1674
1675 err = register_netdevice(dev);
1676 if (err)
1677 goto out;
1678
1679 dev_hold(dev);
1680 ipgre_tunnel_link(ign, nt);
1681 843
1682out: 844 ipgre_netlink_parms(data, tb, &p);
1683 return err; 845 return ip_tunnel_newlink(dev, tb, &p);
1684} 846}
1685 847
1686static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 848static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1687 struct nlattr *data[]) 849 struct nlattr *data[])
1688{ 850{
1689 struct ip_tunnel *t, *nt;
1690 struct net *net = dev_net(dev);
1691 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1692 struct ip_tunnel_parm p; 851 struct ip_tunnel_parm p;
1693 int mtu;
1694
1695 if (dev == ign->fb_tunnel_dev)
1696 return -EINVAL;
1697
1698 nt = netdev_priv(dev);
1699 ipgre_netlink_parms(data, &p);
1700
1701 t = ipgre_tunnel_locate(net, &p, 0);
1702
1703 if (t) {
1704 if (t->dev != dev)
1705 return -EEXIST;
1706 } else {
1707 t = nt;
1708
1709 if (dev->type != ARPHRD_ETHER) {
1710 unsigned int nflags = 0;
1711
1712 if (ipv4_is_multicast(p.iph.daddr))
1713 nflags = IFF_BROADCAST;
1714 else if (p.iph.daddr)
1715 nflags = IFF_POINTOPOINT;
1716
1717 if ((dev->flags ^ nflags) &
1718 (IFF_POINTOPOINT | IFF_BROADCAST))
1719 return -EINVAL;
1720 }
1721 852
1722 ipgre_tunnel_unlink(ign, t); 853 ipgre_netlink_parms(data, tb, &p);
1723 t->parms.iph.saddr = p.iph.saddr; 854 return ip_tunnel_changelink(dev, tb, &p);
1724 t->parms.iph.daddr = p.iph.daddr;
1725 t->parms.i_key = p.i_key;
1726 if (dev->type != ARPHRD_ETHER) {
1727 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1728 memcpy(dev->broadcast, &p.iph.daddr, 4);
1729 }
1730 ipgre_tunnel_link(ign, t);
1731 netdev_state_change(dev);
1732 }
1733
1734 t->parms.o_key = p.o_key;
1735 t->parms.iph.ttl = p.iph.ttl;
1736 t->parms.iph.tos = p.iph.tos;
1737 t->parms.iph.frag_off = p.iph.frag_off;
1738
1739 if (t->parms.link != p.link) {
1740 t->parms.link = p.link;
1741 mtu = ipgre_tunnel_bind_dev(dev);
1742 if (!tb[IFLA_MTU])
1743 dev->mtu = mtu;
1744 netdev_state_change(dev);
1745 }
1746
1747 return 0;
1748} 855}
1749 856
1750static size_t ipgre_get_size(const struct net_device *dev) 857static size_t ipgre_get_size(const struct net_device *dev)
@@ -1779,8 +886,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1779 struct ip_tunnel_parm *p = &t->parms; 886 struct ip_tunnel_parm *p = &t->parms;
1780 887
1781 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 888 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1782 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || 889 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1783 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || 890 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1784 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 891 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1785 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 892 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1786 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 893 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
@@ -1818,6 +925,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1818 .validate = ipgre_tunnel_validate, 925 .validate = ipgre_tunnel_validate,
1819 .newlink = ipgre_newlink, 926 .newlink = ipgre_newlink,
1820 .changelink = ipgre_changelink, 927 .changelink = ipgre_changelink,
928 .dellink = ip_tunnel_dellink,
1821 .get_size = ipgre_get_size, 929 .get_size = ipgre_get_size,
1822 .fill_info = ipgre_fill_info, 930 .fill_info = ipgre_fill_info,
1823}; 931};
@@ -1831,13 +939,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1831 .validate = ipgre_tap_validate, 939 .validate = ipgre_tap_validate,
1832 .newlink = ipgre_newlink, 940 .newlink = ipgre_newlink,
1833 .changelink = ipgre_changelink, 941 .changelink = ipgre_changelink,
942 .dellink = ip_tunnel_dellink,
1834 .get_size = ipgre_get_size, 943 .get_size = ipgre_get_size,
1835 .fill_info = ipgre_fill_info, 944 .fill_info = ipgre_fill_info,
1836}; 945};
1837 946
1838/* 947static int __net_init ipgre_tap_init_net(struct net *net)
1839 * And now the modules code and kernel interface. 948{
1840 */ 949 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
950}
951
952static void __net_exit ipgre_tap_exit_net(struct net *net)
953{
954 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
955 ip_tunnel_delete_net(itn);
956}
957
958static struct pernet_operations ipgre_tap_net_ops = {
959 .init = ipgre_tap_init_net,
960 .exit = ipgre_tap_exit_net,
961 .id = &gre_tap_net_id,
962 .size = sizeof(struct ip_tunnel_net),
963};
1841 964
1842static int __init ipgre_init(void) 965static int __init ipgre_init(void)
1843{ 966{
@@ -1849,6 +972,10 @@ static int __init ipgre_init(void)
1849 if (err < 0) 972 if (err < 0)
1850 return err; 973 return err;
1851 974
975 err = register_pernet_device(&ipgre_tap_net_ops);
976 if (err < 0)
977 goto pnet_tap_faied;
978
1852 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 979 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1853 if (err < 0) { 980 if (err < 0) {
1854 pr_info("%s: can't add protocol\n", __func__); 981 pr_info("%s: can't add protocol\n", __func__);
@@ -1863,16 +990,17 @@ static int __init ipgre_init(void)
1863 if (err < 0) 990 if (err < 0)
1864 goto tap_ops_failed; 991 goto tap_ops_failed;
1865 992
1866out: 993 return 0;
1867 return err;
1868 994
1869tap_ops_failed: 995tap_ops_failed:
1870 rtnl_link_unregister(&ipgre_link_ops); 996 rtnl_link_unregister(&ipgre_link_ops);
1871rtnl_link_failed: 997rtnl_link_failed:
1872 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 998 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1873add_proto_failed: 999add_proto_failed:
1000 unregister_pernet_device(&ipgre_tap_net_ops);
1001pnet_tap_faied:
1874 unregister_pernet_device(&ipgre_net_ops); 1002 unregister_pernet_device(&ipgre_net_ops);
1875 goto out; 1003 return err;
1876} 1004}
1877 1005
1878static void __exit ipgre_fini(void) 1006static void __exit ipgre_fini(void)
@@ -1881,6 +1009,7 @@ static void __exit ipgre_fini(void)
1881 rtnl_link_unregister(&ipgre_link_ops); 1009 rtnl_link_unregister(&ipgre_link_ops);
1882 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) 1010 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1883 pr_info("%s: can't remove protocol\n", __func__); 1011 pr_info("%s: can't remove protocol\n", __func__);
1012 unregister_pernet_device(&ipgre_tap_net_ops);
1884 unregister_pernet_device(&ipgre_net_ops); 1013 unregister_pernet_device(&ipgre_net_ops);
1885} 1014}
1886 1015
@@ -1890,3 +1019,4 @@ MODULE_LICENSE("GPL");
1890MODULE_ALIAS_RTNL_LINK("gre"); 1019MODULE_ALIAS_RTNL_LINK("gre");
1891MODULE_ALIAS_RTNL_LINK("gretap"); 1020MODULE_ALIAS_RTNL_LINK("gretap");
1892MODULE_ALIAS_NETDEV("gre0"); 1021MODULE_ALIAS_NETDEV("gre0");
1022MODULE_ALIAS_NETDEV("gretap0");
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
new file mode 100644
index 000000000000..9d96b6853f21
--- /dev/null
+++ b/net/ipv4/ip_tunnel.c
@@ -0,0 +1,1035 @@
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57
58#if IS_ENABLED(CONFIG_IPV6)
59#include <net/ipv6.h>
60#include <net/ip6_fib.h>
61#include <net/ip6_route.h>
62#endif
63
64static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
71/* Often modified stats are per cpu, other are shared (netdev->stats) */
72struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73 struct rtnl_link_stats64 *tot)
74{
75 int i;
76
77 for_each_possible_cpu(i) {
78 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80 unsigned int start;
81
82 do {
83 start = u64_stats_fetch_begin_bh(&tstats->syncp);
84 rx_packets = tstats->rx_packets;
85 tx_packets = tstats->tx_packets;
86 rx_bytes = tstats->rx_bytes;
87 tx_bytes = tstats->tx_bytes;
88 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
89
90 tot->rx_packets += rx_packets;
91 tot->tx_packets += tx_packets;
92 tot->rx_bytes += rx_bytes;
93 tot->tx_bytes += tx_bytes;
94 }
95
96 tot->multicast = dev->stats.multicast;
97
98 tot->rx_crc_errors = dev->stats.rx_crc_errors;
99 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100 tot->rx_length_errors = dev->stats.rx_length_errors;
101 tot->rx_frame_errors = dev->stats.rx_frame_errors;
102 tot->rx_errors = dev->stats.rx_errors;
103
104 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106 tot->tx_dropped = dev->stats.tx_dropped;
107 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108 tot->tx_errors = dev->stats.tx_errors;
109
110 tot->collisions = dev->stats.collisions;
111
112 return tot;
113}
114EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117 __be16 flags, __be32 key)
118{
119 if (p->i_flags & TUNNEL_KEY) {
120 if (flags & TUNNEL_KEY)
121 return key == p->i_key;
122 else
123 /* key expected, none present */
124 return false;
125 } else
126 return !(flags & TUNNEL_KEY);
127}
128
129/* Fallback tunnel: no source, no destination, no key, no options
130
131 Tunnel hash table:
132 We require exact key match i.e. if a key is present in packet
133 it will match only tunnel with the same key; if it is not present,
134 it will match only keyless tunnel.
135
136 All keysless packets, if not matched configured keyless tunnels
137 will match fallback tunnel.
138 Given src, dst and key, find appropriate for input tunnel.
139*/
140struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141 int link, __be16 flags,
142 __be32 remote, __be32 local,
143 __be32 key)
144{
145 unsigned int hash;
146 struct ip_tunnel *t, *cand = NULL;
147 struct hlist_head *head;
148
149 hash = ip_tunnel_hash(itn, key, remote);
150 head = &itn->tunnels[hash];
151
152 hlist_for_each_entry_rcu(t, head, hash_node) {
153 if (local != t->parms.iph.saddr ||
154 remote != t->parms.iph.daddr ||
155 !(t->dev->flags & IFF_UP))
156 continue;
157
158 if (!ip_tunnel_key_match(&t->parms, flags, key))
159 continue;
160
161 if (t->parms.link == link)
162 return t;
163 else
164 cand = t;
165 }
166
167 hlist_for_each_entry_rcu(t, head, hash_node) {
168 if (remote != t->parms.iph.daddr ||
169 !(t->dev->flags & IFF_UP))
170 continue;
171
172 if (!ip_tunnel_key_match(&t->parms, flags, key))
173 continue;
174
175 if (t->parms.link == link)
176 return t;
177 else if (!cand)
178 cand = t;
179 }
180
181 hash = ip_tunnel_hash(itn, key, 0);
182 head = &itn->tunnels[hash];
183
184 hlist_for_each_entry_rcu(t, head, hash_node) {
185 if ((local != t->parms.iph.saddr &&
186 (local != t->parms.iph.daddr ||
187 !ipv4_is_multicast(local))) ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (!ip_tunnel_key_match(&t->parms, flags, key))
192 continue;
193
194 if (t->parms.link == link)
195 return t;
196 else if (!cand)
197 cand = t;
198 }
199
200 if (flags & TUNNEL_NO_KEY)
201 goto skip_key_lookup;
202
203 hlist_for_each_entry_rcu(t, head, hash_node) {
204 if (t->parms.i_key != key ||
205 !(t->dev->flags & IFF_UP))
206 continue;
207
208 if (t->parms.link == link)
209 return t;
210 else if (!cand)
211 cand = t;
212 }
213
214skip_key_lookup:
215 if (cand)
216 return cand;
217
218 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219 return netdev_priv(itn->fb_tunnel_dev);
220
221
222 return NULL;
223}
224EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227 struct ip_tunnel_parm *parms)
228{
229 unsigned int h;
230 __be32 remote;
231
232 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233 remote = parms->iph.daddr;
234 else
235 remote = 0;
236
237 h = ip_tunnel_hash(itn, parms->i_key, remote);
238 return &itn->tunnels[h];
239}
240
241static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242{
243 struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245 hlist_add_head_rcu(&t->hash_node, head);
246}
247
248static void ip_tunnel_del(struct ip_tunnel *t)
249{
250 hlist_del_init_rcu(&t->hash_node);
251}
252
253static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254 struct ip_tunnel_parm *parms,
255 int type)
256{
257 __be32 remote = parms->iph.daddr;
258 __be32 local = parms->iph.saddr;
259 __be32 key = parms->i_key;
260 int link = parms->link;
261 struct ip_tunnel *t = NULL;
262 struct hlist_head *head = ip_bucket(itn, parms);
263
264 hlist_for_each_entry_rcu(t, head, hash_node) {
265 if (local == t->parms.iph.saddr &&
266 remote == t->parms.iph.daddr &&
267 key == t->parms.i_key &&
268 link == t->parms.link &&
269 type == t->dev->type)
270 break;
271 }
272 return t;
273}
274
275static struct net_device *__ip_tunnel_create(struct net *net,
276 const struct rtnl_link_ops *ops,
277 struct ip_tunnel_parm *parms)
278{
279 int err;
280 struct ip_tunnel *tunnel;
281 struct net_device *dev;
282 char name[IFNAMSIZ];
283
284 if (parms->name[0])
285 strlcpy(name, parms->name, IFNAMSIZ);
286 else {
287 if (strlen(ops->kind) + 3 >= IFNAMSIZ) {
288 err = -E2BIG;
289 goto failed;
290 }
291 strlcpy(name, ops->kind, IFNAMSIZ);
292 strncat(name, "%d", 2);
293 }
294
295 ASSERT_RTNL();
296 dev = alloc_netdev(ops->priv_size, name, ops->setup);
297 if (!dev) {
298 err = -ENOMEM;
299 goto failed;
300 }
301 dev_net_set(dev, net);
302
303 dev->rtnl_link_ops = ops;
304
305 tunnel = netdev_priv(dev);
306 tunnel->parms = *parms;
307
308 err = register_netdevice(dev);
309 if (err)
310 goto failed_free;
311
312 return dev;
313
314failed_free:
315 free_netdev(dev);
316failed:
317 return ERR_PTR(err);
318}
319
320static inline struct rtable *ip_route_output_tunnel(struct net *net,
321 struct flowi4 *fl4,
322 int proto,
323 __be32 daddr, __be32 saddr,
324 __be32 key, __u8 tos, int oif)
325{
326 memset(fl4, 0, sizeof(*fl4));
327 fl4->flowi4_oif = oif;
328 fl4->daddr = daddr;
329 fl4->saddr = saddr;
330 fl4->flowi4_tos = tos;
331 fl4->flowi4_proto = proto;
332 fl4->fl4_gre_key = key;
333 return ip_route_output_key(net, fl4);
334}
335
336static int ip_tunnel_bind_dev(struct net_device *dev)
337{
338 struct net_device *tdev = NULL;
339 struct ip_tunnel *tunnel = netdev_priv(dev);
340 const struct iphdr *iph;
341 int hlen = LL_MAX_HEADER;
342 int mtu = ETH_DATA_LEN;
343 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
344
345 iph = &tunnel->parms.iph;
346
347 /* Guess output device to choose reasonable mtu and needed_headroom */
348 if (iph->daddr) {
349 struct flowi4 fl4;
350 struct rtable *rt;
351
352 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
353 tunnel->parms.iph.protocol,
354 iph->daddr, iph->saddr,
355 tunnel->parms.o_key,
356 RT_TOS(iph->tos),
357 tunnel->parms.link);
358 if (!IS_ERR(rt)) {
359 tdev = rt->dst.dev;
360 ip_rt_put(rt);
361 }
362 if (dev->type != ARPHRD_ETHER)
363 dev->flags |= IFF_POINTOPOINT;
364 }
365
366 if (!tdev && tunnel->parms.link)
367 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
368
369 if (tdev) {
370 hlen = tdev->hard_header_len + tdev->needed_headroom;
371 mtu = tdev->mtu;
372 }
373 dev->iflink = tunnel->parms.link;
374
375 dev->needed_headroom = t_hlen + hlen;
376 mtu -= (dev->hard_header_len + t_hlen);
377
378 if (mtu < 68)
379 mtu = 68;
380
381 return mtu;
382}
383
384static struct ip_tunnel *ip_tunnel_create(struct net *net,
385 struct ip_tunnel_net *itn,
386 struct ip_tunnel_parm *parms)
387{
388 struct ip_tunnel *nt, *fbt;
389 struct net_device *dev;
390
391 BUG_ON(!itn->fb_tunnel_dev);
392 fbt = netdev_priv(itn->fb_tunnel_dev);
393 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394 if (IS_ERR(dev))
395 return NULL;
396
397 dev->mtu = ip_tunnel_bind_dev(dev);
398
399 nt = netdev_priv(dev);
400 ip_tunnel_add(itn, nt);
401 return nt;
402}
403
404int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
405 const struct tnl_ptk_info *tpi, bool log_ecn_error)
406{
407 struct pcpu_tstats *tstats;
408 const struct iphdr *iph = ip_hdr(skb);
409 int err;
410
411 secpath_reset(skb);
412
413 skb->protocol = tpi->proto;
414
415 skb->mac_header = skb->network_header;
416 __pskb_pull(skb, tunnel->hlen);
417 skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
418#ifdef CONFIG_NET_IPGRE_BROADCAST
419 if (ipv4_is_multicast(iph->daddr)) {
420 /* Looped back packet, drop it! */
421 if (rt_is_output_route(skb_rtable(skb)))
422 goto drop;
423 tunnel->dev->stats.multicast++;
424 skb->pkt_type = PACKET_BROADCAST;
425 }
426#endif
427
428 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
429 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
430 tunnel->dev->stats.rx_crc_errors++;
431 tunnel->dev->stats.rx_errors++;
432 goto drop;
433 }
434
435 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
436 if (!(tpi->flags&TUNNEL_SEQ) ||
437 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
438 tunnel->dev->stats.rx_fifo_errors++;
439 tunnel->dev->stats.rx_errors++;
440 goto drop;
441 }
442 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443 }
444
445 /* Warning: All skb pointers will be invalidated! */
446 if (tunnel->dev->type == ARPHRD_ETHER) {
447 if (!pskb_may_pull(skb, ETH_HLEN)) {
448 tunnel->dev->stats.rx_length_errors++;
449 tunnel->dev->stats.rx_errors++;
450 goto drop;
451 }
452
453 iph = ip_hdr(skb);
454 skb->protocol = eth_type_trans(skb, tunnel->dev);
455 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
456 }
457
458 skb->pkt_type = PACKET_HOST;
459 __skb_tunnel_rx(skb, tunnel->dev);
460
461 skb_reset_network_header(skb);
462 err = IP_ECN_decapsulate(iph, skb);
463 if (unlikely(err)) {
464 if (log_ecn_error)
465 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
466 &iph->saddr, iph->tos);
467 if (err > 1) {
468 ++tunnel->dev->stats.rx_frame_errors;
469 ++tunnel->dev->stats.rx_errors;
470 goto drop;
471 }
472 }
473
474 tstats = this_cpu_ptr(tunnel->dev->tstats);
475 u64_stats_update_begin(&tstats->syncp);
476 tstats->rx_packets++;
477 tstats->rx_bytes += skb->len;
478 u64_stats_update_end(&tstats->syncp);
479
480 gro_cells_receive(&tunnel->gro_cells, skb);
481 return 0;
482
483drop:
484 kfree_skb(skb);
485 return 0;
486}
487EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
488
489void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
490 const struct iphdr *tnl_params)
491{
492 struct ip_tunnel *tunnel = netdev_priv(dev);
493 const struct iphdr *inner_iph;
494 struct iphdr *iph;
495 struct flowi4 fl4;
496 u8 tos, ttl;
497 __be16 df;
498 struct rtable *rt; /* Route to the other host */
499 struct net_device *tdev; /* Device to other host */
500 unsigned int max_headroom; /* The extra header space needed */
501 __be32 dst;
502 int mtu;
503
504 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
505
506 dst = tnl_params->daddr;
507 if (dst == 0) {
508 /* NBMA tunnel */
509
510 if (skb_dst(skb) == NULL) {
511 dev->stats.tx_fifo_errors++;
512 goto tx_error;
513 }
514
515 if (skb->protocol == htons(ETH_P_IP)) {
516 rt = skb_rtable(skb);
517 dst = rt_nexthop(rt, inner_iph->daddr);
518 }
519#if IS_ENABLED(CONFIG_IPV6)
520 else if (skb->protocol == htons(ETH_P_IPV6)) {
521 const struct in6_addr *addr6;
522 struct neighbour *neigh;
523 bool do_tx_error_icmp;
524 int addr_type;
525
526 neigh = dst_neigh_lookup(skb_dst(skb),
527 &ipv6_hdr(skb)->daddr);
528 if (neigh == NULL)
529 goto tx_error;
530
531 addr6 = (const struct in6_addr *)&neigh->primary_key;
532 addr_type = ipv6_addr_type(addr6);
533
534 if (addr_type == IPV6_ADDR_ANY) {
535 addr6 = &ipv6_hdr(skb)->daddr;
536 addr_type = ipv6_addr_type(addr6);
537 }
538
539 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
540 do_tx_error_icmp = true;
541 else {
542 do_tx_error_icmp = false;
543 dst = addr6->s6_addr32[3];
544 }
545 neigh_release(neigh);
546 if (do_tx_error_icmp)
547 goto tx_error_icmp;
548 }
549#endif
550 else
551 goto tx_error;
552 }
553
554 tos = tnl_params->tos;
555 if (tos & 0x1) {
556 tos &= ~0x1;
557 if (skb->protocol == htons(ETH_P_IP))
558 tos = inner_iph->tos;
559 else if (skb->protocol == htons(ETH_P_IPV6))
560 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
561 }
562
563 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
564 tunnel->parms.iph.protocol,
565 dst, tnl_params->saddr,
566 tunnel->parms.o_key,
567 RT_TOS(tos),
568 tunnel->parms.link);
569 if (IS_ERR(rt)) {
570 dev->stats.tx_carrier_errors++;
571 goto tx_error;
572 }
573 tdev = rt->dst.dev;
574
575 if (tdev == dev) {
576 ip_rt_put(rt);
577 dev->stats.collisions++;
578 goto tx_error;
579 }
580
581 df = tnl_params->frag_off;
582
583 if (df)
584 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
585 - sizeof(struct iphdr);
586 else
587 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
588
589 if (skb_dst(skb))
590 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
591
592 if (skb->protocol == htons(ETH_P_IP)) {
593 df |= (inner_iph->frag_off&htons(IP_DF));
594
595 if (!skb_is_gso(skb) &&
596 (inner_iph->frag_off&htons(IP_DF)) &&
597 mtu < ntohs(inner_iph->tot_len)) {
598 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
599 ip_rt_put(rt);
600 goto tx_error;
601 }
602 }
603#if IS_ENABLED(CONFIG_IPV6)
604 else if (skb->protocol == htons(ETH_P_IPV6)) {
605 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
606
607 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
608 mtu >= IPV6_MIN_MTU) {
609 if ((tunnel->parms.iph.daddr &&
610 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
611 rt6->rt6i_dst.plen == 128) {
612 rt6->rt6i_flags |= RTF_MODIFIED;
613 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
614 }
615 }
616
617 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
618 mtu < skb->len) {
619 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
620 ip_rt_put(rt);
621 goto tx_error;
622 }
623 }
624#endif
625
626 if (tunnel->err_count > 0) {
627 if (time_before(jiffies,
628 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
629 tunnel->err_count--;
630
631 dst_link_failure(skb);
632 } else
633 tunnel->err_count = 0;
634 }
635
636 ttl = tnl_params->ttl;
637 if (ttl == 0) {
638 if (skb->protocol == htons(ETH_P_IP))
639 ttl = inner_iph->ttl;
640#if IS_ENABLED(CONFIG_IPV6)
641 else if (skb->protocol == htons(ETH_P_IPV6))
642 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
643#endif
644 else
645 ttl = ip4_dst_hoplimit(&rt->dst);
646 }
647
648 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
649 + rt->dst.header_len;
650 if (max_headroom > dev->needed_headroom) {
651 dev->needed_headroom = max_headroom;
652 if (skb_cow_head(skb, dev->needed_headroom)) {
653 dev->stats.tx_dropped++;
654 dev_kfree_skb(skb);
655 return;
656 }
657 }
658
659 skb_dst_drop(skb);
660 skb_dst_set(skb, &rt->dst);
661 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
662
663 /* Push down and install the IP header. */
664 skb_push(skb, sizeof(struct iphdr));
665 skb_reset_network_header(skb);
666
667 iph = ip_hdr(skb);
668 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
669
670 iph->version = 4;
671 iph->ihl = sizeof(struct iphdr) >> 2;
672 iph->frag_off = df;
673 iph->protocol = tnl_params->protocol;
674 iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
675 iph->daddr = fl4.daddr;
676 iph->saddr = fl4.saddr;
677 iph->ttl = ttl;
678 tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
679
680 iptunnel_xmit(skb, dev);
681 return;
682
683#if IS_ENABLED(CONFIG_IPV6)
684tx_error_icmp:
685 dst_link_failure(skb);
686#endif
687tx_error:
688 dev->stats.tx_errors++;
689 dev_kfree_skb(skb);
690}
691EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
692
693static void ip_tunnel_update(struct ip_tunnel_net *itn,
694 struct ip_tunnel *t,
695 struct net_device *dev,
696 struct ip_tunnel_parm *p,
697 bool set_mtu)
698{
699 ip_tunnel_del(t);
700 t->parms.iph.saddr = p->iph.saddr;
701 t->parms.iph.daddr = p->iph.daddr;
702 t->parms.i_key = p->i_key;
703 t->parms.o_key = p->o_key;
704 if (dev->type != ARPHRD_ETHER) {
705 memcpy(dev->dev_addr, &p->iph.saddr, 4);
706 memcpy(dev->broadcast, &p->iph.daddr, 4);
707 }
708 ip_tunnel_add(itn, t);
709
710 t->parms.iph.ttl = p->iph.ttl;
711 t->parms.iph.tos = p->iph.tos;
712 t->parms.iph.frag_off = p->iph.frag_off;
713
714 if (t->parms.link != p->link) {
715 int mtu;
716
717 t->parms.link = p->link;
718 mtu = ip_tunnel_bind_dev(dev);
719 if (set_mtu)
720 dev->mtu = mtu;
721 }
722 netdev_state_change(dev);
723}
724
725int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
726{
727 int err = 0;
728 struct ip_tunnel *t;
729 struct net *net = dev_net(dev);
730 struct ip_tunnel *tunnel = netdev_priv(dev);
731 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
732
733 BUG_ON(!itn->fb_tunnel_dev);
734 switch (cmd) {
735 case SIOCGETTUNNEL:
736 t = NULL;
737 if (dev == itn->fb_tunnel_dev)
738 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
739 if (t == NULL)
740 t = netdev_priv(dev);
741 memcpy(p, &t->parms, sizeof(*p));
742 break;
743
744 case SIOCADDTUNNEL:
745 case SIOCCHGTUNNEL:
746 err = -EPERM;
747 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
748 goto done;
749 if (p->iph.ttl)
750 p->iph.frag_off |= htons(IP_DF);
751 if (!(p->i_flags&TUNNEL_KEY))
752 p->i_key = 0;
753 if (!(p->o_flags&TUNNEL_KEY))
754 p->o_key = 0;
755
756 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
757
758 if (!t && (cmd == SIOCADDTUNNEL))
759 t = ip_tunnel_create(net, itn, p);
760
761 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
762 if (t != NULL) {
763 if (t->dev != dev) {
764 err = -EEXIST;
765 break;
766 }
767 } else {
768 unsigned int nflags = 0;
769
770 if (ipv4_is_multicast(p->iph.daddr))
771 nflags = IFF_BROADCAST;
772 else if (p->iph.daddr)
773 nflags = IFF_POINTOPOINT;
774
775 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
776 err = -EINVAL;
777 break;
778 }
779
780 t = netdev_priv(dev);
781 }
782 }
783
784 if (t) {
785 err = 0;
786 ip_tunnel_update(itn, t, dev, p, true);
787 } else
788 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
789 break;
790
791 case SIOCDELTUNNEL:
792 err = -EPERM;
793 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
794 goto done;
795
796 if (dev == itn->fb_tunnel_dev) {
797 err = -ENOENT;
798 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
799 if (t == NULL)
800 goto done;
801 err = -EPERM;
802 if (t == netdev_priv(itn->fb_tunnel_dev))
803 goto done;
804 dev = t->dev;
805 }
806 unregister_netdevice(dev);
807 err = 0;
808 break;
809
810 default:
811 err = -EINVAL;
812 }
813
814done:
815 return err;
816}
817EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
818
819int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
820{
821 struct ip_tunnel *tunnel = netdev_priv(dev);
822 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
823
824 if (new_mtu < 68 ||
825 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
826 return -EINVAL;
827 dev->mtu = new_mtu;
828 return 0;
829}
830EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
831
832static void ip_tunnel_dev_free(struct net_device *dev)
833{
834 struct ip_tunnel *tunnel = netdev_priv(dev);
835
836 gro_cells_destroy(&tunnel->gro_cells);
837 free_percpu(dev->tstats);
838 free_netdev(dev);
839}
840
841void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
842{
843 struct net *net = dev_net(dev);
844 struct ip_tunnel *tunnel = netdev_priv(dev);
845 struct ip_tunnel_net *itn;
846
847 itn = net_generic(net, tunnel->ip_tnl_net_id);
848
849 if (itn->fb_tunnel_dev != dev) {
850 ip_tunnel_del(netdev_priv(dev));
851 unregister_netdevice_queue(dev, head);
852 }
853}
854EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
855
856int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
857 struct rtnl_link_ops *ops, char *devname)
858{
859 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
860 struct ip_tunnel_parm parms;
861
862 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
863 if (!itn->tunnels)
864 return -ENOMEM;
865
866 if (!ops) {
867 itn->fb_tunnel_dev = NULL;
868 return 0;
869 }
870 memset(&parms, 0, sizeof(parms));
871 if (devname)
872 strlcpy(parms.name, devname, IFNAMSIZ);
873
874 rtnl_lock();
875 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
876 rtnl_unlock();
877 if (IS_ERR(itn->fb_tunnel_dev)) {
878 kfree(itn->tunnels);
879 return PTR_ERR(itn->fb_tunnel_dev);
880 }
881
882 return 0;
883}
884EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
885
886static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
887{
888 int h;
889
890 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
891 struct ip_tunnel *t;
892 struct hlist_node *n;
893 struct hlist_head *thead = &itn->tunnels[h];
894
895 hlist_for_each_entry_safe(t, n, thead, hash_node)
896 unregister_netdevice_queue(t->dev, head);
897 }
898 if (itn->fb_tunnel_dev)
899 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
900}
901
902void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn)
903{
904 LIST_HEAD(list);
905
906 rtnl_lock();
907 ip_tunnel_destroy(itn, &list);
908 unregister_netdevice_many(&list);
909 rtnl_unlock();
910 kfree(itn->tunnels);
911}
912EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
913
914int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
915 struct ip_tunnel_parm *p)
916{
917 struct ip_tunnel *nt;
918 struct net *net = dev_net(dev);
919 struct ip_tunnel_net *itn;
920 int mtu;
921 int err;
922
923 nt = netdev_priv(dev);
924 itn = net_generic(net, nt->ip_tnl_net_id);
925
926 if (ip_tunnel_find(itn, p, dev->type))
927 return -EEXIST;
928
929 nt->parms = *p;
930 err = register_netdevice(dev);
931 if (err)
932 goto out;
933
934 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
935 eth_hw_addr_random(dev);
936
937 mtu = ip_tunnel_bind_dev(dev);
938 if (!tb[IFLA_MTU])
939 dev->mtu = mtu;
940
941 ip_tunnel_add(itn, nt);
942
943out:
944 return err;
945}
946EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
947
948int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
949 struct ip_tunnel_parm *p)
950{
951 struct ip_tunnel *t, *nt;
952 struct net *net = dev_net(dev);
953 struct ip_tunnel *tunnel = netdev_priv(dev);
954 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
955
956 if (dev == itn->fb_tunnel_dev)
957 return -EINVAL;
958
959 nt = netdev_priv(dev);
960
961 t = ip_tunnel_find(itn, p, dev->type);
962
963 if (t) {
964 if (t->dev != dev)
965 return -EEXIST;
966 } else {
967 t = nt;
968
969 if (dev->type != ARPHRD_ETHER) {
970 unsigned int nflags = 0;
971
972 if (ipv4_is_multicast(p->iph.daddr))
973 nflags = IFF_BROADCAST;
974 else if (p->iph.daddr)
975 nflags = IFF_POINTOPOINT;
976
977 if ((dev->flags ^ nflags) &
978 (IFF_POINTOPOINT | IFF_BROADCAST))
979 return -EINVAL;
980 }
981 }
982
983 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
984 return 0;
985}
986EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
987
988int ip_tunnel_init(struct net_device *dev)
989{
990 struct ip_tunnel *tunnel = netdev_priv(dev);
991 struct iphdr *iph = &tunnel->parms.iph;
992 int err;
993
994 dev->destructor = ip_tunnel_dev_free;
995 dev->tstats = alloc_percpu(struct pcpu_tstats);
996 if (!dev->tstats)
997 return -ENOMEM;
998
999 err = gro_cells_init(&tunnel->gro_cells, dev);
1000 if (err) {
1001 free_percpu(dev->tstats);
1002 return err;
1003 }
1004
1005 tunnel->dev = dev;
1006 strcpy(tunnel->parms.name, dev->name);
1007 iph->version = 4;
1008 iph->ihl = 5;
1009
1010 return 0;
1011}
1012EXPORT_SYMBOL_GPL(ip_tunnel_init);
1013
1014void ip_tunnel_uninit(struct net_device *dev)
1015{
1016 struct net *net = dev_net(dev);
1017 struct ip_tunnel *tunnel = netdev_priv(dev);
1018 struct ip_tunnel_net *itn;
1019
1020 itn = net_generic(net, tunnel->ip_tnl_net_id);
1021 /* fb_tunnel_dev will be unregisted in net-exit call. */
1022 if (itn->fb_tunnel_dev != dev)
1023 ip_tunnel_del(netdev_priv(dev));
1024}
1025EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1026
1027/* Do least required initialization, rest of init is done in tunnel_init call */
1028void ip_tunnel_setup(struct net_device *dev, int net_id)
1029{
1030 struct ip_tunnel *tunnel = netdev_priv(dev);
1031 tunnel->ip_tnl_net_id = net_id;
1032}
1033EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1034
1035MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index c3a4233c0ac2..6a628fb3349f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -38,7 +38,7 @@
38#include <net/sock.h> 38#include <net/sock.h>
39#include <net/ip.h> 39#include <net/ip.h>
40#include <net/icmp.h> 40#include <net/icmp.h>
41#include <net/ipip.h> 41#include <net/ip_tunnels.h>
42#include <net/inet_ecn.h> 42#include <net/inet_ecn.h>
43#include <net/xfrm.h> 43#include <net/xfrm.h>
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 34e006fe2d87..a557d6ab127a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -111,7 +111,7 @@
111#include <net/sock.h> 111#include <net/sock.h>
112#include <net/ip.h> 112#include <net/ip.h>
113#include <net/icmp.h> 113#include <net/icmp.h>
114#include <net/ipip.h> 114#include <net/ip_tunnels.h>
115#include <net/inet_ecn.h> 115#include <net/inet_ecn.h>
116#include <net/xfrm.h> 116#include <net/xfrm.h>
117#include <net/net_namespace.h> 117#include <net/net_namespace.h>
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f95b3aa579e..fd61fe16679f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -61,7 +61,7 @@
61#include <linux/netfilter_ipv4.h> 61#include <linux/netfilter_ipv4.h>
62#include <linux/compat.h> 62#include <linux/compat.h>
63#include <linux/export.h> 63#include <linux/export.h>
64#include <net/ipip.h> 64#include <net/ip_tunnels.h>
65#include <net/checksum.h> 65#include <net/checksum.h>
66#include <net/netlink.h> 66#include <net/netlink.h>
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index f56277f15903..ab5c7ad482cd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,7 +49,6 @@
49#include <net/udp.h> 49#include <net/udp.h>
50#include <net/udplite.h> 50#include <net/udplite.h>
51#include <net/tcp.h> 51#include <net/tcp.h>
52#include <net/ipip.h>
53#include <net/protocol.h> 52#include <net/protocol.h>
54#include <net/inet_common.h> 53#include <net/inet_common.h>
55#include <net/route.h> 54#include <net/route.h>
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6a6ba73ff265..df89ccaaceaa 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -38,6 +38,7 @@
38 38
39#include <net/sock.h> 39#include <net/sock.h>
40#include <net/ip.h> 40#include <net/ip.h>
41#include <net/ip_tunnels.h>
41#include <net/icmp.h> 42#include <net/icmp.h>
42#include <net/protocol.h> 43#include <net/protocol.h>
43#include <net/addrconf.h> 44#include <net/addrconf.h>
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index bef3fedfdc56..1e55866cead7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -47,6 +47,7 @@
47 47
48#include <net/icmp.h> 48#include <net/icmp.h>
49#include <net/ip.h> 49#include <net/ip.h>
50#include <net/ip_tunnels.h>
50#include <net/ipv6.h> 51#include <net/ipv6.h>
51#include <net/ip6_route.h> 52#include <net/ip6_route.h>
52#include <net/addrconf.h> 53#include <net/addrconf.h>
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 898e671a526b..ee4fc570cf2c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -49,7 +49,7 @@
49#include <net/ip.h> 49#include <net/ip.h>
50#include <net/udp.h> 50#include <net/udp.h>
51#include <net/icmp.h> 51#include <net/icmp.h>
52#include <net/ipip.h> 52#include <net/ip_tunnels.h>
53#include <net/inet_ecn.h> 53#include <net/inet_ecn.h>
54#include <net/xfrm.h> 54#include <net/xfrm.h>
55#include <net/dsfield.h> 55#include <net/dsfield.h>