aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_vti.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_vti.c')
-rw-r--r--net/ipv4/ip_vti.c325
1 files changed, 250 insertions, 75 deletions
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 48eafae51769..e453cb724a95 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -34,6 +34,7 @@
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/netfilter_ipv4.h> 35#include <linux/netfilter_ipv4.h>
36#include <linux/if_ether.h> 36#include <linux/if_ether.h>
37#include <linux/icmpv6.h>
37 38
38#include <net/sock.h> 39#include <net/sock.h>
39#include <net/ip.h> 40#include <net/ip.h>
@@ -49,8 +50,8 @@ static struct rtnl_link_ops vti_link_ops __read_mostly;
49static int vti_net_id __read_mostly; 50static int vti_net_id __read_mostly;
50static int vti_tunnel_init(struct net_device *dev); 51static int vti_tunnel_init(struct net_device *dev);
51 52
52/* We dont digest the packet therefore let the packet pass */ 53static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
53static int vti_rcv(struct sk_buff *skb) 54 int encap_type)
54{ 55{
55 struct ip_tunnel *tunnel; 56 struct ip_tunnel *tunnel;
56 const struct iphdr *iph = ip_hdr(skb); 57 const struct iphdr *iph = ip_hdr(skb);
@@ -60,79 +61,120 @@ static int vti_rcv(struct sk_buff *skb)
60 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, 61 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
61 iph->saddr, iph->daddr, 0); 62 iph->saddr, iph->daddr, 0);
62 if (tunnel != NULL) { 63 if (tunnel != NULL) {
63 struct pcpu_sw_netstats *tstats; 64 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
64 u32 oldmark = skb->mark; 65 goto drop;
65 int ret; 66
66 67 XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
67 68 skb->mark = be32_to_cpu(tunnel->parms.i_key);
68 /* temporarily mark the skb with the tunnel o_key, to 69
69 * only match policies with this mark. 70 return xfrm_input(skb, nexthdr, spi, encap_type);
70 */ 71 }
71 skb->mark = be32_to_cpu(tunnel->parms.o_key); 72
72 ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); 73 return -EINVAL;
73 skb->mark = oldmark; 74drop:
74 if (!ret) 75 kfree_skb(skb);
75 return -1; 76 return 0;
76 77}
77 tstats = this_cpu_ptr(tunnel->dev->tstats); 78
78 u64_stats_update_begin(&tstats->syncp); 79static int vti_rcv(struct sk_buff *skb)
79 tstats->rx_packets++; 80{
80 tstats->rx_bytes += skb->len; 81 XFRM_SPI_SKB_CB(skb)->family = AF_INET;
81 u64_stats_update_end(&tstats->syncp); 82 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
82 83
83 secpath_reset(skb); 84 return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
84 skb->dev = tunnel->dev; 85}
86
87static int vti_rcv_cb(struct sk_buff *skb, int err)
88{
89 unsigned short family;
90 struct net_device *dev;
91 struct pcpu_sw_netstats *tstats;
92 struct xfrm_state *x;
93 struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
94
95 if (!tunnel)
85 return 1; 96 return 1;
97
98 dev = tunnel->dev;
99
100 if (err) {
101 dev->stats.rx_errors++;
102 dev->stats.rx_dropped++;
103
104 return 0;
86 } 105 }
87 106
88 return -1; 107 x = xfrm_input_state(skb);
108 family = x->inner_mode->afinfo->family;
109
110 if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
111 return -EPERM;
112
113 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
114 skb->dev = dev;
115
116 tstats = this_cpu_ptr(dev->tstats);
117
118 u64_stats_update_begin(&tstats->syncp);
119 tstats->rx_packets++;
120 tstats->rx_bytes += skb->len;
121 u64_stats_update_end(&tstats->syncp);
122
123 return 0;
89} 124}
90 125
91/* This function assumes it is being called from dev_queue_xmit() 126static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
92 * and that skb is filled properly by that function. 127{
93 */ 128 xfrm_address_t *daddr = (xfrm_address_t *)&dst;
129 xfrm_address_t *saddr = (xfrm_address_t *)&src;
94 130
95static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 131 /* if there is no transform then this tunnel is not functional.
132 * Or if the xfrm is not mode tunnel.
133 */
134 if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
135 x->props.family != AF_INET)
136 return false;
137
138 if (!dst)
139 return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET);
140
141 if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET))
142 return false;
143
144 return true;
145}
146
147static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
148 struct flowi *fl)
96{ 149{
97 struct ip_tunnel *tunnel = netdev_priv(dev); 150 struct ip_tunnel *tunnel = netdev_priv(dev);
98 struct iphdr *tiph = &tunnel->parms.iph; 151 struct ip_tunnel_parm *parms = &tunnel->parms;
99 u8 tos; 152 struct dst_entry *dst = skb_dst(skb);
100 struct rtable *rt; /* Route to the other host */
101 struct net_device *tdev; /* Device to other host */ 153 struct net_device *tdev; /* Device to other host */
102 struct iphdr *old_iph = ip_hdr(skb);
103 __be32 dst = tiph->daddr;
104 struct flowi4 fl4;
105 int err; 154 int err;
106 155
107 if (skb->protocol != htons(ETH_P_IP)) 156 if (!dst) {
108 goto tx_error; 157 dev->stats.tx_carrier_errors++;
109 158 goto tx_error_icmp;
110 tos = old_iph->tos; 159 }
111 160
112 memset(&fl4, 0, sizeof(fl4)); 161 dst_hold(dst);
113 flowi4_init_output(&fl4, tunnel->parms.link, 162 dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
114 be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), 163 if (IS_ERR(dst)) {
115 RT_SCOPE_UNIVERSE,
116 IPPROTO_IPIP, 0,
117 dst, tiph->saddr, 0, 0);
118 rt = ip_route_output_key(dev_net(dev), &fl4);
119 if (IS_ERR(rt)) {
120 dev->stats.tx_carrier_errors++; 164 dev->stats.tx_carrier_errors++;
121 goto tx_error_icmp; 165 goto tx_error_icmp;
122 } 166 }
123 /* if there is no transform then this tunnel is not functional. 167
124 * Or if the xfrm is not mode tunnel. 168 if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
125 */
126 if (!rt->dst.xfrm ||
127 rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
128 dev->stats.tx_carrier_errors++; 169 dev->stats.tx_carrier_errors++;
129 ip_rt_put(rt); 170 dst_release(dst);
130 goto tx_error_icmp; 171 goto tx_error_icmp;
131 } 172 }
132 tdev = rt->dst.dev; 173
174 tdev = dst->dev;
133 175
134 if (tdev == dev) { 176 if (tdev == dev) {
135 ip_rt_put(rt); 177 dst_release(dst);
136 dev->stats.collisions++; 178 dev->stats.collisions++;
137 goto tx_error; 179 goto tx_error;
138 } 180 }
@@ -146,10 +188,8 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
146 tunnel->err_count = 0; 188 tunnel->err_count = 0;
147 } 189 }
148 190
149 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 191 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
150 skb_dst_drop(skb); 192 skb_dst_set(skb, dst);
151 skb_dst_set(skb, &rt->dst);
152 nf_reset(skb);
153 skb->dev = skb_dst(skb)->dev; 193 skb->dev = skb_dst(skb)->dev;
154 194
155 err = dst_output(skb); 195 err = dst_output(skb);
@@ -166,6 +206,98 @@ tx_error:
166 return NETDEV_TX_OK; 206 return NETDEV_TX_OK;
167} 207}
168 208
209/* This function assumes it is being called from dev_queue_xmit()
210 * and that skb is filled properly by that function.
211 */
212static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
213{
214 struct ip_tunnel *tunnel = netdev_priv(dev);
215 struct flowi fl;
216
217 memset(&fl, 0, sizeof(fl));
218
219 skb->mark = be32_to_cpu(tunnel->parms.o_key);
220
221 switch (skb->protocol) {
222 case htons(ETH_P_IP):
223 xfrm_decode_session(skb, &fl, AF_INET);
224 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
225 break;
226 case htons(ETH_P_IPV6):
227 xfrm_decode_session(skb, &fl, AF_INET6);
228 memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
229 break;
230 default:
231 dev->stats.tx_errors++;
232 dev_kfree_skb(skb);
233 return NETDEV_TX_OK;
234 }
235
236 return vti_xmit(skb, dev, &fl);
237}
238
239static int vti4_err(struct sk_buff *skb, u32 info)
240{
241 __be32 spi;
242 __u32 mark;
243 struct xfrm_state *x;
244 struct ip_tunnel *tunnel;
245 struct ip_esp_hdr *esph;
246 struct ip_auth_hdr *ah ;
247 struct ip_comp_hdr *ipch;
248 struct net *net = dev_net(skb->dev);
249 const struct iphdr *iph = (const struct iphdr *)skb->data;
250 int protocol = iph->protocol;
251 struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
252
253 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
254 iph->daddr, iph->saddr, 0);
255 if (!tunnel)
256 return -1;
257
258 mark = be32_to_cpu(tunnel->parms.o_key);
259
260 switch (protocol) {
261 case IPPROTO_ESP:
262 esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
263 spi = esph->spi;
264 break;
265 case IPPROTO_AH:
266 ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
267 spi = ah->spi;
268 break;
269 case IPPROTO_COMP:
270 ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
271 spi = htonl(ntohs(ipch->cpi));
272 break;
273 default:
274 return 0;
275 }
276
277 switch (icmp_hdr(skb)->type) {
278 case ICMP_DEST_UNREACH:
279 if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
280 return 0;
281 case ICMP_REDIRECT:
282 break;
283 default:
284 return 0;
285 }
286
287 x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
288 spi, protocol, AF_INET);
289 if (!x)
290 return 0;
291
292 if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
293 ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
294 else
295 ipv4_redirect(skb, net, 0, 0, protocol, 0);
296 xfrm_state_put(x);
297
298 return 0;
299}
300
169static int 301static int
170vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 302vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
171{ 303{
@@ -181,12 +313,19 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
181 return -EINVAL; 313 return -EINVAL;
182 } 314 }
183 315
316 if (!(p.i_flags & GRE_KEY))
317 p.i_key = 0;
318 if (!(p.o_flags & GRE_KEY))
319 p.o_key = 0;
320
321 p.i_flags = VTI_ISVTI;
322
184 err = ip_tunnel_ioctl(dev, &p, cmd); 323 err = ip_tunnel_ioctl(dev, &p, cmd);
185 if (err) 324 if (err)
186 return err; 325 return err;
187 326
188 if (cmd != SIOCDELTUNNEL) { 327 if (cmd != SIOCDELTUNNEL) {
189 p.i_flags |= GRE_KEY | VTI_ISVTI; 328 p.i_flags |= GRE_KEY;
190 p.o_flags |= GRE_KEY; 329 p.o_flags |= GRE_KEY;
191 } 330 }
192 331
@@ -207,6 +346,7 @@ static const struct net_device_ops vti_netdev_ops = {
207static void vti_tunnel_setup(struct net_device *dev) 346static void vti_tunnel_setup(struct net_device *dev)
208{ 347{
209 dev->netdev_ops = &vti_netdev_ops; 348 dev->netdev_ops = &vti_netdev_ops;
349 dev->type = ARPHRD_TUNNEL;
210 ip_tunnel_setup(dev, vti_net_id); 350 ip_tunnel_setup(dev, vti_net_id);
211} 351}
212 352
@@ -218,13 +358,11 @@ static int vti_tunnel_init(struct net_device *dev)
218 memcpy(dev->dev_addr, &iph->saddr, 4); 358 memcpy(dev->dev_addr, &iph->saddr, 4);
219 memcpy(dev->broadcast, &iph->daddr, 4); 359 memcpy(dev->broadcast, &iph->daddr, 4);
220 360
221 dev->type = ARPHRD_TUNNEL;
222 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 361 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
223 dev->mtu = ETH_DATA_LEN; 362 dev->mtu = ETH_DATA_LEN;
224 dev->flags = IFF_NOARP; 363 dev->flags = IFF_NOARP;
225 dev->iflink = 0; 364 dev->iflink = 0;
226 dev->addr_len = 4; 365 dev->addr_len = 4;
227 dev->features |= NETIF_F_NETNS_LOCAL;
228 dev->features |= NETIF_F_LLTX; 366 dev->features |= NETIF_F_LLTX;
229 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 367 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
230 368
@@ -241,9 +379,28 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
241 iph->ihl = 5; 379 iph->ihl = 5;
242} 380}
243 381
244static struct xfrm_tunnel_notifier vti_handler __read_mostly = { 382static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
383 .handler = vti_rcv,
384 .input_handler = vti_input,
385 .cb_handler = vti_rcv_cb,
386 .err_handler = vti4_err,
387 .priority = 100,
388};
389
390static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
245 .handler = vti_rcv, 391 .handler = vti_rcv,
246 .priority = 1, 392 .input_handler = vti_input,
393 .cb_handler = vti_rcv_cb,
394 .err_handler = vti4_err,
395 .priority = 100,
396};
397
398static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
399 .handler = vti_rcv,
400 .input_handler = vti_input,
401 .cb_handler = vti_rcv_cb,
402 .err_handler = vti4_err,
403 .priority = 100,
247}; 404};
248 405
249static int __net_init vti_init_net(struct net *net) 406static int __net_init vti_init_net(struct net *net)
@@ -287,6 +444,8 @@ static void vti_netlink_parms(struct nlattr *data[],
287 if (!data) 444 if (!data)
288 return; 445 return;
289 446
447 parms->i_flags = VTI_ISVTI;
448
290 if (data[IFLA_VTI_LINK]) 449 if (data[IFLA_VTI_LINK])
291 parms->link = nla_get_u32(data[IFLA_VTI_LINK]); 450 parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
292 451
@@ -375,19 +534,28 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
375 534
376static int __init vti_init(void) 535static int __init vti_init(void)
377{ 536{
537 const char *msg;
378 int err; 538 int err;
379 539
380 pr_info("IPv4 over IPSec tunneling driver\n"); 540 pr_info("IPv4 over IPsec tunneling driver\n");
381 541
542 msg = "tunnel device";
382 err = register_pernet_device(&vti_net_ops); 543 err = register_pernet_device(&vti_net_ops);
383 if (err < 0) 544 if (err < 0)
384 return err; 545 goto pernet_dev_failed;
385 err = xfrm4_mode_tunnel_input_register(&vti_handler);
386 if (err < 0) {
387 unregister_pernet_device(&vti_net_ops);
388 pr_info("vti init: can't register tunnel\n");
389 }
390 546
547 msg = "tunnel protocols";
548 err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
549 if (err < 0)
550 goto xfrm_proto_esp_failed;
551 err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
552 if (err < 0)
553 goto xfrm_proto_ah_failed;
554 err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
555 if (err < 0)
556 goto xfrm_proto_comp_failed;
557
558 msg = "netlink interface";
391 err = rtnl_link_register(&vti_link_ops); 559 err = rtnl_link_register(&vti_link_ops);
392 if (err < 0) 560 if (err < 0)
393 goto rtnl_link_failed; 561 goto rtnl_link_failed;
@@ -395,17 +563,24 @@ static int __init vti_init(void)
395 return err; 563 return err;
396 564
397rtnl_link_failed: 565rtnl_link_failed:
398 xfrm4_mode_tunnel_input_deregister(&vti_handler); 566 xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
567xfrm_proto_comp_failed:
568 xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
569xfrm_proto_ah_failed:
570 xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
571xfrm_proto_esp_failed:
399 unregister_pernet_device(&vti_net_ops); 572 unregister_pernet_device(&vti_net_ops);
573pernet_dev_failed:
574 pr_err("vti init: failed to register %s\n", msg);
400 return err; 575 return err;
401} 576}
402 577
403static void __exit vti_fini(void) 578static void __exit vti_fini(void)
404{ 579{
405 rtnl_link_unregister(&vti_link_ops); 580 rtnl_link_unregister(&vti_link_ops);
406 if (xfrm4_mode_tunnel_input_deregister(&vti_handler)) 581 xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
407 pr_info("vti close: can't deregister tunnel\n"); 582 xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
408 583 xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
409 unregister_pernet_device(&vti_net_ops); 584 unregister_pernet_device(&vti_net_ops);
410} 585}
411 586