Diffstat (limited to 'net/ipv4/ipip.c')
-rw-r--r--	net/ipv4/ipip.c	748
1 files changed, 59 insertions, 689 deletions
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 8f024d41eefa..77bfcce64fe5 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -111,227 +111,21 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
-#include <net/ipip.h>
+#include <net/ip_tunnels.h>
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
-#define HASH_SIZE  16
-#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
-
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
 static int ipip_net_id __read_mostly;
-struct ipip_net {
-	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
-	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
-	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
-	struct ip_tunnel __rcu *tunnels_wc[1];
-	struct ip_tunnel __rcu **tunnels[4];
-
-	struct net_device *fb_tunnel_dev;
-};
 
 static int ipip_tunnel_init(struct net_device *dev);
-static void ipip_tunnel_setup(struct net_device *dev);
-static void ipip_dev_free(struct net_device *dev);
 static struct rtnl_link_ops ipip_link_ops __read_mostly;
 
-static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
-						  struct rtnl_link_stats64 *tot)
-{
-	int i;
-
-	for_each_possible_cpu(i) {
-		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
-		unsigned int start;
-
-		do {
-			start = u64_stats_fetch_begin_bh(&tstats->syncp);
-			rx_packets = tstats->rx_packets;
-			tx_packets = tstats->tx_packets;
-			rx_bytes = tstats->rx_bytes;
-			tx_bytes = tstats->tx_bytes;
-		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
-		tot->rx_packets += rx_packets;
-		tot->tx_packets += tx_packets;
-		tot->rx_bytes += rx_bytes;
-		tot->tx_bytes += tx_bytes;
-	}
-
-	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
-	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
-	tot->tx_dropped = dev->stats.tx_dropped;
-	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
-	tot->tx_errors = dev->stats.tx_errors;
-	tot->collisions = dev->stats.collisions;
-
-	return tot;
-}
-
-static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
-		__be32 remote, __be32 local)
-{
-	unsigned int h0 = HASH(remote);
-	unsigned int h1 = HASH(local);
-	struct ip_tunnel *t;
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
-	for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
-		if (local == t->parms.iph.saddr &&
-		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
-			return t;
-
-	for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
-		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
-			return t;
-
-	for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
-		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
-			return t;
-
-	t = rcu_dereference(ipn->tunnels_wc[0]);
-	if (t && (t->dev->flags&IFF_UP))
-		return t;
-	return NULL;
-}
-
-static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
-		struct ip_tunnel_parm *parms)
-{
-	__be32 remote = parms->iph.daddr;
-	__be32 local = parms->iph.saddr;
-	unsigned int h = 0;
-	int prio = 0;
-
-	if (remote) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	if (local) {
-		prio |= 1;
-		h ^= HASH(local);
-	}
-	return &ipn->tunnels[prio][h];
-}
-
-static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
-		struct ip_tunnel *t)
-{
-	return __ipip_bucket(ipn, &t->parms);
-}
-
-static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
-{
-	struct ip_tunnel __rcu **tp;
-	struct ip_tunnel *iter;
-
-	for (tp = ipip_bucket(ipn, t);
-	     (iter = rtnl_dereference(*tp)) != NULL;
-	     tp = &iter->next) {
-		if (t == iter) {
-			rcu_assign_pointer(*tp, t->next);
-			break;
-		}
-	}
-}
-
-static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
-{
-	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
-
-	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
-	rcu_assign_pointer(*tp, t);
-}
-
-static int ipip_tunnel_create(struct net_device *dev)
-{
-	struct ip_tunnel *t = netdev_priv(dev);
-	struct net *net = dev_net(dev);
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-	int err;
-
-	err = ipip_tunnel_init(dev);
-	if (err < 0)
-		goto out;
-
-	err = register_netdevice(dev);
-	if (err < 0)
-		goto out;
-
-	strcpy(t->parms.name, dev->name);
-	dev->rtnl_link_ops = &ipip_link_ops;
-
-	dev_hold(dev);
-	ipip_tunnel_link(ipn, t);
-	return 0;
-
-out:
-	return err;
-}
-
-static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
-		struct ip_tunnel_parm *parms, int create)
-{
-	__be32 remote = parms->iph.daddr;
-	__be32 local = parms->iph.saddr;
-	struct ip_tunnel *t, *nt;
-	struct ip_tunnel __rcu **tp;
-	struct net_device *dev;
-	char name[IFNAMSIZ];
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
-	for (tp = __ipip_bucket(ipn, parms);
-	     (t = rtnl_dereference(*tp)) != NULL;
-	     tp = &t->next) {
-		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
-			return t;
-	}
-	if (!create)
-		return NULL;
-
-	if (parms->name[0])
-		strlcpy(name, parms->name, IFNAMSIZ);
-	else
-		strcpy(name, "tunl%d");
-
-	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
-	if (dev == NULL)
-		return NULL;
-
-	dev_net_set(dev, net);
-
-	nt = netdev_priv(dev);
-	nt->parms = *parms;
-
-	if (ipip_tunnel_create(dev) < 0)
-		goto failed_free;
-
-	return nt;
-
-failed_free:
-	ipip_dev_free(dev);
-	return NULL;
-}
-
-/* called with RTNL */
-static void ipip_tunnel_uninit(struct net_device *dev)
-{
-	struct net *net = dev_net(dev);
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
-	if (dev == ipn->fb_tunnel_dev)
-		RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
-	else
-		ipip_tunnel_unlink(ipn, netdev_priv(dev));
-	dev_put(dev);
-}
-
 static int ipip_err(struct sk_buff *skb, u32 info)
 {
 
@@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)
    8 bytes of packet payload. It means, that precise relaying of
    ICMP in the real Internet is absolutely infeasible.
  */
+	struct net *net = dev_net(skb->dev);
+	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
-	const int type = icmp_hdr(skb)->type;
-	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	int err;
-
-	switch (type) {
-	default:
-	case ICMP_PARAMETERPROB:
-		return 0;
-
-	case ICMP_DEST_UNREACH:
-		switch (code) {
-		case ICMP_SR_FAILED:
-		case ICMP_PORT_UNREACH:
-			/* Impossible event. */
-			return 0;
-		default:
-			/* All others are translated to HOST_UNREACH.
-			   rfc2003 contains "deep thoughts" about NET_UNREACH,
-			   I believe they are just ether pollution. --ANK
-			 */
-			break;
-		}
-		break;
-	case ICMP_TIME_EXCEEDED:
-		if (code != ICMP_EXC_TTL)
-			return 0;
-		break;
-	case ICMP_REDIRECT:
-		break;
-	}
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 
 	err = -ENOENT;
-	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
+	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+			     iph->daddr, iph->saddr, 0);
 	if (t == NULL)
 		goto out;
 
@@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 	else
 		t->err_count = 1;
 	t->err_time = jiffies;
-out:
 
+out:
 	return err;
 }
 
+static const struct tnl_ptk_info tpi = {
+	/* no tunnel info required for ipip. */
+	.proto = htons(ETH_P_IP),
+};
+
 static int ipip_rcv(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dev);
+	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph = ip_hdr(skb);
-	int err;
-
-	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
-	if (tunnel != NULL) {
-		struct pcpu_tstats *tstats;
 
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+			iph->saddr, iph->daddr, 0);
+	if (tunnel) {
 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto drop;
-
-		secpath_reset(skb);
-
-		skb->mac_header = skb->network_header;
-		skb_reset_network_header(skb);
-		skb->protocol = htons(ETH_P_IP);
-		skb->pkt_type = PACKET_HOST;
-
-		__skb_tunnel_rx(skb, tunnel->dev);
-
-		err = IP_ECN_decapsulate(iph, skb);
-		if (unlikely(err)) {
-			if (log_ecn_error)
-				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
-						     &iph->saddr, iph->tos);
-			if (err > 1) {
-				++tunnel->dev->stats.rx_frame_errors;
-				++tunnel->dev->stats.rx_errors;
-				goto drop;
-			}
-		}
-
-		tstats = this_cpu_ptr(tunnel->dev->tstats);
-		u64_stats_update_begin(&tstats->syncp);
-		tstats->rx_packets++;
-		tstats->rx_bytes += skb->len;
-		u64_stats_update_end(&tstats->syncp);
-
-		netif_rx(skb);
-		return 0;
+		return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
 	}
 
 	return -1;
@@ -463,329 +209,64 @@ drop:
  * This function assumes it is being called from dev_queue_xmit()
  * and that skb is filled properly by that function.
  */
-
 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *tiph = &tunnel->parms.iph;
-	u8 tos = tunnel->parms.iph.tos;
-	__be16 df = tiph->frag_off;
-	struct rtable *rt;			/* Route to the other host */
-	struct net_device *tdev;		/* Device to other host */
-	const struct iphdr *old_iph;
-	struct iphdr *iph;			/* Our new IP header */
-	unsigned int max_headroom;		/* The extra header space needed */
-	__be32 dst = tiph->daddr;
-	struct flowi4 fl4;
-	int mtu;
-
-	if (skb->protocol != htons(ETH_P_IP))
-		goto tx_error;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    skb_checksum_help(skb))
+	if (unlikely(skb->protocol != htons(ETH_P_IP)))
 		goto tx_error;
 
-	old_iph = ip_hdr(skb);
-
-	if (tos & 1)
-		tos = old_iph->tos;
-
-	if (!dst) {
-		/* NBMA tunnel */
-		if ((rt = skb_rtable(skb)) == NULL) {
-			dev->stats.tx_fifo_errors++;
-			goto tx_error;
-		}
-		dst = rt_nexthop(rt, old_iph->daddr);
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
 	}
 
-	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
-				   dst, tiph->saddr,
-				   0, 0,
-				   IPPROTO_IPIP, RT_TOS(tos),
-				   tunnel->parms.link);
-	if (IS_ERR(rt)) {
-		dev->stats.tx_carrier_errors++;
-		goto tx_error_icmp;
-	}
-	tdev = rt->dst.dev;
-
-	if (tdev == dev) {
-		ip_rt_put(rt);
-		dev->stats.collisions++;
-		goto tx_error;
-	}
-
-	df |= old_iph->frag_off & htons(IP_DF);
-
-	if (df) {
-		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
-
-		if (mtu < 68) {
-			dev->stats.collisions++;
-			ip_rt_put(rt);
-			goto tx_error;
-		}
-
-		if (skb_dst(skb))
-			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
-		if ((old_iph->frag_off & htons(IP_DF)) &&
-		    mtu < ntohs(old_iph->tot_len)) {
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-				  htonl(mtu));
-			ip_rt_put(rt);
-			goto tx_error;
-		}
-	}
-
-	if (tunnel->err_count > 0) {
-		if (time_before(jiffies,
-				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
-			tunnel->err_count--;
-			dst_link_failure(skb);
-		} else
-			tunnel->err_count = 0;
-	}
-
-	/*
-	 * Okay, now see if we can stuff it in the buffer as-is.
-	 */
-	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
-
-	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
-	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
-		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
-		if (!new_skb) {
-			ip_rt_put(rt);
-			dev->stats.tx_dropped++;
-			dev_kfree_skb(skb);
-			return NETDEV_TX_OK;
-		}
-		if (skb->sk)
-			skb_set_owner_w(new_skb, skb->sk);
-		dev_kfree_skb(skb);
-		skb = new_skb;
-		old_iph = ip_hdr(skb);
-	}
-
-	skb->transport_header = skb->network_header;
-	skb_push(skb, sizeof(struct iphdr));
-	skb_reset_network_header(skb);
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
-			      IPSKB_REROUTED);
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
-	/*
-	 * Push down and install the IPIP header.
-	 */
-
-	iph = ip_hdr(skb);
-	iph->version = 4;
-	iph->ihl = sizeof(struct iphdr)>>2;
-	iph->frag_off = df;
-	iph->protocol = IPPROTO_IPIP;
-	iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
-	iph->daddr = fl4.daddr;
-	iph->saddr = fl4.saddr;
-
-	if ((iph->ttl = tiph->ttl) == 0)
-		iph->ttl = old_iph->ttl;
-
-	iptunnel_xmit(skb, dev);
+	ip_tunnel_xmit(skb, dev, tiph);
 	return NETDEV_TX_OK;
 
-tx_error_icmp:
-	dst_link_failure(skb);
 tx_error:
 	dev->stats.tx_errors++;
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
 
-static void ipip_tunnel_bind_dev(struct net_device *dev)
-{
-	struct net_device *tdev = NULL;
-	struct ip_tunnel *tunnel;
-	const struct iphdr *iph;
-
-	tunnel = netdev_priv(dev);
-	iph = &tunnel->parms.iph;
-
-	if (iph->daddr) {
-		struct rtable *rt;
-		struct flowi4 fl4;
-
-		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
-					   iph->daddr, iph->saddr,
-					   0, 0,
-					   IPPROTO_IPIP,
-					   RT_TOS(iph->tos),
-					   tunnel->parms.link);
-		if (!IS_ERR(rt)) {
-			tdev = rt->dst.dev;
-			ip_rt_put(rt);
-		}
-		dev->flags |= IFF_POINTOPOINT;
-	}
-
-	if (!tdev && tunnel->parms.link)
-		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
-
-	if (tdev) {
-		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
-		dev->mtu = tdev->mtu - sizeof(struct iphdr);
-	}
-	dev->iflink = tunnel->parms.link;
-}
-
-static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
-{
-	struct net *net = dev_net(t->dev);
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
-	ipip_tunnel_unlink(ipn, t);
-	synchronize_net();
-	t->parms.iph.saddr = p->iph.saddr;
-	t->parms.iph.daddr = p->iph.daddr;
-	memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
-	memcpy(t->dev->broadcast, &p->iph.daddr, 4);
-	ipip_tunnel_link(ipn, t);
-	t->parms.iph.ttl = p->iph.ttl;
-	t->parms.iph.tos = p->iph.tos;
-	t->parms.iph.frag_off = p->iph.frag_off;
-	if (t->parms.link != p->link) {
-		t->parms.link = p->link;
-		ipip_tunnel_bind_dev(t->dev);
-	}
-	netdev_state_change(t->dev);
-}
-
 static int
-ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	int err = 0;
 	struct ip_tunnel_parm p;
-	struct ip_tunnel *t;
-	struct net *net = dev_net(dev);
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
-	switch (cmd) {
-	case SIOCGETTUNNEL:
-		t = NULL;
-		if (dev == ipn->fb_tunnel_dev) {
-			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
-				err = -EFAULT;
-				break;
-			}
-			t = ipip_tunnel_locate(net, &p, 0);
-		}
-		if (t == NULL)
-			t = netdev_priv(dev);
-		memcpy(&p, &t->parms, sizeof(p));
-		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
-			err = -EFAULT;
-		break;
-
-	case SIOCADDTUNNEL:
-	case SIOCCHGTUNNEL:
-		err = -EPERM;
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			goto done;
-
-		err = -EFAULT;
-		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-			goto done;
-
-		err = -EINVAL;
-		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
-		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
-			goto done;
-		if (p.iph.ttl)
-			p.iph.frag_off |= htons(IP_DF);
-
-		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
-
-		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
-			if (t != NULL) {
-				if (t->dev != dev) {
-					err = -EEXIST;
-					break;
-				}
-			} else {
-				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
-				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
-					err = -EINVAL;
-					break;
-				}
-				t = netdev_priv(dev);
-			}
-
-			ipip_tunnel_update(t, &p);
-		}
-
-		if (t) {
-			err = 0;
-			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
-				err = -EFAULT;
-		} else
-			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
-		break;
-
-	case SIOCDELTUNNEL:
-		err = -EPERM;
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			goto done;
-
-		if (dev == ipn->fb_tunnel_dev) {
-			err = -EFAULT;
-			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-				goto done;
-			err = -ENOENT;
-			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
-				goto done;
-			err = -EPERM;
-			if (t->dev == ipn->fb_tunnel_dev)
-				goto done;
-			dev = t->dev;
-		}
-		unregister_netdevice(dev);
-		err = 0;
-		break;
 
-	default:
-		err = -EINVAL;
-	}
-
-done:
-	return err;
-}
+	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+		return -EFAULT;
 
-static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+	if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+	    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+		return -EINVAL;
+	if (p.i_key || p.o_key || p.i_flags || p.o_flags)
 		return -EINVAL;
-	dev->mtu = new_mtu;
+	if (p.iph.ttl)
+		p.iph.frag_off |= htons(IP_DF);
+
+	err = ip_tunnel_ioctl(dev, &p, cmd);
+	if (err)
+		return err;
+
+	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+		return -EFAULT;
+
 	return 0;
 }
 
 static const struct net_device_ops ipip_netdev_ops = {
-	.ndo_uninit	= ipip_tunnel_uninit,
+	.ndo_init	= ipip_tunnel_init,
+	.ndo_uninit	= ip_tunnel_uninit,
 	.ndo_start_xmit	= ipip_tunnel_xmit,
 	.ndo_do_ioctl	= ipip_tunnel_ioctl,
-	.ndo_change_mtu	= ipip_tunnel_change_mtu,
-	.ndo_get_stats64 = ipip_get_stats64,
+	.ndo_change_mtu	= ip_tunnel_change_mtu,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
 };
 
-static void ipip_dev_free(struct net_device *dev)
-{
-	free_percpu(dev->tstats);
-	free_netdev(dev);
-}
-
 #define IPIP_FEATURES (NETIF_F_SG |		\
 		       NETIF_F_FRAGLIST |	\
 		       NETIF_F_HIGHDMA |	\
@@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev)
 static void ipip_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &ipip_netdev_ops;
-	dev->destructor		= ipip_dev_free;
 
 	dev->type		= ARPHRD_TUNNEL;
-	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
-	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
@@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev)
 
 	dev->features		|= IPIP_FEATURES;
 	dev->hw_features	|= IPIP_FEATURES;
+	ip_tunnel_setup(dev, ipip_net_id);
 }
 
 static int ipip_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
-	tunnel->dev = dev;
-
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	ipip_tunnel_bind_dev(dev);
-
-	dev->tstats = alloc_percpu(struct pcpu_tstats);
-	if (!dev->tstats)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
-{
-	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct iphdr *iph = &tunnel->parms.iph;
-	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
-
-	tunnel->dev = dev;
-	strcpy(tunnel->parms.name, dev->name);
-
-	iph->version = 4;
-	iph->protocol = IPPROTO_IPIP;
-	iph->ihl = 5;
-
-	dev->tstats = alloc_percpu(struct pcpu_tstats);
-	if (!dev->tstats)
-		return -ENOMEM;
-
-	dev_hold(dev);
-	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
-	return 0;
+	tunnel->hlen = 0;
+	tunnel->parms.iph.protocol = IPPROTO_IPIP;
+	return ip_tunnel_init(dev);
 }
 
 static void ipip_netlink_parms(struct nlattr *data[],
@@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[],
 static int ipip_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
-	struct net *net = dev_net(dev);
-	struct ip_tunnel *nt;
-
-	nt = netdev_priv(dev);
-	ipip_netlink_parms(data, &nt->parms);
-
-	if (ipip_tunnel_locate(net, &nt->parms, 0))
-		return -EEXIST;
+	struct ip_tunnel_parm p;
 
-	return ipip_tunnel_create(dev);
+	ipip_netlink_parms(data, &p);
+	return ip_tunnel_newlink(dev, tb, &p);
 }
 
 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 			   struct nlattr *data[])
 {
-	struct ip_tunnel *t;
 	struct ip_tunnel_parm p;
-	struct net *net = dev_net(dev);
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-
-	if (dev == ipn->fb_tunnel_dev)
-		return -EINVAL;
 
 	ipip_netlink_parms(data, &p);
 
@@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
 		return -EINVAL;
 
-	t = ipip_tunnel_locate(net, &p, 0);
-
-	if (t) {
-		if (t->dev != dev)
-			return -EEXIST;
-	} else
-		t = netdev_priv(dev);
-
-	ipip_tunnel_update(t, &p);
-	return 0;
+	return ip_tunnel_changelink(dev, tb, &p);
 }
 
 static size_t ipip_get_size(const struct net_device *dev)
@@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
 	.setup		= ipip_tunnel_setup,
 	.newlink	= ipip_newlink,
 	.changelink	= ipip_changelink,
+	.dellink	= ip_tunnel_dellink,
 	.get_size	= ipip_get_size,
 	.fill_info	= ipip_fill_info,
 };
@@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
 	.priority	=	1,
 };
 
-static const char banner[] __initconst =
-	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
-
-static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
-{
-	int prio;
-
-	for (prio = 1; prio < 4; prio++) {
-		int h;
-		for (h = 0; h < HASH_SIZE; h++) {
-			struct ip_tunnel *t;
-
-			t = rtnl_dereference(ipn->tunnels[prio][h]);
-			while (t != NULL) {
-				unregister_netdevice_queue(t->dev, head);
-				t = rtnl_dereference(t->next);
-			}
-		}
-	}
-}
-
 static int __net_init ipip_init_net(struct net *net)
 {
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-	struct ip_tunnel *t;
-	int err;
-
-	ipn->tunnels[0] = ipn->tunnels_wc;
-	ipn->tunnels[1] = ipn->tunnels_l;
-	ipn->tunnels[2] = ipn->tunnels_r;
-	ipn->tunnels[3] = ipn->tunnels_r_l;
-
-	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
-					  "tunl0",
-					  ipip_tunnel_setup);
-	if (!ipn->fb_tunnel_dev) {
-		err = -ENOMEM;
-		goto err_alloc_dev;
-	}
-	dev_net_set(ipn->fb_tunnel_dev, net);
-
-	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
-	if (err)
-		goto err_reg_dev;
-
-	if ((err = register_netdev(ipn->fb_tunnel_dev)))
-		goto err_reg_dev;
-
-	t = netdev_priv(ipn->fb_tunnel_dev);
-
-	strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
-	return 0;
-
-err_reg_dev:
-	ipip_dev_free(ipn->fb_tunnel_dev);
-err_alloc_dev:
-	/* nothing */
-	return err;
+	return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
 }
 
 static void __net_exit ipip_exit_net(struct net *net)
 {
-	struct ipip_net *ipn = net_generic(net, ipip_net_id);
-	LIST_HEAD(list);
-
-	rtnl_lock();
-	ipip_destroy_tunnels(ipn, &list);
-	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
-	unregister_netdevice_many(&list);
-	rtnl_unlock();
+	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+	ip_tunnel_delete_net(itn);
 }
 
 static struct pernet_operations ipip_net_ops = {
 	.init = ipip_init_net,
 	.exit = ipip_exit_net,
 	.id   = &ipip_net_id,
-	.size = sizeof(struct ipip_net),
+	.size = sizeof(struct ip_tunnel_net),
 };
 
 static int __init ipip_init(void)
 {
 	int err;
 
-	printk(banner);
+	pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
 
 	err = register_pernet_device(&ipip_net_ops);
 	if (err < 0)