aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ipip.c
diff options
context:
space:
mode:
author: Pravin B Shelar <pshelar@nicira.com> 2013-03-25 10:49:41 -0400
committer: David S. Miller <davem@davemloft.net> 2013-03-26 12:27:18 -0400
commit: fd58156e456d9f68fe04486be378d0bc93641532 (patch)
tree: 50dd9ea7c5b048dad0f9b39e95f3c0299448d86c /net/ipv4/ipip.c
parent: c54419321455631079c7d6e60bc732dd0c5914c5 (diff)
IPIP: Use ip-tunneling code.
Reuse common ip-tunneling code which is re-factored from GRE module.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ipip.c')
-rw-r--r-- net/ipv4/ipip.c 746
1 files changed, 56 insertions, 690 deletions
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index a557d6ab127a..77bfcce64fe5 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -117,221 +117,15 @@
117#include <net/net_namespace.h> 117#include <net/net_namespace.h>
118#include <net/netns/generic.h> 118#include <net/netns/generic.h>
119 119
120#define HASH_SIZE 16
121#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
122
123static bool log_ecn_error = true; 120static bool log_ecn_error = true;
124module_param(log_ecn_error, bool, 0644); 121module_param(log_ecn_error, bool, 0644);
125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 123
127static int ipip_net_id __read_mostly; 124static int ipip_net_id __read_mostly;
128struct ipip_net {
129 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
130 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
131 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
132 struct ip_tunnel __rcu *tunnels_wc[1];
133 struct ip_tunnel __rcu **tunnels[4];
134
135 struct net_device *fb_tunnel_dev;
136};
137 125
138static int ipip_tunnel_init(struct net_device *dev); 126static int ipip_tunnel_init(struct net_device *dev);
139static void ipip_tunnel_setup(struct net_device *dev);
140static void ipip_dev_free(struct net_device *dev);
141static struct rtnl_link_ops ipip_link_ops __read_mostly; 127static struct rtnl_link_ops ipip_link_ops __read_mostly;
142 128
143static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144 struct rtnl_link_stats64 *tot)
145{
146 int i;
147
148 for_each_possible_cpu(i) {
149 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151 unsigned int start;
152
153 do {
154 start = u64_stats_fetch_begin_bh(&tstats->syncp);
155 rx_packets = tstats->rx_packets;
156 tx_packets = tstats->tx_packets;
157 rx_bytes = tstats->rx_bytes;
158 tx_bytes = tstats->tx_bytes;
159 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161 tot->rx_packets += rx_packets;
162 tot->tx_packets += tx_packets;
163 tot->rx_bytes += rx_bytes;
164 tot->tx_bytes += tx_bytes;
165 }
166
167 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169 tot->tx_dropped = dev->stats.tx_dropped;
170 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171 tot->tx_errors = dev->stats.tx_errors;
172 tot->collisions = dev->stats.collisions;
173
174 return tot;
175}
176
177static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178 __be32 remote, __be32 local)
179{
180 unsigned int h0 = HASH(remote);
181 unsigned int h1 = HASH(local);
182 struct ip_tunnel *t;
183 struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186 if (local == t->parms.iph.saddr &&
187 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188 return t;
189
190 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192 return t;
193
194 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196 return t;
197
198 t = rcu_dereference(ipn->tunnels_wc[0]);
199 if (t && (t->dev->flags&IFF_UP))
200 return t;
201 return NULL;
202}
203
204static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205 struct ip_tunnel_parm *parms)
206{
207 __be32 remote = parms->iph.daddr;
208 __be32 local = parms->iph.saddr;
209 unsigned int h = 0;
210 int prio = 0;
211
212 if (remote) {
213 prio |= 2;
214 h ^= HASH(remote);
215 }
216 if (local) {
217 prio |= 1;
218 h ^= HASH(local);
219 }
220 return &ipn->tunnels[prio][h];
221}
222
223static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224 struct ip_tunnel *t)
225{
226 return __ipip_bucket(ipn, &t->parms);
227}
228
229static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230{
231 struct ip_tunnel __rcu **tp;
232 struct ip_tunnel *iter;
233
234 for (tp = ipip_bucket(ipn, t);
235 (iter = rtnl_dereference(*tp)) != NULL;
236 tp = &iter->next) {
237 if (t == iter) {
238 rcu_assign_pointer(*tp, t->next);
239 break;
240 }
241 }
242}
243
244static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245{
246 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249 rcu_assign_pointer(*tp, t);
250}
251
252static int ipip_tunnel_create(struct net_device *dev)
253{
254 struct ip_tunnel *t = netdev_priv(dev);
255 struct net *net = dev_net(dev);
256 struct ipip_net *ipn = net_generic(net, ipip_net_id);
257 int err;
258
259 err = ipip_tunnel_init(dev);
260 if (err < 0)
261 goto out;
262
263 err = register_netdevice(dev);
264 if (err < 0)
265 goto out;
266
267 strcpy(t->parms.name, dev->name);
268 dev->rtnl_link_ops = &ipip_link_ops;
269
270 dev_hold(dev);
271 ipip_tunnel_link(ipn, t);
272 return 0;
273
274out:
275 return err;
276}
277
278static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
279 struct ip_tunnel_parm *parms, int create)
280{
281 __be32 remote = parms->iph.daddr;
282 __be32 local = parms->iph.saddr;
283 struct ip_tunnel *t, *nt;
284 struct ip_tunnel __rcu **tp;
285 struct net_device *dev;
286 char name[IFNAMSIZ];
287 struct ipip_net *ipn = net_generic(net, ipip_net_id);
288
289 for (tp = __ipip_bucket(ipn, parms);
290 (t = rtnl_dereference(*tp)) != NULL;
291 tp = &t->next) {
292 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
293 return t;
294 }
295 if (!create)
296 return NULL;
297
298 if (parms->name[0])
299 strlcpy(name, parms->name, IFNAMSIZ);
300 else
301 strcpy(name, "tunl%d");
302
303 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
304 if (dev == NULL)
305 return NULL;
306
307 dev_net_set(dev, net);
308
309 nt = netdev_priv(dev);
310 nt->parms = *parms;
311
312 if (ipip_tunnel_create(dev) < 0)
313 goto failed_free;
314
315 return nt;
316
317failed_free:
318 ipip_dev_free(dev);
319 return NULL;
320}
321
322/* called with RTNL */
323static void ipip_tunnel_uninit(struct net_device *dev)
324{
325 struct net *net = dev_net(dev);
326 struct ipip_net *ipn = net_generic(net, ipip_net_id);
327
328 if (dev == ipn->fb_tunnel_dev)
329 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
330 else
331 ipip_tunnel_unlink(ipn, netdev_priv(dev));
332 dev_put(dev);
333}
334
335static int ipip_err(struct sk_buff *skb, u32 info) 129static int ipip_err(struct sk_buff *skb, u32 info)
336{ 130{
337 131
@@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)
339 8 bytes of packet payload. It means, that precise relaying of 133 8 bytes of packet payload. It means, that precise relaying of
340 ICMP in the real Internet is absolutely infeasible. 134 ICMP in the real Internet is absolutely infeasible.
341 */ 135 */
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
342 const struct iphdr *iph = (const struct iphdr *)skb->data; 138 const struct iphdr *iph = (const struct iphdr *)skb->data;
343 const int type = icmp_hdr(skb)->type;
344 const int code = icmp_hdr(skb)->code;
345 struct ip_tunnel *t; 139 struct ip_tunnel *t;
346 int err; 140 int err;
347 141 const int type = icmp_hdr(skb)->type;
348 switch (type) { 142 const int code = icmp_hdr(skb)->code;
349 default:
350 case ICMP_PARAMETERPROB:
351 return 0;
352
353 case ICMP_DEST_UNREACH:
354 switch (code) {
355 case ICMP_SR_FAILED:
356 case ICMP_PORT_UNREACH:
357 /* Impossible event. */
358 return 0;
359 default:
360 /* All others are translated to HOST_UNREACH.
361 rfc2003 contains "deep thoughts" about NET_UNREACH,
362 I believe they are just ether pollution. --ANK
363 */
364 break;
365 }
366 break;
367 case ICMP_TIME_EXCEEDED:
368 if (code != ICMP_EXC_TTL)
369 return 0;
370 break;
371 case ICMP_REDIRECT:
372 break;
373 }
374 143
375 err = -ENOENT; 144 err = -ENOENT;
376 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 145 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
146 iph->daddr, iph->saddr, 0);
377 if (t == NULL) 147 if (t == NULL)
378 goto out; 148 goto out;
379 149
@@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info)
403 else 173 else
404 t->err_count = 1; 174 t->err_count = 1;
405 t->err_time = jiffies; 175 t->err_time = jiffies;
406out:
407 176
177out:
408 return err; 178 return err;
409} 179}
410 180
181static const struct tnl_ptk_info tpi = {
182 /* no tunnel info required for ipip. */
183 .proto = htons(ETH_P_IP),
184};
185
411static int ipip_rcv(struct sk_buff *skb) 186static int ipip_rcv(struct sk_buff *skb)
412{ 187{
188 struct net *net = dev_net(skb->dev);
189 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
413 struct ip_tunnel *tunnel; 190 struct ip_tunnel *tunnel;
414 const struct iphdr *iph = ip_hdr(skb); 191 const struct iphdr *iph = ip_hdr(skb);
415 int err;
416
417 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418 if (tunnel != NULL) {
419 struct pcpu_tstats *tstats;
420 192
193 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
194 iph->saddr, iph->daddr, 0);
195 if (tunnel) {
421 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
422 goto drop; 197 goto drop;
423 198 return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
424 secpath_reset(skb);
425
426 skb->mac_header = skb->network_header;
427 skb_reset_network_header(skb);
428 skb->protocol = htons(ETH_P_IP);
429 skb->pkt_type = PACKET_HOST;
430
431 __skb_tunnel_rx(skb, tunnel->dev);
432
433 err = IP_ECN_decapsulate(iph, skb);
434 if (unlikely(err)) {
435 if (log_ecn_error)
436 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
437 &iph->saddr, iph->tos);
438 if (err > 1) {
439 ++tunnel->dev->stats.rx_frame_errors;
440 ++tunnel->dev->stats.rx_errors;
441 goto drop;
442 }
443 }
444
445 tstats = this_cpu_ptr(tunnel->dev->tstats);
446 u64_stats_update_begin(&tstats->syncp);
447 tstats->rx_packets++;
448 tstats->rx_bytes += skb->len;
449 u64_stats_update_end(&tstats->syncp);
450
451 netif_rx(skb);
452 return 0;
453 } 199 }
454 200
455 return -1; 201 return -1;
@@ -463,333 +209,64 @@ drop:
463 * This function assumes it is being called from dev_queue_xmit() 209 * This function assumes it is being called from dev_queue_xmit()
464 * and that skb is filled properly by that function. 210 * and that skb is filled properly by that function.
465 */ 211 */
466
467static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 212static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
468{ 213{
469 struct ip_tunnel *tunnel = netdev_priv(dev); 214 struct ip_tunnel *tunnel = netdev_priv(dev);
470 const struct iphdr *tiph = &tunnel->parms.iph; 215 const struct iphdr *tiph = &tunnel->parms.iph;
471 u8 tos = tunnel->parms.iph.tos;
472 __be16 df = tiph->frag_off;
473 struct rtable *rt; /* Route to the other host */
474 struct net_device *tdev; /* Device to other host */
475 const struct iphdr *old_iph;
476 struct iphdr *iph; /* Our new IP header */
477 unsigned int max_headroom; /* The extra header space needed */
478 __be32 dst = tiph->daddr;
479 struct flowi4 fl4;
480 int mtu;
481
482 if (skb->protocol != htons(ETH_P_IP))
483 goto tx_error;
484 old_iph = ip_hdr(skb);
485
486 if (tos & 1)
487 tos = old_iph->tos;
488
489 if (!dst) {
490 /* NBMA tunnel */
491 if ((rt = skb_rtable(skb)) == NULL) {
492 dev->stats.tx_fifo_errors++;
493 goto tx_error;
494 }
495 dst = rt_nexthop(rt, old_iph->daddr);
496 }
497 216
498 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, 217 if (unlikely(skb->protocol != htons(ETH_P_IP)))
499 dst, tiph->saddr,
500 0, 0,
501 IPPROTO_IPIP, RT_TOS(tos),
502 tunnel->parms.link);
503 if (IS_ERR(rt)) {
504 dev->stats.tx_carrier_errors++;
505 goto tx_error_icmp;
506 }
507 tdev = rt->dst.dev;
508
509 if (tdev == dev) {
510 ip_rt_put(rt);
511 dev->stats.collisions++;
512 goto tx_error; 218 goto tx_error;
513 }
514
515 df |= old_iph->frag_off & htons(IP_DF);
516
517 if (df) {
518 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
519
520 if (mtu < 68) {
521 dev->stats.collisions++;
522 ip_rt_put(rt);
523 goto tx_error;
524 }
525
526 if (skb_dst(skb))
527 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
528
529 if ((old_iph->frag_off & htons(IP_DF)) &&
530 mtu < ntohs(old_iph->tot_len)) {
531 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
532 htonl(mtu));
533 ip_rt_put(rt);
534 goto tx_error;
535 }
536 }
537
538 if (tunnel->err_count > 0) {
539 if (time_before(jiffies,
540 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
541 tunnel->err_count--;
542 dst_link_failure(skb);
543 } else
544 tunnel->err_count = 0;
545 }
546 219
547 /* 220 if (likely(!skb->encapsulation)) {
548 * Okay, now see if we can stuff it in the buffer as-is.
549 */
550 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
551
552 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
553 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
554 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
555 if (!new_skb) {
556 ip_rt_put(rt);
557 dev->stats.tx_dropped++;
558 dev_kfree_skb(skb);
559 return NETDEV_TX_OK;
560 }
561 if (skb->sk)
562 skb_set_owner_w(new_skb, skb->sk);
563 dev_kfree_skb(skb);
564 skb = new_skb;
565 old_iph = ip_hdr(skb);
566 }
567
568 if (!skb->encapsulation) {
569 skb_reset_inner_headers(skb); 221 skb_reset_inner_headers(skb);
570 skb->encapsulation = 1; 222 skb->encapsulation = 1;
571 } 223 }
572 if (skb->ip_summed != CHECKSUM_PARTIAL)
573 skb->ip_summed = CHECKSUM_NONE;
574
575 skb->transport_header = skb->network_header;
576 skb_push(skb, sizeof(struct iphdr));
577 skb_reset_network_header(skb);
578 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
579 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
580 IPSKB_REROUTED);
581 skb_dst_drop(skb);
582 skb_dst_set(skb, &rt->dst);
583
584 /*
585 * Push down and install the IPIP header.
586 */
587
588 iph = ip_hdr(skb);
589 iph->version = 4;
590 iph->ihl = sizeof(struct iphdr)>>2;
591 iph->frag_off = df;
592 iph->protocol = IPPROTO_IPIP;
593 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
594 iph->daddr = fl4.daddr;
595 iph->saddr = fl4.saddr;
596 tunnel_ip_select_ident(skb, old_iph, &rt->dst);
597
598 if ((iph->ttl = tiph->ttl) == 0)
599 iph->ttl = old_iph->ttl;
600
601 iptunnel_xmit(skb, dev);
602 224
225 ip_tunnel_xmit(skb, dev, tiph);
603 return NETDEV_TX_OK; 226 return NETDEV_TX_OK;
604 227
605tx_error_icmp:
606 dst_link_failure(skb);
607tx_error: 228tx_error:
608 dev->stats.tx_errors++; 229 dev->stats.tx_errors++;
609 dev_kfree_skb(skb); 230 dev_kfree_skb(skb);
610 return NETDEV_TX_OK; 231 return NETDEV_TX_OK;
611} 232}
612 233
613static void ipip_tunnel_bind_dev(struct net_device *dev)
614{
615 struct net_device *tdev = NULL;
616 struct ip_tunnel *tunnel;
617 const struct iphdr *iph;
618
619 tunnel = netdev_priv(dev);
620 iph = &tunnel->parms.iph;
621
622 if (iph->daddr) {
623 struct rtable *rt;
624 struct flowi4 fl4;
625
626 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
627 iph->daddr, iph->saddr,
628 0, 0,
629 IPPROTO_IPIP,
630 RT_TOS(iph->tos),
631 tunnel->parms.link);
632 if (!IS_ERR(rt)) {
633 tdev = rt->dst.dev;
634 ip_rt_put(rt);
635 }
636 dev->flags |= IFF_POINTOPOINT;
637 }
638
639 if (!tdev && tunnel->parms.link)
640 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
641
642 if (tdev) {
643 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
644 dev->mtu = tdev->mtu - sizeof(struct iphdr);
645 }
646 dev->iflink = tunnel->parms.link;
647}
648
649static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
650{
651 struct net *net = dev_net(t->dev);
652 struct ipip_net *ipn = net_generic(net, ipip_net_id);
653
654 ipip_tunnel_unlink(ipn, t);
655 synchronize_net();
656 t->parms.iph.saddr = p->iph.saddr;
657 t->parms.iph.daddr = p->iph.daddr;
658 memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
659 memcpy(t->dev->broadcast, &p->iph.daddr, 4);
660 ipip_tunnel_link(ipn, t);
661 t->parms.iph.ttl = p->iph.ttl;
662 t->parms.iph.tos = p->iph.tos;
663 t->parms.iph.frag_off = p->iph.frag_off;
664 if (t->parms.link != p->link) {
665 t->parms.link = p->link;
666 ipip_tunnel_bind_dev(t->dev);
667 }
668 netdev_state_change(t->dev);
669}
670
671static int 234static int
672ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 235ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
673{ 236{
674 int err = 0; 237 int err = 0;
675 struct ip_tunnel_parm p; 238 struct ip_tunnel_parm p;
676 struct ip_tunnel *t;
677 struct net *net = dev_net(dev);
678 struct ipip_net *ipn = net_generic(net, ipip_net_id);
679
680 switch (cmd) {
681 case SIOCGETTUNNEL:
682 t = NULL;
683 if (dev == ipn->fb_tunnel_dev) {
684 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
685 err = -EFAULT;
686 break;
687 }
688 t = ipip_tunnel_locate(net, &p, 0);
689 }
690 if (t == NULL)
691 t = netdev_priv(dev);
692 memcpy(&p, &t->parms, sizeof(p));
693 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
694 err = -EFAULT;
695 break;
696
697 case SIOCADDTUNNEL:
698 case SIOCCHGTUNNEL:
699 err = -EPERM;
700 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
701 goto done;
702
703 err = -EFAULT;
704 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
705 goto done;
706
707 err = -EINVAL;
708 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
709 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
710 goto done;
711 if (p.iph.ttl)
712 p.iph.frag_off |= htons(IP_DF);
713
714 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
715
716 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
717 if (t != NULL) {
718 if (t->dev != dev) {
719 err = -EEXIST;
720 break;
721 }
722 } else {
723 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
724 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
725 err = -EINVAL;
726 break;
727 }
728 t = netdev_priv(dev);
729 }
730
731 ipip_tunnel_update(t, &p);
732 }
733
734 if (t) {
735 err = 0;
736 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
737 err = -EFAULT;
738 } else
739 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
740 break;
741
742 case SIOCDELTUNNEL:
743 err = -EPERM;
744 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
745 goto done;
746
747 if (dev == ipn->fb_tunnel_dev) {
748 err = -EFAULT;
749 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
750 goto done;
751 err = -ENOENT;
752 if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
753 goto done;
754 err = -EPERM;
755 if (t->dev == ipn->fb_tunnel_dev)
756 goto done;
757 dev = t->dev;
758 }
759 unregister_netdevice(dev);
760 err = 0;
761 break;
762
763 default:
764 err = -EINVAL;
765 }
766 239
767done: 240 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
768 return err; 241 return -EFAULT;
769}
770 242
771static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 243 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
772{ 244 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
773 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) 245 return -EINVAL;
246 if (p.i_key || p.o_key || p.i_flags || p.o_flags)
774 return -EINVAL; 247 return -EINVAL;
775 dev->mtu = new_mtu; 248 if (p.iph.ttl)
249 p.iph.frag_off |= htons(IP_DF);
250
251 err = ip_tunnel_ioctl(dev, &p, cmd);
252 if (err)
253 return err;
254
255 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
256 return -EFAULT;
257
776 return 0; 258 return 0;
777} 259}
778 260
779static const struct net_device_ops ipip_netdev_ops = { 261static const struct net_device_ops ipip_netdev_ops = {
780 .ndo_uninit = ipip_tunnel_uninit, 262 .ndo_init = ipip_tunnel_init,
263 .ndo_uninit = ip_tunnel_uninit,
781 .ndo_start_xmit = ipip_tunnel_xmit, 264 .ndo_start_xmit = ipip_tunnel_xmit,
782 .ndo_do_ioctl = ipip_tunnel_ioctl, 265 .ndo_do_ioctl = ipip_tunnel_ioctl,
783 .ndo_change_mtu = ipip_tunnel_change_mtu, 266 .ndo_change_mtu = ip_tunnel_change_mtu,
784 .ndo_get_stats64 = ipip_get_stats64, 267 .ndo_get_stats64 = ip_tunnel_get_stats64,
785}; 268};
786 269
787static void ipip_dev_free(struct net_device *dev)
788{
789 free_percpu(dev->tstats);
790 free_netdev(dev);
791}
792
793#define IPIP_FEATURES (NETIF_F_SG | \ 270#define IPIP_FEATURES (NETIF_F_SG | \
794 NETIF_F_FRAGLIST | \ 271 NETIF_F_FRAGLIST | \
795 NETIF_F_HIGHDMA | \ 272 NETIF_F_HIGHDMA | \
@@ -798,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev)
798static void ipip_tunnel_setup(struct net_device *dev) 275static void ipip_tunnel_setup(struct net_device *dev)
799{ 276{
800 dev->netdev_ops = &ipip_netdev_ops; 277 dev->netdev_ops = &ipip_netdev_ops;
801 dev->destructor = ipip_dev_free;
802 278
803 dev->type = ARPHRD_TUNNEL; 279 dev->type = ARPHRD_TUNNEL;
804 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
805 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
806 dev->flags = IFF_NOARP; 280 dev->flags = IFF_NOARP;
807 dev->iflink = 0; 281 dev->iflink = 0;
808 dev->addr_len = 4; 282 dev->addr_len = 4;
@@ -812,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev)
812 286
813 dev->features |= IPIP_FEATURES; 287 dev->features |= IPIP_FEATURES;
814 dev->hw_features |= IPIP_FEATURES; 288 dev->hw_features |= IPIP_FEATURES;
289 ip_tunnel_setup(dev, ipip_net_id);
815} 290}
816 291
817static int ipip_tunnel_init(struct net_device *dev) 292static int ipip_tunnel_init(struct net_device *dev)
818{ 293{
819 struct ip_tunnel *tunnel = netdev_priv(dev); 294 struct ip_tunnel *tunnel = netdev_priv(dev);
820 295
821 tunnel->dev = dev;
822
823 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 296 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
824 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 297 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
825 298
826 ipip_tunnel_bind_dev(dev); 299 tunnel->hlen = 0;
827 300 tunnel->parms.iph.protocol = IPPROTO_IPIP;
828 dev->tstats = alloc_percpu(struct pcpu_tstats); 301 return ip_tunnel_init(dev);
829 if (!dev->tstats)
830 return -ENOMEM;
831
832 return 0;
833}
834
835static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
836{
837 struct ip_tunnel *tunnel = netdev_priv(dev);
838 struct iphdr *iph = &tunnel->parms.iph;
839 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
840
841 tunnel->dev = dev;
842 strcpy(tunnel->parms.name, dev->name);
843
844 iph->version = 4;
845 iph->protocol = IPPROTO_IPIP;
846 iph->ihl = 5;
847
848 dev->tstats = alloc_percpu(struct pcpu_tstats);
849 if (!dev->tstats)
850 return -ENOMEM;
851
852 dev_hold(dev);
853 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
854 return 0;
855} 302}
856 303
857static void ipip_netlink_parms(struct nlattr *data[], 304static void ipip_netlink_parms(struct nlattr *data[],
@@ -891,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[],
891static int ipip_newlink(struct net *src_net, struct net_device *dev, 338static int ipip_newlink(struct net *src_net, struct net_device *dev,
892 struct nlattr *tb[], struct nlattr *data[]) 339 struct nlattr *tb[], struct nlattr *data[])
893{ 340{
894 struct net *net = dev_net(dev); 341 struct ip_tunnel_parm p;
895 struct ip_tunnel *nt;
896
897 nt = netdev_priv(dev);
898 ipip_netlink_parms(data, &nt->parms);
899
900 if (ipip_tunnel_locate(net, &nt->parms, 0))
901 return -EEXIST;
902 342
903 return ipip_tunnel_create(dev); 343 ipip_netlink_parms(data, &p);
344 return ip_tunnel_newlink(dev, tb, &p);
904} 345}
905 346
906static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], 347static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
907 struct nlattr *data[]) 348 struct nlattr *data[])
908{ 349{
909 struct ip_tunnel *t;
910 struct ip_tunnel_parm p; 350 struct ip_tunnel_parm p;
911 struct net *net = dev_net(dev);
912 struct ipip_net *ipn = net_generic(net, ipip_net_id);
913
914 if (dev == ipn->fb_tunnel_dev)
915 return -EINVAL;
916 351
917 ipip_netlink_parms(data, &p); 352 ipip_netlink_parms(data, &p);
918 353
@@ -920,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
920 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) 355 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
921 return -EINVAL; 356 return -EINVAL;
922 357
923 t = ipip_tunnel_locate(net, &p, 0); 358 return ip_tunnel_changelink(dev, tb, &p);
924
925 if (t) {
926 if (t->dev != dev)
927 return -EEXIST;
928 } else
929 t = netdev_priv(dev);
930
931 ipip_tunnel_update(t, &p);
932 return 0;
933} 359}
934 360
935static size_t ipip_get_size(const struct net_device *dev) 361static size_t ipip_get_size(const struct net_device *dev)
@@ -986,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
986 .setup = ipip_tunnel_setup, 412 .setup = ipip_tunnel_setup,
987 .newlink = ipip_newlink, 413 .newlink = ipip_newlink,
988 .changelink = ipip_changelink, 414 .changelink = ipip_changelink,
415 .dellink = ip_tunnel_dellink,
989 .get_size = ipip_get_size, 416 .get_size = ipip_get_size,
990 .fill_info = ipip_fill_info, 417 .fill_info = ipip_fill_info,
991}; 418};
@@ -996,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
996 .priority = 1, 423 .priority = 1,
997}; 424};
998 425
999static const char banner[] __initconst =
1000 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
1001
1002static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
1003{
1004 int prio;
1005
1006 for (prio = 1; prio < 4; prio++) {
1007 int h;
1008 for (h = 0; h < HASH_SIZE; h++) {
1009 struct ip_tunnel *t;
1010
1011 t = rtnl_dereference(ipn->tunnels[prio][h]);
1012 while (t != NULL) {
1013 unregister_netdevice_queue(t->dev, head);
1014 t = rtnl_dereference(t->next);
1015 }
1016 }
1017 }
1018}
1019
1020static int __net_init ipip_init_net(struct net *net) 426static int __net_init ipip_init_net(struct net *net)
1021{ 427{
1022 struct ipip_net *ipn = net_generic(net, ipip_net_id); 428 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
1023 struct ip_tunnel *t;
1024 int err;
1025
1026 ipn->tunnels[0] = ipn->tunnels_wc;
1027 ipn->tunnels[1] = ipn->tunnels_l;
1028 ipn->tunnels[2] = ipn->tunnels_r;
1029 ipn->tunnels[3] = ipn->tunnels_r_l;
1030
1031 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
1032 "tunl0",
1033 ipip_tunnel_setup);
1034 if (!ipn->fb_tunnel_dev) {
1035 err = -ENOMEM;
1036 goto err_alloc_dev;
1037 }
1038 dev_net_set(ipn->fb_tunnel_dev, net);
1039
1040 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
1041 if (err)
1042 goto err_reg_dev;
1043
1044 if ((err = register_netdev(ipn->fb_tunnel_dev)))
1045 goto err_reg_dev;
1046
1047 t = netdev_priv(ipn->fb_tunnel_dev);
1048
1049 strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
1050 return 0;
1051
1052err_reg_dev:
1053 ipip_dev_free(ipn->fb_tunnel_dev);
1054err_alloc_dev:
1055 /* nothing */
1056 return err;
1057} 429}
1058 430
1059static void __net_exit ipip_exit_net(struct net *net) 431static void __net_exit ipip_exit_net(struct net *net)
1060{ 432{
1061 struct ipip_net *ipn = net_generic(net, ipip_net_id); 433 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
1062 LIST_HEAD(list); 434 ip_tunnel_delete_net(itn);
1063
1064 rtnl_lock();
1065 ipip_destroy_tunnels(ipn, &list);
1066 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
1067 unregister_netdevice_many(&list);
1068 rtnl_unlock();
1069} 435}
1070 436
1071static struct pernet_operations ipip_net_ops = { 437static struct pernet_operations ipip_net_ops = {
1072 .init = ipip_init_net, 438 .init = ipip_init_net,
1073 .exit = ipip_exit_net, 439 .exit = ipip_exit_net,
1074 .id = &ipip_net_id, 440 .id = &ipip_net_id,
1075 .size = sizeof(struct ipip_net), 441 .size = sizeof(struct ip_tunnel_net),
1076}; 442};
1077 443
1078static int __init ipip_init(void) 444static int __init ipip_init(void)
1079{ 445{
1080 int err; 446 int err;
1081 447
1082 printk(banner); 448 pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
1083 449
1084 err = register_pernet_device(&ipip_net_ops); 450 err = register_pernet_device(&ipip_net_ops);
1085 if (err < 0) 451 if (err < 0)