aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_gre.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_gre.c')
-rw-r--r--net/ipv4/ip_gre.c283
1 files changed, 166 insertions, 117 deletions
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 35c93e8b6a46..8871067560db 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/gre.h>
47 48
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 49#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -63,13 +64,13 @@
63 We cannot track such dead loops during route installation, 64 We cannot track such dead loops during route installation,
64 it is infeasible task. The most general solutions would be 65 it is infeasible task. The most general solutions would be
65 to keep skb->encapsulation counter (sort of local ttl), 66 to keep skb->encapsulation counter (sort of local ttl),
66 and silently drop packet when it expires. It is the best 67 and silently drop packet when it expires. It is a good
67 solution, but it supposes maintaing new variable in ALL 68 solution, but it supposes maintaing new variable in ALL
68 skb, even if no tunneling is used. 69 skb, even if no tunneling is used.
69 70
70 Current solution: HARD_TX_LOCK lock breaks dead loops. 71 Current solution: xmit_recursion breaks dead loops. This is a percpu
71 72 counter, since when we enter the first ndo_xmit(), cpu migration is
72 73 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
73 74
74 2. Networking dead loops would not kill routers, but would really 75 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case, 76 kill network. IP hop limit plays role of "t->recursion" in this case,
@@ -128,7 +129,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev);
128 129
129static int ipgre_net_id __read_mostly; 130static int ipgre_net_id __read_mostly;
130struct ipgre_net { 131struct ipgre_net {
131 struct ip_tunnel *tunnels[4][HASH_SIZE]; 132 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
132 133
133 struct net_device *fb_tunnel_dev; 134 struct net_device *fb_tunnel_dev;
134}; 135};
@@ -158,13 +159,40 @@ struct ipgre_net {
158#define tunnels_l tunnels[1] 159#define tunnels_l tunnels[1]
159#define tunnels_wc tunnels[0] 160#define tunnels_wc tunnels[0]
160/* 161/*
161 * Locking : hash tables are protected by RCU and a spinlock 162 * Locking : hash tables are protected by RCU and RTNL
162 */ 163 */
163static DEFINE_SPINLOCK(ipgre_lock);
164 164
165#define for_each_ip_tunnel_rcu(start) \ 165#define for_each_ip_tunnel_rcu(start) \
166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
167 167
168/* often modified stats are per cpu, other are shared (netdev->stats) */
169struct pcpu_tstats {
170 unsigned long rx_packets;
171 unsigned long rx_bytes;
172 unsigned long tx_packets;
173 unsigned long tx_bytes;
174};
175
176static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
177{
178 struct pcpu_tstats sum = { 0 };
179 int i;
180
181 for_each_possible_cpu(i) {
182 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
183
184 sum.rx_packets += tstats->rx_packets;
185 sum.rx_bytes += tstats->rx_bytes;
186 sum.tx_packets += tstats->tx_packets;
187 sum.tx_bytes += tstats->tx_bytes;
188 }
189 dev->stats.rx_packets = sum.rx_packets;
190 dev->stats.rx_bytes = sum.rx_bytes;
191 dev->stats.tx_packets = sum.tx_packets;
192 dev->stats.tx_bytes = sum.tx_bytes;
193 return &dev->stats;
194}
195
168/* Given src, dst and key, find appropriate for input tunnel. */ 196/* Given src, dst and key, find appropriate for input tunnel. */
169 197
170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, 198static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
@@ -173,8 +201,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
173{ 201{
174 struct net *net = dev_net(dev); 202 struct net *net = dev_net(dev);
175 int link = dev->ifindex; 203 int link = dev->ifindex;
176 unsigned h0 = HASH(remote); 204 unsigned int h0 = HASH(remote);
177 unsigned h1 = HASH(key); 205 unsigned int h1 = HASH(key);
178 struct ip_tunnel *t, *cand = NULL; 206 struct ip_tunnel *t, *cand = NULL;
179 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 207 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 208 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
@@ -289,13 +317,13 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
289 return NULL; 317 return NULL;
290} 318}
291 319
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 320static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms) 321 struct ip_tunnel_parm *parms)
294{ 322{
295 __be32 remote = parms->iph.daddr; 323 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr; 324 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key; 325 __be32 key = parms->i_key;
298 unsigned h = HASH(key); 326 unsigned int h = HASH(key);
299 int prio = 0; 327 int prio = 0;
300 328
301 if (local) 329 if (local)
@@ -308,7 +336,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
308 return &ign->tunnels[prio][h]; 336 return &ign->tunnels[prio][h];
309} 337}
310 338
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 339static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t) 340 struct ip_tunnel *t)
313{ 341{
314 return __ipgre_bucket(ign, &t->parms); 342 return __ipgre_bucket(ign, &t->parms);
@@ -316,23 +344,22 @@ static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
316 344
317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 345static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318{ 346{
319 struct ip_tunnel **tp = ipgre_bucket(ign, t); 347 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
320 348
321 spin_lock_bh(&ipgre_lock); 349 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
322 t->next = *tp;
323 rcu_assign_pointer(*tp, t); 350 rcu_assign_pointer(*tp, t);
324 spin_unlock_bh(&ipgre_lock);
325} 351}
326 352
327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 353static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
328{ 354{
329 struct ip_tunnel **tp; 355 struct ip_tunnel __rcu **tp;
330 356 struct ip_tunnel *iter;
331 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 357
332 if (t == *tp) { 358 for (tp = ipgre_bucket(ign, t);
333 spin_lock_bh(&ipgre_lock); 359 (iter = rtnl_dereference(*tp)) != NULL;
334 *tp = t->next; 360 tp = &iter->next) {
335 spin_unlock_bh(&ipgre_lock); 361 if (t == iter) {
362 rcu_assign_pointer(*tp, t->next);
336 break; 363 break;
337 } 364 }
338 } 365 }
@@ -346,10 +373,13 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
346 __be32 local = parms->iph.saddr; 373 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key; 374 __be32 key = parms->i_key;
348 int link = parms->link; 375 int link = parms->link;
349 struct ip_tunnel *t, **tp; 376 struct ip_tunnel *t;
377 struct ip_tunnel __rcu **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 378 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351 379
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) 380 for (tp = __ipgre_bucket(ign, parms);
381 (t = rtnl_dereference(*tp)) != NULL;
382 tp = &t->next)
353 if (local == t->parms.iph.saddr && 383 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr && 384 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key && 385 key == t->parms.i_key &&
@@ -360,7 +390,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
360 return t; 390 return t;
361} 391}
362 392
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 393static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
364 struct ip_tunnel_parm *parms, int create) 394 struct ip_tunnel_parm *parms, int create)
365{ 395{
366 struct ip_tunnel *t, *nt; 396 struct ip_tunnel *t, *nt;
@@ -375,19 +405,14 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
375 if (parms->name[0]) 405 if (parms->name[0])
376 strlcpy(name, parms->name, IFNAMSIZ); 406 strlcpy(name, parms->name, IFNAMSIZ);
377 else 407 else
378 sprintf(name, "gre%%d"); 408 strcpy(name, "gre%d");
379 409
380 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); 410 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
381 if (!dev) 411 if (!dev)
382 return NULL; 412 return NULL;
383 413
384 dev_net_set(dev, net); 414 dev_net_set(dev, net);
385 415
386 if (strchr(name, '%')) {
387 if (dev_alloc_name(dev, name) < 0)
388 goto failed_free;
389 }
390
391 nt = netdev_priv(dev); 416 nt = netdev_priv(dev);
392 nt->parms = *parms; 417 nt->parms = *parms;
393 dev->rtnl_link_ops = &ipgre_link_ops; 418 dev->rtnl_link_ops = &ipgre_link_ops;
@@ -432,7 +457,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
432 by themself??? 457 by themself???
433 */ 458 */
434 459
435 struct iphdr *iph = (struct iphdr *)skb->data; 460 const struct iphdr *iph = (const struct iphdr *)skb->data;
436 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 461 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
437 int grehlen = (iph->ihl<<2) + 4; 462 int grehlen = (iph->ihl<<2) + 4;
438 const int type = icmp_hdr(skb)->type; 463 const int type = icmp_hdr(skb)->type;
@@ -504,7 +529,7 @@ out:
504 rcu_read_unlock(); 529 rcu_read_unlock();
505} 530}
506 531
507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 532static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
508{ 533{
509 if (INET_ECN_is_ce(iph->tos)) { 534 if (INET_ECN_is_ce(iph->tos)) {
510 if (skb->protocol == htons(ETH_P_IP)) { 535 if (skb->protocol == htons(ETH_P_IP)) {
@@ -516,19 +541,19 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
516} 541}
517 542
518static inline u8 543static inline u8
519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 544ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
520{ 545{
521 u8 inner = 0; 546 u8 inner = 0;
522 if (skb->protocol == htons(ETH_P_IP)) 547 if (skb->protocol == htons(ETH_P_IP))
523 inner = old_iph->tos; 548 inner = old_iph->tos;
524 else if (skb->protocol == htons(ETH_P_IPV6)) 549 else if (skb->protocol == htons(ETH_P_IPV6))
525 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 550 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
526 return INET_ECN_encapsulate(tos, inner); 551 return INET_ECN_encapsulate(tos, inner);
527} 552}
528 553
529static int ipgre_rcv(struct sk_buff *skb) 554static int ipgre_rcv(struct sk_buff *skb)
530{ 555{
531 struct iphdr *iph; 556 const struct iphdr *iph;
532 u8 *h; 557 u8 *h;
533 __be16 flags; 558 __be16 flags;
534 __sum16 csum = 0; 559 __sum16 csum = 0;
@@ -582,7 +607,7 @@ static int ipgre_rcv(struct sk_buff *skb)
582 if ((tunnel = ipgre_tunnel_lookup(skb->dev, 607 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
583 iph->saddr, iph->daddr, key, 608 iph->saddr, iph->daddr, key,
584 gre_proto))) { 609 gre_proto))) {
585 struct net_device_stats *stats = &tunnel->dev->stats; 610 struct pcpu_tstats *tstats;
586 611
587 secpath_reset(skb); 612 secpath_reset(skb);
588 613
@@ -604,24 +629,24 @@ static int ipgre_rcv(struct sk_buff *skb)
604#ifdef CONFIG_NET_IPGRE_BROADCAST 629#ifdef CONFIG_NET_IPGRE_BROADCAST
605 if (ipv4_is_multicast(iph->daddr)) { 630 if (ipv4_is_multicast(iph->daddr)) {
606 /* Looped back packet, drop it! */ 631 /* Looped back packet, drop it! */
607 if (skb_rtable(skb)->fl.iif == 0) 632 if (rt_is_output_route(skb_rtable(skb)))
608 goto drop; 633 goto drop;
609 stats->multicast++; 634 tunnel->dev->stats.multicast++;
610 skb->pkt_type = PACKET_BROADCAST; 635 skb->pkt_type = PACKET_BROADCAST;
611 } 636 }
612#endif 637#endif
613 638
614 if (((flags&GRE_CSUM) && csum) || 639 if (((flags&GRE_CSUM) && csum) ||
615 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 640 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
616 stats->rx_crc_errors++; 641 tunnel->dev->stats.rx_crc_errors++;
617 stats->rx_errors++; 642 tunnel->dev->stats.rx_errors++;
618 goto drop; 643 goto drop;
619 } 644 }
620 if (tunnel->parms.i_flags&GRE_SEQ) { 645 if (tunnel->parms.i_flags&GRE_SEQ) {
621 if (!(flags&GRE_SEQ) || 646 if (!(flags&GRE_SEQ) ||
622 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 647 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
623 stats->rx_fifo_errors++; 648 tunnel->dev->stats.rx_fifo_errors++;
624 stats->rx_errors++; 649 tunnel->dev->stats.rx_errors++;
625 goto drop; 650 goto drop;
626 } 651 }
627 tunnel->i_seqno = seqno + 1; 652 tunnel->i_seqno = seqno + 1;
@@ -630,8 +655,8 @@ static int ipgre_rcv(struct sk_buff *skb)
630 /* Warning: All skb pointers will be invalidated! */ 655 /* Warning: All skb pointers will be invalidated! */
631 if (tunnel->dev->type == ARPHRD_ETHER) { 656 if (tunnel->dev->type == ARPHRD_ETHER) {
632 if (!pskb_may_pull(skb, ETH_HLEN)) { 657 if (!pskb_may_pull(skb, ETH_HLEN)) {
633 stats->rx_length_errors++; 658 tunnel->dev->stats.rx_length_errors++;
634 stats->rx_errors++; 659 tunnel->dev->stats.rx_errors++;
635 goto drop; 660 goto drop;
636 } 661 }
637 662
@@ -640,14 +665,19 @@ static int ipgre_rcv(struct sk_buff *skb)
640 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 665 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
641 } 666 }
642 667
643 skb_tunnel_rx(skb, tunnel->dev); 668 tstats = this_cpu_ptr(tunnel->dev->tstats);
669 tstats->rx_packets++;
670 tstats->rx_bytes += skb->len;
671
672 __skb_tunnel_rx(skb, tunnel->dev);
644 673
645 skb_reset_network_header(skb); 674 skb_reset_network_header(skb);
646 ipgre_ecn_decapsulate(iph, skb); 675 ipgre_ecn_decapsulate(iph, skb);
647 676
648 netif_rx(skb); 677 netif_rx(skb);
678
649 rcu_read_unlock(); 679 rcu_read_unlock();
650 return(0); 680 return 0;
651 } 681 }
652 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 682 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
653 683
@@ -655,20 +685,20 @@ drop:
655 rcu_read_unlock(); 685 rcu_read_unlock();
656drop_nolock: 686drop_nolock:
657 kfree_skb(skb); 687 kfree_skb(skb);
658 return(0); 688 return 0;
659} 689}
660 690
661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 691static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
662{ 692{
663 struct ip_tunnel *tunnel = netdev_priv(dev); 693 struct ip_tunnel *tunnel = netdev_priv(dev);
664 struct net_device_stats *stats = &dev->stats; 694 struct pcpu_tstats *tstats;
665 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); 695 const struct iphdr *old_iph = ip_hdr(skb);
666 struct iphdr *old_iph = ip_hdr(skb); 696 const struct iphdr *tiph;
667 struct iphdr *tiph; 697 struct flowi4 fl4;
668 u8 tos; 698 u8 tos;
669 __be16 df; 699 __be16 df;
670 struct rtable *rt; /* Route to the other host */ 700 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */ 701 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */ 702 struct iphdr *iph; /* Our new IP header */
673 unsigned int max_headroom; /* The extra header space needed */ 703 unsigned int max_headroom; /* The extra header space needed */
674 int gre_hlen; 704 int gre_hlen;
@@ -680,7 +710,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
680 710
681 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 711 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
682 gre_hlen = 0; 712 gre_hlen = 0;
683 tiph = (struct iphdr *)skb->data; 713 tiph = (const struct iphdr *)skb->data;
684 } else { 714 } else {
685 gre_hlen = tunnel->hlen; 715 gre_hlen = tunnel->hlen;
686 tiph = &tunnel->parms.iph; 716 tiph = &tunnel->parms.iph;
@@ -690,7 +720,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
690 /* NBMA tunnel */ 720 /* NBMA tunnel */
691 721
692 if (skb_dst(skb) == NULL) { 722 if (skb_dst(skb) == NULL) {
693 stats->tx_fifo_errors++; 723 dev->stats.tx_fifo_errors++;
694 goto tx_error; 724 goto tx_error;
695 } 725 }
696 726
@@ -701,14 +731,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
701 } 731 }
702#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 732#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
703 else if (skb->protocol == htons(ETH_P_IPV6)) { 733 else if (skb->protocol == htons(ETH_P_IPV6)) {
704 struct in6_addr *addr6; 734 const struct in6_addr *addr6;
705 int addr_type; 735 int addr_type;
706 struct neighbour *neigh = skb_dst(skb)->neighbour; 736 struct neighbour *neigh = skb_dst(skb)->neighbour;
707 737
708 if (neigh == NULL) 738 if (neigh == NULL)
709 goto tx_error; 739 goto tx_error;
710 740
711 addr6 = (struct in6_addr *)&neigh->primary_key; 741 addr6 = (const struct in6_addr *)&neigh->primary_key;
712 addr_type = ipv6_addr_type(addr6); 742 addr_type = ipv6_addr_type(addr6);
713 743
714 if (addr_type == IPV6_ADDR_ANY) { 744 if (addr_type == IPV6_ADDR_ANY) {
@@ -732,26 +762,21 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
732 if (skb->protocol == htons(ETH_P_IP)) 762 if (skb->protocol == htons(ETH_P_IP))
733 tos = old_iph->tos; 763 tos = old_iph->tos;
734 else if (skb->protocol == htons(ETH_P_IPV6)) 764 else if (skb->protocol == htons(ETH_P_IPV6))
735 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 765 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
736 } 766 }
737 767
738 { 768 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
739 struct flowi fl = { .oif = tunnel->parms.link, 769 tunnel->parms.o_key, RT_TOS(tos),
740 .nl_u = { .ip4_u = 770 tunnel->parms.link);
741 { .daddr = dst, 771 if (IS_ERR(rt)) {
742 .saddr = tiph->saddr, 772 dev->stats.tx_carrier_errors++;
743 .tos = RT_TOS(tos) } }, 773 goto tx_error;
744 .proto = IPPROTO_GRE };
745 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
746 stats->tx_carrier_errors++;
747 goto tx_error;
748 }
749 } 774 }
750 tdev = rt->dst.dev; 775 tdev = rt->dst.dev;
751 776
752 if (tdev == dev) { 777 if (tdev == dev) {
753 ip_rt_put(rt); 778 ip_rt_put(rt);
754 stats->collisions++; 779 dev->stats.collisions++;
755 goto tx_error; 780 goto tx_error;
756 } 781 }
757 782
@@ -783,7 +808,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
783 !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 808 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
784 rt6->rt6i_dst.plen == 128) { 809 rt6->rt6i_dst.plen == 128) {
785 rt6->rt6i_flags |= RTF_MODIFIED; 810 rt6->rt6i_flags |= RTF_MODIFIED;
786 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; 811 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
787 } 812 }
788 } 813 }
789 814
@@ -814,7 +839,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
814 dev->needed_headroom = max_headroom; 839 dev->needed_headroom = max_headroom;
815 if (!new_skb) { 840 if (!new_skb) {
816 ip_rt_put(rt); 841 ip_rt_put(rt);
817 txq->tx_dropped++; 842 dev->stats.tx_dropped++;
818 dev_kfree_skb(skb); 843 dev_kfree_skb(skb);
819 return NETDEV_TX_OK; 844 return NETDEV_TX_OK;
820 } 845 }
@@ -844,18 +869,18 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
844 iph->frag_off = df; 869 iph->frag_off = df;
845 iph->protocol = IPPROTO_GRE; 870 iph->protocol = IPPROTO_GRE;
846 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 871 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
847 iph->daddr = rt->rt_dst; 872 iph->daddr = fl4.daddr;
848 iph->saddr = rt->rt_src; 873 iph->saddr = fl4.saddr;
849 874
850 if ((iph->ttl = tiph->ttl) == 0) { 875 if ((iph->ttl = tiph->ttl) == 0) {
851 if (skb->protocol == htons(ETH_P_IP)) 876 if (skb->protocol == htons(ETH_P_IP))
852 iph->ttl = old_iph->ttl; 877 iph->ttl = old_iph->ttl;
853#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 878#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
854 else if (skb->protocol == htons(ETH_P_IPV6)) 879 else if (skb->protocol == htons(ETH_P_IPV6))
855 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; 880 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
856#endif 881#endif
857 else 882 else
858 iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); 883 iph->ttl = ip4_dst_hoplimit(&rt->dst);
859 } 884 }
860 885
861 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; 886 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
@@ -881,15 +906,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
881 } 906 }
882 907
883 nf_reset(skb); 908 nf_reset(skb);
884 909 tstats = this_cpu_ptr(dev->tstats);
885 IPTUNNEL_XMIT(); 910 __IPTUNNEL_XMIT(tstats, &dev->stats);
886 return NETDEV_TX_OK; 911 return NETDEV_TX_OK;
887 912
888tx_error_icmp: 913tx_error_icmp:
889 dst_link_failure(skb); 914 dst_link_failure(skb);
890 915
891tx_error: 916tx_error:
892 stats->tx_errors++; 917 dev->stats.tx_errors++;
893 dev_kfree_skb(skb); 918 dev_kfree_skb(skb);
894 return NETDEV_TX_OK; 919 return NETDEV_TX_OK;
895} 920}
@@ -898,7 +923,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
898{ 923{
899 struct net_device *tdev = NULL; 924 struct net_device *tdev = NULL;
900 struct ip_tunnel *tunnel; 925 struct ip_tunnel *tunnel;
901 struct iphdr *iph; 926 const struct iphdr *iph;
902 int hlen = LL_MAX_HEADER; 927 int hlen = LL_MAX_HEADER;
903 int mtu = ETH_DATA_LEN; 928 int mtu = ETH_DATA_LEN;
904 int addend = sizeof(struct iphdr) + 4; 929 int addend = sizeof(struct iphdr) + 4;
@@ -909,14 +934,15 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
909 /* Guess output device to choose reasonable mtu and needed_headroom */ 934 /* Guess output device to choose reasonable mtu and needed_headroom */
910 935
911 if (iph->daddr) { 936 if (iph->daddr) {
912 struct flowi fl = { .oif = tunnel->parms.link, 937 struct flowi4 fl4;
913 .nl_u = { .ip4_u =
914 { .daddr = iph->daddr,
915 .saddr = iph->saddr,
916 .tos = RT_TOS(iph->tos) } },
917 .proto = IPPROTO_GRE };
918 struct rtable *rt; 938 struct rtable *rt;
919 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 939
940 rt = ip_route_output_gre(dev_net(dev), &fl4,
941 iph->daddr, iph->saddr,
942 tunnel->parms.o_key,
943 RT_TOS(iph->tos),
944 tunnel->parms.link);
945 if (!IS_ERR(rt)) {
920 tdev = rt->dst.dev; 946 tdev = rt->dst.dev;
921 ip_rt_put(rt); 947 ip_rt_put(rt);
922 } 948 }
@@ -1012,7 +1038,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1012 break; 1038 break;
1013 } 1039 }
1014 } else { 1040 } else {
1015 unsigned nflags = 0; 1041 unsigned int nflags = 0;
1016 1042
1017 t = netdev_priv(dev); 1043 t = netdev_priv(dev);
1018 1044
@@ -1026,6 +1052,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1026 break; 1052 break;
1027 } 1053 }
1028 ipgre_tunnel_unlink(ign, t); 1054 ipgre_tunnel_unlink(ign, t);
1055 synchronize_net();
1029 t->parms.iph.saddr = p.iph.saddr; 1056 t->parms.iph.saddr = p.iph.saddr;
1030 t->parms.iph.daddr = p.iph.daddr; 1057 t->parms.iph.daddr = p.iph.daddr;
1031 t->parms.i_key = p.i_key; 1058 t->parms.i_key = p.i_key;
@@ -1125,7 +1152,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1125 1152
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 1153static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127 unsigned short type, 1154 unsigned short type,
1128 const void *daddr, const void *saddr, unsigned len) 1155 const void *daddr, const void *saddr, unsigned int len)
1129{ 1156{
1130 struct ip_tunnel *t = netdev_priv(dev); 1157 struct ip_tunnel *t = netdev_priv(dev);
1131 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1158 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
@@ -1151,7 +1178,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1151 1178
1152static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 1179static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1153{ 1180{
1154 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb); 1181 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
1155 memcpy(haddr, &iph->saddr, 4); 1182 memcpy(haddr, &iph->saddr, 4);
1156 return 4; 1183 return 4;
1157} 1184}
@@ -1167,14 +1194,16 @@ static int ipgre_open(struct net_device *dev)
1167 struct ip_tunnel *t = netdev_priv(dev); 1194 struct ip_tunnel *t = netdev_priv(dev);
1168 1195
1169 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1196 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1170 struct flowi fl = { .oif = t->parms.link, 1197 struct flowi4 fl4;
1171 .nl_u = { .ip4_u =
1172 { .daddr = t->parms.iph.daddr,
1173 .saddr = t->parms.iph.saddr,
1174 .tos = RT_TOS(t->parms.iph.tos) } },
1175 .proto = IPPROTO_GRE };
1176 struct rtable *rt; 1198 struct rtable *rt;
1177 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1199
1200 rt = ip_route_output_gre(dev_net(dev), &fl4,
1201 t->parms.iph.daddr,
1202 t->parms.iph.saddr,
1203 t->parms.o_key,
1204 RT_TOS(t->parms.iph.tos),
1205 t->parms.link);
1206 if (IS_ERR(rt))
1178 return -EADDRNOTAVAIL; 1207 return -EADDRNOTAVAIL;
1179 dev = rt->dst.dev; 1208 dev = rt->dst.dev;
1180 ip_rt_put(rt); 1209 ip_rt_put(rt);
@@ -1193,10 +1222,8 @@ static int ipgre_close(struct net_device *dev)
1193 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 1222 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1194 struct in_device *in_dev; 1223 struct in_device *in_dev;
1195 in_dev = inetdev_by_index(dev_net(dev), t->mlink); 1224 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1196 if (in_dev) { 1225 if (in_dev)
1197 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1226 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1198 in_dev_put(in_dev);
1199 }
1200 } 1227 }
1201 return 0; 1228 return 0;
1202} 1229}
@@ -1213,12 +1240,19 @@ static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_start_xmit = ipgre_tunnel_xmit, 1240 .ndo_start_xmit = ipgre_tunnel_xmit,
1214 .ndo_do_ioctl = ipgre_tunnel_ioctl, 1241 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1215 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1242 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1243 .ndo_get_stats = ipgre_get_stats,
1216}; 1244};
1217 1245
1246static void ipgre_dev_free(struct net_device *dev)
1247{
1248 free_percpu(dev->tstats);
1249 free_netdev(dev);
1250}
1251
1218static void ipgre_tunnel_setup(struct net_device *dev) 1252static void ipgre_tunnel_setup(struct net_device *dev)
1219{ 1253{
1220 dev->netdev_ops = &ipgre_netdev_ops; 1254 dev->netdev_ops = &ipgre_netdev_ops;
1221 dev->destructor = free_netdev; 1255 dev->destructor = ipgre_dev_free;
1222 1256
1223 dev->type = ARPHRD_IPGRE; 1257 dev->type = ARPHRD_IPGRE;
1224 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1258 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
@@ -1256,6 +1290,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
1256 } else 1290 } else
1257 dev->header_ops = &ipgre_header_ops; 1291 dev->header_ops = &ipgre_header_ops;
1258 1292
1293 dev->tstats = alloc_percpu(struct pcpu_tstats);
1294 if (!dev->tstats)
1295 return -ENOMEM;
1296
1259 return 0; 1297 return 0;
1260} 1298}
1261 1299
@@ -1263,7 +1301,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1263{ 1301{
1264 struct ip_tunnel *tunnel = netdev_priv(dev); 1302 struct ip_tunnel *tunnel = netdev_priv(dev);
1265 struct iphdr *iph = &tunnel->parms.iph; 1303 struct iphdr *iph = &tunnel->parms.iph;
1266 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1267 1304
1268 tunnel->dev = dev; 1305 tunnel->dev = dev;
1269 strcpy(tunnel->parms.name, dev->name); 1306 strcpy(tunnel->parms.name, dev->name);
@@ -1274,14 +1311,12 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1274 tunnel->hlen = sizeof(struct iphdr) + 4; 1311 tunnel->hlen = sizeof(struct iphdr) + 4;
1275 1312
1276 dev_hold(dev); 1313 dev_hold(dev);
1277 ign->tunnels_wc[0] = tunnel;
1278} 1314}
1279 1315
1280 1316
1281static const struct net_protocol ipgre_protocol = { 1317static const struct gre_protocol ipgre_protocol = {
1282 .handler = ipgre_rcv, 1318 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err, 1319 .err_handler = ipgre_err,
1284 .netns_ok = 1,
1285}; 1320};
1286 1321
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) 1322static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1291,11 +1326,13 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1291 for (prio = 0; prio < 4; prio++) { 1326 for (prio = 0; prio < 4; prio++) {
1292 int h; 1327 int h;
1293 for (h = 0; h < HASH_SIZE; h++) { 1328 for (h = 0; h < HASH_SIZE; h++) {
1294 struct ip_tunnel *t = ign->tunnels[prio][h]; 1329 struct ip_tunnel *t;
1330
1331 t = rtnl_dereference(ign->tunnels[prio][h]);
1295 1332
1296 while (t != NULL) { 1333 while (t != NULL) {
1297 unregister_netdevice_queue(t->dev, head); 1334 unregister_netdevice_queue(t->dev, head);
1298 t = t->next; 1335 t = rtnl_dereference(t->next);
1299 } 1336 }
1300 } 1337 }
1301 } 1338 }
@@ -1320,10 +1357,12 @@ static int __net_init ipgre_init_net(struct net *net)
1320 if ((err = register_netdev(ign->fb_tunnel_dev))) 1357 if ((err = register_netdev(ign->fb_tunnel_dev)))
1321 goto err_reg_dev; 1358 goto err_reg_dev;
1322 1359
1360 rcu_assign_pointer(ign->tunnels_wc[0],
1361 netdev_priv(ign->fb_tunnel_dev));
1323 return 0; 1362 return 0;
1324 1363
1325err_reg_dev: 1364err_reg_dev:
1326 free_netdev(ign->fb_tunnel_dev); 1365 ipgre_dev_free(ign->fb_tunnel_dev);
1327err_alloc_dev: 1366err_alloc_dev:
1328 return err; 1367 return err;
1329} 1368}
@@ -1441,6 +1480,10 @@ static int ipgre_tap_init(struct net_device *dev)
1441 1480
1442 ipgre_tunnel_bind_dev(dev); 1481 ipgre_tunnel_bind_dev(dev);
1443 1482
1483 dev->tstats = alloc_percpu(struct pcpu_tstats);
1484 if (!dev->tstats)
1485 return -ENOMEM;
1486
1444 return 0; 1487 return 0;
1445} 1488}
1446 1489
@@ -1451,6 +1494,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
1451 .ndo_set_mac_address = eth_mac_addr, 1494 .ndo_set_mac_address = eth_mac_addr,
1452 .ndo_validate_addr = eth_validate_addr, 1495 .ndo_validate_addr = eth_validate_addr,
1453 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1496 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1497 .ndo_get_stats = ipgre_get_stats,
1454}; 1498};
1455 1499
1456static void ipgre_tap_setup(struct net_device *dev) 1500static void ipgre_tap_setup(struct net_device *dev)
@@ -1459,7 +1503,7 @@ static void ipgre_tap_setup(struct net_device *dev)
1459 ether_setup(dev); 1503 ether_setup(dev);
1460 1504
1461 dev->netdev_ops = &ipgre_tap_netdev_ops; 1505 dev->netdev_ops = &ipgre_tap_netdev_ops;
1462 dev->destructor = free_netdev; 1506 dev->destructor = ipgre_dev_free;
1463 1507
1464 dev->iflink = 0; 1508 dev->iflink = 0;
1465 dev->features |= NETIF_F_NETNS_LOCAL; 1509 dev->features |= NETIF_F_NETNS_LOCAL;
@@ -1487,6 +1531,10 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla
1487 if (!tb[IFLA_MTU]) 1531 if (!tb[IFLA_MTU])
1488 dev->mtu = mtu; 1532 dev->mtu = mtu;
1489 1533
1534 /* Can use a lockless transmit, unless we generate output sequences */
1535 if (!(nt->parms.o_flags & GRE_SEQ))
1536 dev->features |= NETIF_F_LLTX;
1537
1490 err = register_netdevice(dev); 1538 err = register_netdevice(dev);
1491 if (err) 1539 if (err)
1492 goto out; 1540 goto out;
@@ -1522,7 +1570,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1522 t = nt; 1570 t = nt;
1523 1571
1524 if (dev->type != ARPHRD_ETHER) { 1572 if (dev->type != ARPHRD_ETHER) {
1525 unsigned nflags = 0; 1573 unsigned int nflags = 0;
1526 1574
1527 if (ipv4_is_multicast(p.iph.daddr)) 1575 if (ipv4_is_multicast(p.iph.daddr))
1528 nflags = IFF_BROADCAST; 1576 nflags = IFF_BROADCAST;
@@ -1663,7 +1711,7 @@ static int __init ipgre_init(void)
1663 if (err < 0) 1711 if (err < 0)
1664 return err; 1712 return err;
1665 1713
1666 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); 1714 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1667 if (err < 0) { 1715 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1716 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed; 1717 goto add_proto_failed;
@@ -1683,7 +1731,7 @@ out:
1683tap_ops_failed: 1731tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops); 1732 rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed: 1733rtnl_link_failed:
1686 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1734 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1687add_proto_failed: 1735add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops); 1736 unregister_pernet_device(&ipgre_net_ops);
1689 goto out; 1737 goto out;
@@ -1693,7 +1741,7 @@ static void __exit ipgre_fini(void)
1693{ 1741{
1694 rtnl_link_unregister(&ipgre_tap_ops); 1742 rtnl_link_unregister(&ipgre_tap_ops);
1695 rtnl_link_unregister(&ipgre_link_ops); 1743 rtnl_link_unregister(&ipgre_link_ops);
1696 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1744 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1697 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1745 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698 unregister_pernet_device(&ipgre_net_ops); 1746 unregister_pernet_device(&ipgre_net_ops);
1699} 1747}
@@ -1703,3 +1751,4 @@ module_exit(ipgre_fini);
1703MODULE_LICENSE("GPL"); 1751MODULE_LICENSE("GPL");
1704MODULE_ALIAS_RTNL_LINK("gre"); 1752MODULE_ALIAS_RTNL_LINK("gre");
1705MODULE_ALIAS_RTNL_LINK("gretap"); 1753MODULE_ALIAS_RTNL_LINK("gretap");
1754MODULE_ALIAS_NETDEV("gre0");