aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
author: Mahesh Bandewar <maheshb@google.com> 2016-09-16 15:59:19 -0400
committer: David S. Miller <davem@davemloft.net> 2016-09-19 01:25:22 -0400
commit: 4fbae7d83c98c30efcf0a2a2ac55fbb75ef5a1a5 (patch)
tree: 3ea819d38ad4fbbae8d4db166f58451c2a78ee20 /drivers/net
parent: e8bffe0cf964f0330595bb376b74921cccdaac88 (diff)
ipvlan: Introduce l3s mode
In a typical IPvlan L3 setup, the master is in the default-ns and each slave is in a different (slave) ns. In this setup, egress packet processing for traffic originating from a slave-ns will hit all NF_HOOKs in the slave-ns as well as in the default-ns. However, the same is not true for ingress processing: all these NF_HOOKs are hit only in the slave-ns, and are skipped in the default-ns. IPvlan in L3 mode is restrictive, and if admins want to deploy iptables rules in the default-ns, this asymmetric data path makes it impossible to do so.

This patch makes use of l3_rcv() (added as part of the l3mdev enhancements) to perform the input route lookup on RX packets without changing skb->dev, and then uses an nf_hook at NF_INET_LOCAL_IN to change skb->dev just before handing the skb over to L4.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
CC: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- drivers/net/Kconfig 1
-rw-r--r-- drivers/net/ipvlan/ipvlan.h 6
-rw-r--r-- drivers/net/ipvlan/ipvlan_core.c 94
-rw-r--r-- drivers/net/ipvlan/ipvlan_main.c 87
4 files changed, 181 insertions, 7 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0c5415b05ea9..8768a625350d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@ config IPVLAN
149 tristate "IP-VLAN support" 149 tristate "IP-VLAN support"
150 depends on INET 150 depends on INET
151 depends on IPV6 151 depends on IPV6
152 depends on NET_L3_MASTER_DEV
152 ---help--- 153 ---help---
153 This allows one to create virtual devices off of a main interface 154 This allows one to create virtual devices off of a main interface
154 and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) 155 and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 695a5dc9ace3..7e0732f5ea07 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -23,11 +23,13 @@
23#include <linux/if_vlan.h> 23#include <linux/if_vlan.h>
24#include <linux/ip.h> 24#include <linux/ip.h>
25#include <linux/inetdevice.h> 25#include <linux/inetdevice.h>
26#include <linux/netfilter.h>
26#include <net/ip.h> 27#include <net/ip.h>
27#include <net/ip6_route.h> 28#include <net/ip6_route.h>
28#include <net/rtnetlink.h> 29#include <net/rtnetlink.h>
29#include <net/route.h> 30#include <net/route.h>
30#include <net/addrconf.h> 31#include <net/addrconf.h>
32#include <net/l3mdev.h>
31 33
32#define IPVLAN_DRV "ipvlan" 34#define IPVLAN_DRV "ipvlan"
33#define IPV_DRV_VER "0.1" 35#define IPV_DRV_VER "0.1"
@@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
124 const void *iaddr, bool is_v6); 126 const void *iaddr, bool is_v6);
125bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); 127bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
126void ipvlan_ht_addr_del(struct ipvl_addr *addr); 128void ipvlan_ht_addr_del(struct ipvl_addr *addr);
129struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
130 u16 proto);
131unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
132 const struct nf_hook_state *state);
127#endif /* __IPVLAN_H */ 133#endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b5f9511d819e..b4e990743e1d 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
560 case IPVLAN_MODE_L2: 560 case IPVLAN_MODE_L2:
561 return ipvlan_xmit_mode_l2(skb, dev); 561 return ipvlan_xmit_mode_l2(skb, dev);
562 case IPVLAN_MODE_L3: 562 case IPVLAN_MODE_L3:
563 case IPVLAN_MODE_L3S:
563 return ipvlan_xmit_mode_l3(skb, dev); 564 return ipvlan_xmit_mode_l3(skb, dev);
564 } 565 }
565 566
@@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
664 return ipvlan_handle_mode_l2(pskb, port); 665 return ipvlan_handle_mode_l2(pskb, port);
665 case IPVLAN_MODE_L3: 666 case IPVLAN_MODE_L3:
666 return ipvlan_handle_mode_l3(pskb, port); 667 return ipvlan_handle_mode_l3(pskb, port);
668 case IPVLAN_MODE_L3S:
669 return RX_HANDLER_PASS;
667 } 670 }
668 671
669 /* Should not reach here */ 672 /* Should not reach here */
@@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
672 kfree_skb(skb); 675 kfree_skb(skb);
673 return RX_HANDLER_CONSUMED; 676 return RX_HANDLER_CONSUMED;
674} 677}
678
679static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
680 struct net_device *dev)
681{
682 struct ipvl_addr *addr = NULL;
683 struct ipvl_port *port;
684 void *lyr3h;
685 int addr_type;
686
687 if (!dev || !netif_is_ipvlan_port(dev))
688 goto out;
689
690 port = ipvlan_port_get_rcu(dev);
691 if (!port || port->mode != IPVLAN_MODE_L3S)
692 goto out;
693
694 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
695 if (!lyr3h)
696 goto out;
697
698 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
699out:
700 return addr;
701}
702
703struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
704 u16 proto)
705{
706 struct ipvl_addr *addr;
707 struct net_device *sdev;
708
709 addr = ipvlan_skb_to_addr(skb, dev);
710 if (!addr)
711 goto out;
712
713 sdev = addr->master->dev;
714 switch (proto) {
715 case AF_INET:
716 {
717 int err;
718 struct iphdr *ip4h = ip_hdr(skb);
719
720 err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
721 ip4h->tos, sdev);
722 if (unlikely(err))
723 goto out;
724 break;
725 }
726 case AF_INET6:
727 {
728 struct dst_entry *dst;
729 struct ipv6hdr *ip6h = ipv6_hdr(skb);
730 int flags = RT6_LOOKUP_F_HAS_SADDR;
731 struct flowi6 fl6 = {
732 .flowi6_iif = sdev->ifindex,
733 .daddr = ip6h->daddr,
734 .saddr = ip6h->saddr,
735 .flowlabel = ip6_flowinfo(ip6h),
736 .flowi6_mark = skb->mark,
737 .flowi6_proto = ip6h->nexthdr,
738 };
739
740 skb_dst_drop(skb);
741 dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
742 skb_dst_set(skb, dst);
743 break;
744 }
745 default:
746 break;
747 }
748
749out:
750 return skb;
751}
752
753unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
754 const struct nf_hook_state *state)
755{
756 struct ipvl_addr *addr;
757 unsigned int len;
758
759 addr = ipvlan_skb_to_addr(skb, skb->dev);
760 if (!addr)
761 goto out;
762
763 skb->dev = addr->master->dev;
764 len = skb->len + ETH_HLEN;
765 ipvlan_count_rx(addr->master, len, true, false);
766out:
767 return NF_ACCEPT;
768}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 18b4e8c7f68a..f442eb366863 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -9,24 +9,87 @@
9 9
10#include "ipvlan.h" 10#include "ipvlan.h"
11 11
12static u32 ipvl_nf_hook_refcnt = 0;
13
14static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
15 {
16 .hook = ipvlan_nf_input,
17 .pf = NFPROTO_IPV4,
18 .hooknum = NF_INET_LOCAL_IN,
19 .priority = INT_MAX,
20 },
21 {
22 .hook = ipvlan_nf_input,
23 .pf = NFPROTO_IPV6,
24 .hooknum = NF_INET_LOCAL_IN,
25 .priority = INT_MAX,
26 },
27};
28
29static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
30 .l3mdev_l3_rcv = ipvlan_l3_rcv,
31};
32
12static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) 33static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
13{ 34{
14 ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; 35 ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
15} 36}
16 37
17static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) 38static int ipvlan_register_nf_hook(void)
39{
40 int err = 0;
41
42 if (!ipvl_nf_hook_refcnt) {
43 err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
44 if (!err)
45 ipvl_nf_hook_refcnt = 1;
46 } else {
47 ipvl_nf_hook_refcnt++;
48 }
49
50 return err;
51}
52
53static void ipvlan_unregister_nf_hook(void)
54{
55 WARN_ON(!ipvl_nf_hook_refcnt);
56
57 ipvl_nf_hook_refcnt--;
58 if (!ipvl_nf_hook_refcnt)
59 _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
60}
61
62static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
18{ 63{
19 struct ipvl_dev *ipvlan; 64 struct ipvl_dev *ipvlan;
65 struct net_device *mdev = port->dev;
66 int err = 0;
20 67
68 ASSERT_RTNL();
21 if (port->mode != nval) { 69 if (port->mode != nval) {
70 if (nval == IPVLAN_MODE_L3S) {
71 /* New mode is L3S */
72 err = ipvlan_register_nf_hook();
73 if (!err) {
74 mdev->l3mdev_ops = &ipvl_l3mdev_ops;
75 mdev->priv_flags |= IFF_L3MDEV_MASTER;
76 } else
77 return err;
78 } else if (port->mode == IPVLAN_MODE_L3S) {
79 /* Old mode was L3S */
80 mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
81 ipvlan_unregister_nf_hook();
82 mdev->l3mdev_ops = NULL;
83 }
22 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 84 list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
23 if (nval == IPVLAN_MODE_L3) 85 if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
24 ipvlan->dev->flags |= IFF_NOARP; 86 ipvlan->dev->flags |= IFF_NOARP;
25 else 87 else
26 ipvlan->dev->flags &= ~IFF_NOARP; 88 ipvlan->dev->flags &= ~IFF_NOARP;
27 } 89 }
28 port->mode = nval; 90 port->mode = nval;
29 } 91 }
92 return err;
30} 93}
31 94
32static int ipvlan_port_create(struct net_device *dev) 95static int ipvlan_port_create(struct net_device *dev)
@@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
74 struct ipvl_port *port = ipvlan_port_get_rtnl(dev); 137 struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
75 138
76 dev->priv_flags &= ~IFF_IPVLAN_MASTER; 139 dev->priv_flags &= ~IFF_IPVLAN_MASTER;
140 if (port->mode == IPVLAN_MODE_L3S) {
141 dev->priv_flags &= ~IFF_L3MDEV_MASTER;
142 ipvlan_unregister_nf_hook();
143 dev->l3mdev_ops = NULL;
144 }
77 netdev_rx_handler_unregister(dev); 145 netdev_rx_handler_unregister(dev);
78 cancel_work_sync(&port->wq); 146 cancel_work_sync(&port->wq);
79 __skb_queue_purge(&port->backlog); 147 __skb_queue_purge(&port->backlog);
@@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev)
132 struct net_device *phy_dev = ipvlan->phy_dev; 200 struct net_device *phy_dev = ipvlan->phy_dev;
133 struct ipvl_addr *addr; 201 struct ipvl_addr *addr;
134 202
135 if (ipvlan->port->mode == IPVLAN_MODE_L3) 203 if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
204 ipvlan->port->mode == IPVLAN_MODE_L3S)
136 dev->flags |= IFF_NOARP; 205 dev->flags |= IFF_NOARP;
137 else 206 else
138 dev->flags &= ~IFF_NOARP; 207 dev->flags &= ~IFF_NOARP;
@@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev,
372{ 441{
373 struct ipvl_dev *ipvlan = netdev_priv(dev); 442 struct ipvl_dev *ipvlan = netdev_priv(dev);
374 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 443 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
444 int err = 0;
375 445
376 if (data && data[IFLA_IPVLAN_MODE]) { 446 if (data && data[IFLA_IPVLAN_MODE]) {
377 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 447 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
378 448
379 ipvlan_set_port_mode(port, nmode); 449 err = ipvlan_set_port_mode(port, nmode);
380 } 450 }
381 return 0; 451 return err;
382} 452}
383 453
384static size_t ipvlan_nl_getsize(const struct net_device *dev) 454static size_t ipvlan_nl_getsize(const struct net_device *dev)
@@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
473 unregister_netdevice(dev); 543 unregister_netdevice(dev);
474 return err; 544 return err;
475 } 545 }
546 err = ipvlan_set_port_mode(port, mode);
547 if (err) {
548 unregister_netdevice(dev);
549 return err;
550 }
476 551
477 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); 552 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
478 ipvlan_set_port_mode(port, mode);
479
480 netif_stacked_transfer_operstate(phy_dev, dev); 553 netif_stacked_transfer_operstate(phy_dev, dev);
481 return 0; 554 return 0;
482} 555}