about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/networking/ipvlan.txt 7
-rw-r--r-- drivers/net/Kconfig 1
-rw-r--r-- drivers/net/ipvlan/ipvlan.h 6
-rw-r--r-- drivers/net/ipvlan/ipvlan_core.c 94
-rw-r--r-- drivers/net/ipvlan/ipvlan_main.c 87
-rw-r--r-- include/uapi/linux/if_link.h 1
6 files changed, 188 insertions, 8 deletions
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
index 14422f8fcdc4..24196cef7c91 100644
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
22 There are no module parameters for this driver and it can be configured 22 There are no module parameters for this driver and it can be configured
23using IProute2/ip utility. 23using IProute2/ip utility.
24 24
25 ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 } 25 ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }
26 26
27 e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 27 e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
28 28
@@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be
48used before packets are queued on the outbound device. In this mode the slaves 48used before packets are queued on the outbound device. In this mode the slaves
49will not receive nor can send multicast / broadcast traffic. 49will not receive nor can send multicast / broadcast traffic.
50 50
514.3 L3S mode:
52 This is very similar to the L3 mode except that iptables (conn-tracking)
53works in this mode and hence it is L3-symmetric (L3s). This mode has slightly lower
54performance, but that shouldn't matter since you are choosing it over plain-L3
55mode to make conn-tracking work.
51 56
525. What to choose (macvlan vs. ipvlan)? 575. What to choose (macvlan vs. ipvlan)?
53 These two devices are very similar in many regards and the specific use 58 These two devices are very similar in many regards and the specific use
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0c5415b05ea9..8768a625350d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@ config IPVLAN
149 tristate "IP-VLAN support" 149 tristate "IP-VLAN support"
150 depends on INET 150 depends on INET
151 depends on IPV6 151 depends on IPV6
152 depends on NET_L3_MASTER_DEV
152 ---help--- 153 ---help---
153 This allows one to create virtual devices off of a main interface 154 This allows one to create virtual devices off of a main interface
154 and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) 155 and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 695a5dc9ace3..7e0732f5ea07 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -23,11 +23,13 @@
23#include <linux/if_vlan.h> 23#include <linux/if_vlan.h>
24#include <linux/ip.h> 24#include <linux/ip.h>
25#include <linux/inetdevice.h> 25#include <linux/inetdevice.h>
26#include <linux/netfilter.h>
26#include <net/ip.h> 27#include <net/ip.h>
27#include <net/ip6_route.h> 28#include <net/ip6_route.h>
28#include <net/rtnetlink.h> 29#include <net/rtnetlink.h>
29#include <net/route.h> 30#include <net/route.h>
30#include <net/addrconf.h> 31#include <net/addrconf.h>
32#include <net/l3mdev.h>
31 33
32#define IPVLAN_DRV "ipvlan" 34#define IPVLAN_DRV "ipvlan"
33#define IPV_DRV_VER "0.1" 35#define IPV_DRV_VER "0.1"
@@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
124 const void *iaddr, bool is_v6); 126 const void *iaddr, bool is_v6);
125bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); 127bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
126void ipvlan_ht_addr_del(struct ipvl_addr *addr); 128void ipvlan_ht_addr_del(struct ipvl_addr *addr);
129struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
130 u16 proto);
131unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
132 const struct nf_hook_state *state);
127#endif /* __IPVLAN_H */ 133#endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b5f9511d819e..b4e990743e1d 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
560 case IPVLAN_MODE_L2: 560 case IPVLAN_MODE_L2:
561 return ipvlan_xmit_mode_l2(skb, dev); 561 return ipvlan_xmit_mode_l2(skb, dev);
562 case IPVLAN_MODE_L3: 562 case IPVLAN_MODE_L3:
563 case IPVLAN_MODE_L3S:
563 return ipvlan_xmit_mode_l3(skb, dev); 564 return ipvlan_xmit_mode_l3(skb, dev);
564 } 565 }
565 566
@@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
664 return ipvlan_handle_mode_l2(pskb, port); 665 return ipvlan_handle_mode_l2(pskb, port);
665 case IPVLAN_MODE_L3: 666 case IPVLAN_MODE_L3:
666 return ipvlan_handle_mode_l3(pskb, port); 667 return ipvlan_handle_mode_l3(pskb, port);
668 case IPVLAN_MODE_L3S:
669 return RX_HANDLER_PASS;
667 } 670 }
668 671
669 /* Should not reach here */ 672 /* Should not reach here */
@@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
672 kfree_skb(skb); 675 kfree_skb(skb);
673 return RX_HANDLER_CONSUMED; 676 return RX_HANDLER_CONSUMED;
674} 677}
678
679static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
680 struct net_device *dev)
681{
682 struct ipvl_addr *addr = NULL;
683 struct ipvl_port *port;
684 void *lyr3h;
685 int addr_type;
686
687 if (!dev || !netif_is_ipvlan_port(dev))
688 goto out;
689
690 port = ipvlan_port_get_rcu(dev);
691 if (!port || port->mode != IPVLAN_MODE_L3S)
692 goto out;
693
694 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
695 if (!lyr3h)
696 goto out;
697
698 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
699out:
700 return addr;
701}
702
703struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
704 u16 proto)
705{
706 struct ipvl_addr *addr;
707 struct net_device *sdev;
708
709 addr = ipvlan_skb_to_addr(skb, dev);
710 if (!addr)
711 goto out;
712
713 sdev = addr->master->dev;
714 switch (proto) {
715 case AF_INET:
716 {
717 int err;
718 struct iphdr *ip4h = ip_hdr(skb);
719
720 err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
721 ip4h->tos, sdev);
722 if (unlikely(err))
723 goto out;
724 break;
725 }
726 case AF_INET6:
727 {
728 struct dst_entry *dst;
729 struct ipv6hdr *ip6h = ipv6_hdr(skb);
730 int flags = RT6_LOOKUP_F_HAS_SADDR;
731 struct flowi6 fl6 = {
732 .flowi6_iif = sdev->ifindex,
733 .daddr = ip6h->daddr,
734 .saddr = ip6h->saddr,
735 .flowlabel = ip6_flowinfo(ip6h),
736 .flowi6_mark = skb->mark,
737 .flowi6_proto = ip6h->nexthdr,
738 };
739
740 skb_dst_drop(skb);
741 dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
742 skb_dst_set(skb, dst);
743 break;
744 }
745 default:
746 break;
747 }
748
749out:
750 return skb;
751}
752
753unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
754 const struct nf_hook_state *state)
755{
756 struct ipvl_addr *addr;
757 unsigned int len;
758
759 addr = ipvlan_skb_to_addr(skb, skb->dev);
760 if (!addr)
761 goto out;
762
763 skb->dev = addr->master->dev;
764 len = skb->len + ETH_HLEN;
765 ipvlan_count_rx(addr->master, len, true, false);
766out:
767 return NF_ACCEPT;
768}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 18b4e8c7f68a..f442eb366863 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -9,24 +9,87 @@
9 9
10#include "ipvlan.h" 10#include "ipvlan.h"
11 11
12static u32 ipvl_nf_hook_refcnt = 0;
13
14static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
15 {
16 .hook = ipvlan_nf_input,
17 .pf = NFPROTO_IPV4,
18 .hooknum = NF_INET_LOCAL_IN,
19 .priority = INT_MAX,
20 },
21 {
22 .hook = ipvlan_nf_input,
23 .pf = NFPROTO_IPV6,
24 .hooknum = NF_INET_LOCAL_IN,
25 .priority = INT_MAX,
26 },
27};
28
29static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
30 .l3mdev_l3_rcv = ipvlan_l3_rcv,
31};
32
12static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) 33static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
13{ 34{
14 ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; 35 ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
15} 36}
16 37
17static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) 38static int ipvlan_register_nf_hook(void)
39{
40 int err = 0;
41
42 if (!ipvl_nf_hook_refcnt) {
43 err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
44 if (!err)
45 ipvl_nf_hook_refcnt = 1;
46 } else {
47 ipvl_nf_hook_refcnt++;
48 }
49
50 return err;
51}
52
53static void ipvlan_unregister_nf_hook(void)
54{
55 WARN_ON(!ipvl_nf_hook_refcnt);
56
57 ipvl_nf_hook_refcnt--;
58 if (!ipvl_nf_hook_refcnt)
59 _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
60}
61
62static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
18{ 63{
19 struct ipvl_dev *ipvlan; 64 struct ipvl_dev *ipvlan;
65 struct net_device *mdev = port->dev;
66 int err = 0;
20 67
68 ASSERT_RTNL();
21 if (port->mode != nval) { 69 if (port->mode != nval) {
70 if (nval == IPVLAN_MODE_L3S) {
71 /* New mode is L3S */
72 err = ipvlan_register_nf_hook();
73 if (!err) {
74 mdev->l3mdev_ops = &ipvl_l3mdev_ops;
75 mdev->priv_flags |= IFF_L3MDEV_MASTER;
76 } else
77 return err;
78 } else if (port->mode == IPVLAN_MODE_L3S) {
79 /* Old mode was L3S */
80 mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
81 ipvlan_unregister_nf_hook();
82 mdev->l3mdev_ops = NULL;
83 }
22 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 84 list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
23 if (nval == IPVLAN_MODE_L3) 85 if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
24 ipvlan->dev->flags |= IFF_NOARP; 86 ipvlan->dev->flags |= IFF_NOARP;
25 else 87 else
26 ipvlan->dev->flags &= ~IFF_NOARP; 88 ipvlan->dev->flags &= ~IFF_NOARP;
27 } 89 }
28 port->mode = nval; 90 port->mode = nval;
29 } 91 }
92 return err;
30} 93}
31 94
32static int ipvlan_port_create(struct net_device *dev) 95static int ipvlan_port_create(struct net_device *dev)
@@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
74 struct ipvl_port *port = ipvlan_port_get_rtnl(dev); 137 struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
75 138
76 dev->priv_flags &= ~IFF_IPVLAN_MASTER; 139 dev->priv_flags &= ~IFF_IPVLAN_MASTER;
140 if (port->mode == IPVLAN_MODE_L3S) {
141 dev->priv_flags &= ~IFF_L3MDEV_MASTER;
142 ipvlan_unregister_nf_hook();
143 dev->l3mdev_ops = NULL;
144 }
77 netdev_rx_handler_unregister(dev); 145 netdev_rx_handler_unregister(dev);
78 cancel_work_sync(&port->wq); 146 cancel_work_sync(&port->wq);
79 __skb_queue_purge(&port->backlog); 147 __skb_queue_purge(&port->backlog);
@@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev)
132 struct net_device *phy_dev = ipvlan->phy_dev; 200 struct net_device *phy_dev = ipvlan->phy_dev;
133 struct ipvl_addr *addr; 201 struct ipvl_addr *addr;
134 202
135 if (ipvlan->port->mode == IPVLAN_MODE_L3) 203 if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
204 ipvlan->port->mode == IPVLAN_MODE_L3S)
136 dev->flags |= IFF_NOARP; 205 dev->flags |= IFF_NOARP;
137 else 206 else
138 dev->flags &= ~IFF_NOARP; 207 dev->flags &= ~IFF_NOARP;
@@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev,
372{ 441{
373 struct ipvl_dev *ipvlan = netdev_priv(dev); 442 struct ipvl_dev *ipvlan = netdev_priv(dev);
374 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 443 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
444 int err = 0;
375 445
376 if (data && data[IFLA_IPVLAN_MODE]) { 446 if (data && data[IFLA_IPVLAN_MODE]) {
377 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 447 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
378 448
379 ipvlan_set_port_mode(port, nmode); 449 err = ipvlan_set_port_mode(port, nmode);
380 } 450 }
381 return 0; 451 return err;
382} 452}
383 453
384static size_t ipvlan_nl_getsize(const struct net_device *dev) 454static size_t ipvlan_nl_getsize(const struct net_device *dev)
@@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
473 unregister_netdevice(dev); 543 unregister_netdevice(dev);
474 return err; 544 return err;
475 } 545 }
546 err = ipvlan_set_port_mode(port, mode);
547 if (err) {
548 unregister_netdevice(dev);
549 return err;
550 }
476 551
477 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); 552 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
478 ipvlan_set_port_mode(port, mode);
479
480 netif_stacked_transfer_operstate(phy_dev, dev); 553 netif_stacked_transfer_operstate(phy_dev, dev);
481 return 0; 554 return 0;
482} 555}
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 2351776a724f..7ec9e99d5491 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -464,6 +464,7 @@ enum {
464enum ipvlan_mode { 464enum ipvlan_mode {
465 IPVLAN_MODE_L2 = 0, 465 IPVLAN_MODE_L2 = 0,
466 IPVLAN_MODE_L3, 466 IPVLAN_MODE_L3,
467 IPVLAN_MODE_L3S,
467 IPVLAN_MODE_MAX 468 IPVLAN_MODE_MAX
468}; 469};
469 470