diff options
-rw-r--r-- | Documentation/networking/ipvlan.txt | 7 | ||||
-rw-r--r-- | drivers/net/Kconfig | 1 | ||||
-rw-r--r-- | drivers/net/ipvlan/ipvlan.h | 6 | ||||
-rw-r--r-- | drivers/net/ipvlan/ipvlan_core.c | 94 | ||||
-rw-r--r-- | drivers/net/ipvlan/ipvlan_main.c | 87 | ||||
-rw-r--r-- | include/uapi/linux/if_link.h | 1 |
6 files changed, 188 insertions, 8 deletions
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 14422f8fcdc4..24196cef7c91 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt | |||
@@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module | |||
22 | There are no module parameters for this driver and it can be configured | 22 | There are no module parameters for this driver and it can be configured |
23 | using IProute2/ip utility. | 23 | using IProute2/ip utility. |
24 | 24 | ||
25 | ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 } | 25 | ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s } |
26 | 26 | ||
27 | e.g. ip link add link eth0 ipvl0 type ipvlan mode l2 | 27 | e.g. ip link add link eth0 ipvl0 type ipvlan mode l2 |
28 | 28 | ||
@@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be | |||
48 | used before packets are queued on the outbound device. In this mode the slaves | 48 | used before packets are queued on the outbound device. In this mode the slaves |
49 | will not receive nor can send multicast / broadcast traffic. | 49 | will not receive nor can send multicast / broadcast traffic. |
50 | 50 | ||
51 | 4.3 L3S mode: | ||
52 | This is very similar to the L3 mode except that iptables (conn-tracking) | ||
53 | works in this mode, and hence it is L3-symmetric (L3s). This mode has slightly lower | |
54 | performance than plain L3, but that is an acceptable trade-off, since the reason to | |
55 | choose it over plain-L3 mode is to make conn-tracking work. | |
51 | 56 | ||
52 | 5. What to choose (macvlan vs. ipvlan)? | 57 | 5. What to choose (macvlan vs. ipvlan)? |
53 | These two devices are very similar in many regards and the specific use | 58 | These two devices are very similar in many regards and the specific use |
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0c5415b05ea9..8768a625350d 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig | |||
@@ -149,6 +149,7 @@ config IPVLAN | |||
149 | tristate "IP-VLAN support" | 149 | tristate "IP-VLAN support" |
150 | depends on INET | 150 | depends on INET |
151 | depends on IPV6 | 151 | depends on IPV6 |
152 | depends on NET_L3_MASTER_DEV | ||
152 | ---help--- | 153 | ---help--- |
153 | This allows one to create virtual devices off of a main interface | 154 | This allows one to create virtual devices off of a main interface |
154 | and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) | 155 | and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) |
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 695a5dc9ace3..7e0732f5ea07 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h | |||
@@ -23,11 +23,13 @@ | |||
23 | #include <linux/if_vlan.h> | 23 | #include <linux/if_vlan.h> |
24 | #include <linux/ip.h> | 24 | #include <linux/ip.h> |
25 | #include <linux/inetdevice.h> | 25 | #include <linux/inetdevice.h> |
26 | #include <linux/netfilter.h> | ||
26 | #include <net/ip.h> | 27 | #include <net/ip.h> |
27 | #include <net/ip6_route.h> | 28 | #include <net/ip6_route.h> |
28 | #include <net/rtnetlink.h> | 29 | #include <net/rtnetlink.h> |
29 | #include <net/route.h> | 30 | #include <net/route.h> |
30 | #include <net/addrconf.h> | 31 | #include <net/addrconf.h> |
32 | #include <net/l3mdev.h> | ||
31 | 33 | ||
32 | #define IPVLAN_DRV "ipvlan" | 34 | #define IPVLAN_DRV "ipvlan" |
33 | #define IPV_DRV_VER "0.1" | 35 | #define IPV_DRV_VER "0.1" |
@@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, | |||
124 | const void *iaddr, bool is_v6); | 126 | const void *iaddr, bool is_v6); |
125 | bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); | 127 | bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); |
126 | void ipvlan_ht_addr_del(struct ipvl_addr *addr); | 128 | void ipvlan_ht_addr_del(struct ipvl_addr *addr); |
129 | struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, | ||
130 | u16 proto); | ||
131 | unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, | ||
132 | const struct nf_hook_state *state); | ||
127 | #endif /* __IPVLAN_H */ | 133 | #endif /* __IPVLAN_H */ |
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b5f9511d819e..b4e990743e1d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c | |||
@@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) | |||
560 | case IPVLAN_MODE_L2: | 560 | case IPVLAN_MODE_L2: |
561 | return ipvlan_xmit_mode_l2(skb, dev); | 561 | return ipvlan_xmit_mode_l2(skb, dev); |
562 | case IPVLAN_MODE_L3: | 562 | case IPVLAN_MODE_L3: |
563 | case IPVLAN_MODE_L3S: | ||
563 | return ipvlan_xmit_mode_l3(skb, dev); | 564 | return ipvlan_xmit_mode_l3(skb, dev); |
564 | } | 565 | } |
565 | 566 | ||
@@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) | |||
664 | return ipvlan_handle_mode_l2(pskb, port); | 665 | return ipvlan_handle_mode_l2(pskb, port); |
665 | case IPVLAN_MODE_L3: | 666 | case IPVLAN_MODE_L3: |
666 | return ipvlan_handle_mode_l3(pskb, port); | 667 | return ipvlan_handle_mode_l3(pskb, port); |
668 | case IPVLAN_MODE_L3S: | ||
669 | return RX_HANDLER_PASS; | ||
667 | } | 670 | } |
668 | 671 | ||
669 | /* Should not reach here */ | 672 | /* Should not reach here */ |
@@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) | |||
672 | kfree_skb(skb); | 675 | kfree_skb(skb); |
673 | return RX_HANDLER_CONSUMED; | 676 | return RX_HANDLER_CONSUMED; |
674 | } | 677 | } |
678 | |||
679 | static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, | ||
680 | struct net_device *dev) | ||
681 | { | ||
682 | struct ipvl_addr *addr = NULL; | ||
683 | struct ipvl_port *port; | ||
684 | void *lyr3h; | ||
685 | int addr_type; | ||
686 | |||
687 | if (!dev || !netif_is_ipvlan_port(dev)) | ||
688 | goto out; | ||
689 | |||
690 | port = ipvlan_port_get_rcu(dev); | ||
691 | if (!port || port->mode != IPVLAN_MODE_L3S) | ||
692 | goto out; | ||
693 | |||
694 | lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); | ||
695 | if (!lyr3h) | ||
696 | goto out; | ||
697 | |||
698 | addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); | ||
699 | out: | ||
700 | return addr; | ||
701 | } | ||
702 | |||
703 | struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, | ||
704 | u16 proto) | ||
705 | { | ||
706 | struct ipvl_addr *addr; | ||
707 | struct net_device *sdev; | ||
708 | |||
709 | addr = ipvlan_skb_to_addr(skb, dev); | ||
710 | if (!addr) | ||
711 | goto out; | ||
712 | |||
713 | sdev = addr->master->dev; | ||
714 | switch (proto) { | ||
715 | case AF_INET: | ||
716 | { | ||
717 | int err; | ||
718 | struct iphdr *ip4h = ip_hdr(skb); | ||
719 | |||
720 | err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, | ||
721 | ip4h->tos, sdev); | ||
722 | if (unlikely(err)) | ||
723 | goto out; | ||
724 | break; | ||
725 | } | ||
726 | case AF_INET6: | ||
727 | { | ||
728 | struct dst_entry *dst; | ||
729 | struct ipv6hdr *ip6h = ipv6_hdr(skb); | ||
730 | int flags = RT6_LOOKUP_F_HAS_SADDR; | ||
731 | struct flowi6 fl6 = { | ||
732 | .flowi6_iif = sdev->ifindex, | ||
733 | .daddr = ip6h->daddr, | ||
734 | .saddr = ip6h->saddr, | ||
735 | .flowlabel = ip6_flowinfo(ip6h), | ||
736 | .flowi6_mark = skb->mark, | ||
737 | .flowi6_proto = ip6h->nexthdr, | ||
738 | }; | ||
739 | |||
740 | skb_dst_drop(skb); | ||
741 | dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); | ||
742 | skb_dst_set(skb, dst); | ||
743 | break; | ||
744 | } | ||
745 | default: | ||
746 | break; | ||
747 | } | ||
748 | |||
749 | out: | ||
750 | return skb; | ||
751 | } | ||
752 | |||
753 | unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, | ||
754 | const struct nf_hook_state *state) | ||
755 | { | ||
756 | struct ipvl_addr *addr; | ||
757 | unsigned int len; | ||
758 | |||
759 | addr = ipvlan_skb_to_addr(skb, skb->dev); | ||
760 | if (!addr) | ||
761 | goto out; | ||
762 | |||
763 | skb->dev = addr->master->dev; | ||
764 | len = skb->len + ETH_HLEN; | ||
765 | ipvlan_count_rx(addr->master, len, true, false); | ||
766 | out: | ||
767 | return NF_ACCEPT; | ||
768 | } | ||
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 18b4e8c7f68a..f442eb366863 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c | |||
@@ -9,24 +9,87 @@ | |||
9 | 9 | ||
10 | #include "ipvlan.h" | 10 | #include "ipvlan.h" |
11 | 11 | ||
12 | static u32 ipvl_nf_hook_refcnt = 0; | ||
13 | |||
14 | static struct nf_hook_ops ipvl_nfops[] __read_mostly = { | ||
15 | { | ||
16 | .hook = ipvlan_nf_input, | ||
17 | .pf = NFPROTO_IPV4, | ||
18 | .hooknum = NF_INET_LOCAL_IN, | ||
19 | .priority = INT_MAX, | ||
20 | }, | ||
21 | { | ||
22 | .hook = ipvlan_nf_input, | ||
23 | .pf = NFPROTO_IPV6, | ||
24 | .hooknum = NF_INET_LOCAL_IN, | ||
25 | .priority = INT_MAX, | ||
26 | }, | ||
27 | }; | ||
28 | |||
29 | static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { | ||
30 | .l3mdev_l3_rcv = ipvlan_l3_rcv, | ||
31 | }; | ||
32 | |||
12 | static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) | 33 | static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) |
13 | { | 34 | { |
14 | ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; | 35 | ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; |
15 | } | 36 | } |
16 | 37 | ||
17 | static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) | 38 | static int ipvlan_register_nf_hook(void) |
39 | { | ||
40 | int err = 0; | ||
41 | |||
42 | if (!ipvl_nf_hook_refcnt) { | ||
43 | err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); | ||
44 | if (!err) | ||
45 | ipvl_nf_hook_refcnt = 1; | ||
46 | } else { | ||
47 | ipvl_nf_hook_refcnt++; | ||
48 | } | ||
49 | |||
50 | return err; | ||
51 | } | ||
52 | |||
53 | static void ipvlan_unregister_nf_hook(void) | ||
54 | { | ||
55 | WARN_ON(!ipvl_nf_hook_refcnt); | ||
56 | |||
57 | ipvl_nf_hook_refcnt--; | ||
58 | if (!ipvl_nf_hook_refcnt) | ||
59 | _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); | ||
60 | } | ||
61 | |||
62 | static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) | ||
18 | { | 63 | { |
19 | struct ipvl_dev *ipvlan; | 64 | struct ipvl_dev *ipvlan; |
65 | struct net_device *mdev = port->dev; | ||
66 | int err = 0; | ||
20 | 67 | ||
68 | ASSERT_RTNL(); | ||
21 | if (port->mode != nval) { | 69 | if (port->mode != nval) { |
70 | if (nval == IPVLAN_MODE_L3S) { | ||
71 | /* New mode is L3S */ | ||
72 | err = ipvlan_register_nf_hook(); | ||
73 | if (!err) { | ||
74 | mdev->l3mdev_ops = &ipvl_l3mdev_ops; | ||
75 | mdev->priv_flags |= IFF_L3MDEV_MASTER; | ||
76 | } else | ||
77 | return err; | ||
78 | } else if (port->mode == IPVLAN_MODE_L3S) { | ||
79 | /* Old mode was L3S */ | ||
80 | mdev->priv_flags &= ~IFF_L3MDEV_MASTER; | ||
81 | ipvlan_unregister_nf_hook(); | ||
82 | mdev->l3mdev_ops = NULL; | ||
83 | } | ||
22 | list_for_each_entry(ipvlan, &port->ipvlans, pnode) { | 84 | list_for_each_entry(ipvlan, &port->ipvlans, pnode) { |
23 | if (nval == IPVLAN_MODE_L3) | 85 | if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) |
24 | ipvlan->dev->flags |= IFF_NOARP; | 86 | ipvlan->dev->flags |= IFF_NOARP; |
25 | else | 87 | else |
26 | ipvlan->dev->flags &= ~IFF_NOARP; | 88 | ipvlan->dev->flags &= ~IFF_NOARP; |
27 | } | 89 | } |
28 | port->mode = nval; | 90 | port->mode = nval; |
29 | } | 91 | } |
92 | return err; | ||
30 | } | 93 | } |
31 | 94 | ||
32 | static int ipvlan_port_create(struct net_device *dev) | 95 | static int ipvlan_port_create(struct net_device *dev) |
@@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev) | |||
74 | struct ipvl_port *port = ipvlan_port_get_rtnl(dev); | 137 | struct ipvl_port *port = ipvlan_port_get_rtnl(dev); |
75 | 138 | ||
76 | dev->priv_flags &= ~IFF_IPVLAN_MASTER; | 139 | dev->priv_flags &= ~IFF_IPVLAN_MASTER; |
140 | if (port->mode == IPVLAN_MODE_L3S) { | ||
141 | dev->priv_flags &= ~IFF_L3MDEV_MASTER; | ||
142 | ipvlan_unregister_nf_hook(); | ||
143 | dev->l3mdev_ops = NULL; | ||
144 | } | ||
77 | netdev_rx_handler_unregister(dev); | 145 | netdev_rx_handler_unregister(dev); |
78 | cancel_work_sync(&port->wq); | 146 | cancel_work_sync(&port->wq); |
79 | __skb_queue_purge(&port->backlog); | 147 | __skb_queue_purge(&port->backlog); |
@@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev) | |||
132 | struct net_device *phy_dev = ipvlan->phy_dev; | 200 | struct net_device *phy_dev = ipvlan->phy_dev; |
133 | struct ipvl_addr *addr; | 201 | struct ipvl_addr *addr; |
134 | 202 | ||
135 | if (ipvlan->port->mode == IPVLAN_MODE_L3) | 203 | if (ipvlan->port->mode == IPVLAN_MODE_L3 || |
204 | ipvlan->port->mode == IPVLAN_MODE_L3S) | ||
136 | dev->flags |= IFF_NOARP; | 205 | dev->flags |= IFF_NOARP; |
137 | else | 206 | else |
138 | dev->flags &= ~IFF_NOARP; | 207 | dev->flags &= ~IFF_NOARP; |
@@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev, | |||
372 | { | 441 | { |
373 | struct ipvl_dev *ipvlan = netdev_priv(dev); | 442 | struct ipvl_dev *ipvlan = netdev_priv(dev); |
374 | struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); | 443 | struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); |
444 | int err = 0; | ||
375 | 445 | ||
376 | if (data && data[IFLA_IPVLAN_MODE]) { | 446 | if (data && data[IFLA_IPVLAN_MODE]) { |
377 | u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); | 447 | u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); |
378 | 448 | ||
379 | ipvlan_set_port_mode(port, nmode); | 449 | err = ipvlan_set_port_mode(port, nmode); |
380 | } | 450 | } |
381 | return 0; | 451 | return err; |
382 | } | 452 | } |
383 | 453 | ||
384 | static size_t ipvlan_nl_getsize(const struct net_device *dev) | 454 | static size_t ipvlan_nl_getsize(const struct net_device *dev) |
@@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, | |||
473 | unregister_netdevice(dev); | 543 | unregister_netdevice(dev); |
474 | return err; | 544 | return err; |
475 | } | 545 | } |
546 | err = ipvlan_set_port_mode(port, mode); | ||
547 | if (err) { | ||
548 | unregister_netdevice(dev); | ||
549 | return err; | ||
550 | } | ||
476 | 551 | ||
477 | list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); | 552 | list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); |
478 | ipvlan_set_port_mode(port, mode); | ||
479 | |||
480 | netif_stacked_transfer_operstate(phy_dev, dev); | 553 | netif_stacked_transfer_operstate(phy_dev, dev); |
481 | return 0; | 554 | return 0; |
482 | } | 555 | } |
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2351776a724f..7ec9e99d5491 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h | |||
@@ -464,6 +464,7 @@ enum { | |||
464 | enum ipvlan_mode { | 464 | enum ipvlan_mode { |
465 | IPVLAN_MODE_L2 = 0, | 465 | IPVLAN_MODE_L2 = 0, |
466 | IPVLAN_MODE_L3, | 466 | IPVLAN_MODE_L3, |
467 | IPVLAN_MODE_L3S, | ||
467 | IPVLAN_MODE_MAX | 468 | IPVLAN_MODE_MAX |
468 | }; | 469 | }; |
469 | 470 | ||