diff options
-rw-r--r-- | Documentation/networking/ipvlan.txt | 107 | ||||
-rw-r--r-- | drivers/net/Kconfig | 18 | ||||
-rw-r--r-- | drivers/net/Makefile | 1 | ||||
-rw-r--r-- | drivers/net/ipvlan/Makefile | 7 | ||||
-rw-r--r-- | drivers/net/ipvlan/ipvlan.h | 130 | ||||
-rw-r--r-- | drivers/net/ipvlan/ipvlan_core.c | 607 | ||||
-rw-r--r-- | drivers/net/ipvlan/ipvlan_main.c | 789 | ||||
-rw-r--r-- | include/linux/netdevice.h | 4 | ||||
-rw-r--r-- | include/uapi/linux/if_link.h | 15 |
9 files changed, 1678 insertions, 0 deletions
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt new file mode 100644 index 000000000000..cf996394e466 --- /dev/null +++ b/Documentation/networking/ipvlan.txt | |||
@@ -0,0 +1,107 @@ | |||
1 | |||
2 | IPVLAN Driver HOWTO | ||
3 | |||
4 | Initial Release: | ||
5 | Mahesh Bandewar <maheshb AT google.com> | ||
6 | |||
7 | 1. Introduction: | ||
8 | This is conceptually very similar to the macvlan driver with one major | ||
9 | exception of using L3 for mux-ing / demux-ing among slaves. This property makes | ||
10 | the master device share the L2 with its slave devices. I have developed this | ||
11 | driver in conjunction with network namespaces and am not sure if there is a use | ||
12 | case outside of it. | ||
13 | |||
14 | |||
15 | 2. Building and Installation: | ||
16 | In order to build the driver, please select the config item CONFIG_IPVLAN. | ||
17 | The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module | ||
18 | (CONFIG_IPVLAN=m). | ||
19 | |||
20 | |||
21 | 3. Configuration: | ||
22 | There are no module parameters for this driver and it can be configured | ||
23 | using IProute2/ip utility. | ||
24 | |||
25 | ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 } | ||
26 | |||
27 | e.g. ip link add link eth0 ipvl0 type ipvlan mode l2 | ||
28 | |||
29 | |||
30 | 4. Operating modes: | ||
31 | IPvlan has two modes of operation - L2 and L3. For a given master device, | ||
32 | you can select one of these two modes and all slaves on that master will | ||
33 | operate in the same (selected) mode. The RX mode is almost identical except | ||
34 | that in L3 mode the slaves won't receive any multicast / broadcast traffic. | ||
35 | L3 mode is more restrictive since routing is controlled from the other (mostly) | ||
36 | default namespace. | ||
37 | |||
38 | 4.1 L2 mode: | ||
39 | In this mode TX processing happens on the stack instance attached to the | ||
40 | slave device and packets are switched and queued to the master device to send | ||
41 | out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) | ||
42 | as well. | ||
43 | |||
44 | 4.2 L3 mode: | ||
45 | In this mode TX processing up to L3 happens on the stack instance attached | ||
46 | to the slave device and packets are switched to the stack instance of the | ||
47 | master device for the L2 processing and routing from that instance will be | ||
48 | used before packets are queued on the outbound device. In this mode the slaves | ||
49 | will neither receive nor send multicast / broadcast traffic. | ||
50 | |||
51 | |||
52 | 5. What to choose (macvlan vs. ipvlan)? | ||
53 | These two devices are very similar in many regards and the specific use | ||
54 | case could very well define which device to choose. If one of the following | ||
55 | situations defines your use case then you can choose to use ipvlan - | ||
56 | (a) The Linux host that is connected to the external switch / router has | ||
57 | policy configured that allows only one mac per port. | ||
58 | (b) The number of virtual devices created on a master exceeds the MAC capacity | ||
59 | and puts the NIC in promiscuous mode, and degraded performance is a concern. | ||
60 | (c) If the slave device is to be put into the hostile / untrusted network | ||
61 | namespace where L2 on the slave could be changed / misused. | ||
62 | |||
63 | |||
64 | 6. Example configuration: | ||
65 | |||
66 | +=============================================================+ | ||
67 | | Host: host1 | | ||
68 | | | | ||
69 | | +----------------------+ +----------------------+ | | ||
70 | | | NS:ns0 | | NS:ns1 | | | ||
71 | | | | | | | | ||
72 | | | | | | | | ||
73 | | | ipvl0 | | ipvl1 | | | ||
74 | | +----------#-----------+ +-----------#----------+ | | ||
75 | | # # | | ||
76 | | ################################ | | ||
77 | | # eth0 | | ||
78 | +==============================#==============================+ | ||
79 | |||
80 | |||
81 | (a) Create two network namespaces - ns0, ns1 | ||
82 | ip netns add ns0 | ||
83 | ip netns add ns1 | ||
84 | |||
85 | (b) Create two ipvlan slaves on eth0 (master device) | ||
86 | ip link add link eth0 ipvl0 type ipvlan mode l2 | ||
87 | ip link add link eth0 ipvl1 type ipvlan mode l2 | ||
88 | |||
89 | (c) Assign slaves to the respective network namespaces | ||
90 | ip link set dev ipvl0 netns ns0 | ||
91 | ip link set dev ipvl1 netns ns1 | ||
92 | |||
93 | (d) Now switch to the namespace (ns0 or ns1) to configure the slave devices | ||
94 | - For ns0 | ||
95 | (1) ip netns exec ns0 bash | ||
96 | (2) ip link set dev ipvl0 up | ||
97 | (3) ip link set dev lo up | ||
98 | (4) ip -4 addr add 127.0.0.1 dev lo | ||
99 | (5) ip -4 addr add $IPADDR dev ipvl0 | ||
100 | (6) ip -4 route add default via $ROUTER dev ipvl0 | ||
101 | - For ns1 | ||
102 | (1) ip netns exec ns1 bash | ||
103 | (2) ip link set dev ipvl1 up | ||
104 | (3) ip link set dev lo up | ||
105 | (4) ip -4 addr add 127.0.0.1 dev lo | ||
106 | (5) ip -4 addr add $IPADDR dev ipvl1 | ||
107 | (6) ip -4 route add default via $ROUTER dev ipvl1 | ||
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f9009be3f307..b6d64f546574 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig | |||
@@ -145,6 +145,24 @@ config MACVTAP | |||
145 | To compile this driver as a module, choose M here: the module | 145 | To compile this driver as a module, choose M here: the module |
146 | will be called macvtap. | 146 | will be called macvtap. |
147 | 147 | ||
148 | |||
config IPVLAN
	tristate "IP-VLAN support"
	# The driver calls directly into the IPv4 and IPv6 output paths
	# (ip_route_output_flow/ip_local_out, ip6_route_output/ip6_local_out),
	# so both stacks must be enabled; depending on IPV6 also prevents
	# IPVLAN=y when IPV6=m.
	depends on INET
	depends on IPV6
	---help---
	  This allows one to create virtual devices off of a main interface
	  and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
	  on packets. All interfaces (including the main interface) share L2
	  making it transparent to the connected L2 switch.

	  Ipvlan devices can be added using the "ip" command from the
	  iproute2 package starting with the iproute2-3.19 release:

	  "ip link add link <main-dev> [ NAME ] type ipvlan"

	  To compile this driver as a module, choose M here: the module
	  will be called ipvlan.
164 | |||
165 | |||
148 | config VXLAN | 166 | config VXLAN |
149 | tristate "Virtual eXtensible Local Area Network (VXLAN)" | 167 | tristate "Virtual eXtensible Local Area Network (VXLAN)" |
150 | depends on INET | 168 | depends on INET |
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 61aefdd1e173..e25fdd7d905e 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile | |||
@@ -6,6 +6,7 @@ | |||
6 | # Networking Core Drivers | 6 | # Networking Core Drivers |
7 | # | 7 | # |
8 | obj-$(CONFIG_BONDING) += bonding/ | 8 | obj-$(CONFIG_BONDING) += bonding/ |
9 | obj-$(CONFIG_IPVLAN) += ipvlan/ | ||
9 | obj-$(CONFIG_DUMMY) += dummy.o | 10 | obj-$(CONFIG_DUMMY) += dummy.o |
10 | obj-$(CONFIG_EQUALIZER) += eql.o | 11 | obj-$(CONFIG_EQUALIZER) += eql.o |
11 | obj-$(CONFIG_IFB) += ifb.o | 12 | obj-$(CONFIG_IFB) += ifb.o |
diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile new file mode 100644 index 000000000000..df79910192d6 --- /dev/null +++ b/drivers/net/ipvlan/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # Makefile for the Ethernet Ipvlan driver | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_IPVLAN) += ipvlan.o | ||
6 | |||
7 | ipvlan-objs := ipvlan_core.o ipvlan_main.o | ||
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h new file mode 100644 index 000000000000..ab3e7614ed71 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan.h | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation; either version 2 of | ||
7 | * the License, or (at your option) any later version. | ||
8 | * | ||
9 | */ | ||
10 | #ifndef __IPVLAN_H | ||
11 | #define __IPVLAN_H | ||
12 | |||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/rculist.h> | ||
18 | #include <linux/notifier.h> | ||
19 | #include <linux/netdevice.h> | ||
20 | #include <linux/etherdevice.h> | ||
21 | #include <linux/if_arp.h> | ||
22 | #include <linux/if_link.h> | ||
23 | #include <linux/if_vlan.h> | ||
24 | #include <linux/ip.h> | ||
25 | #include <linux/inetdevice.h> | ||
26 | #include <net/rtnetlink.h> | ||
27 | #include <net/gre.h> | ||
28 | #include <net/route.h> | ||
29 | #include <net/addrconf.h> | ||
30 | |||
31 | #define IPVLAN_DRV "ipvlan" | ||
32 | #define IPV_DRV_VER "0.1" | ||
33 | |||
34 | #define IPVLAN_HASH_SIZE (1 << BITS_PER_BYTE) | ||
35 | #define IPVLAN_HASH_MASK (IPVLAN_HASH_SIZE - 1) | ||
36 | |||
37 | #define IPVLAN_MAC_FILTER_BITS 8 | ||
38 | #define IPVLAN_MAC_FILTER_SIZE (1 << IPVLAN_MAC_FILTER_BITS) | ||
39 | #define IPVLAN_MAC_FILTER_MASK (IPVLAN_MAC_FILTER_SIZE - 1) | ||
40 | |||
41 | typedef enum { /* Kind of L3 header reported by ipvlan_get_L3_hdr(); also tags stored addresses */ | ||
42 | IPVL_IPV6 = 0, /* IPv6 header; also the atype of IPv6 entries in the address hash */ | ||
43 | IPVL_ICMPV6, /* ICMPv6 message sent from the unspecified address; the NS target is looked up */ | ||
44 | IPVL_IPV4, /* IPv4 header; also the atype of IPv4 entries in the address hash */ | ||
45 | IPVL_ARP, /* ARP header; the IPv4 address in the ARP body is looked up */ | ||
46 | } ipvl_hdr_type; | ||
47 | |||
48 | struct ipvl_pcpu_stats { /* Per-CPU traffic counters of one ipvlan device */ | ||
49 | u64 rx_pkts; /* frames delivered successfully (see ipvlan_count_rx) */ | ||
50 | u64 rx_bytes; /* byte total of those frames */ | ||
51 | u64 rx_mcast; /* delivered frames that were flagged as multicast */ | ||
52 | u64 tx_pkts; /* presumably transmitted frames; not updated in ipvlan_core.c */ | ||
53 | u64 tx_bytes; /* presumably transmitted bytes; not updated in ipvlan_core.c */ | ||
54 | struct u64_stats_sync syncp; /* brackets updates of the u64 RX counters */ | ||
55 | u32 rx_errs; /* failed deliveries; bumped with this_cpu_inc() outside syncp */ | ||
56 | u32 tx_drps; /* presumably TX drops; not updated in ipvlan_core.c */ | ||
57 | }; | ||
58 | |||
59 | struct ipvl_port; | ||
60 | |||
61 | struct ipvl_dev { /* Private state of one ipvlan (slave) device */ | ||
62 | struct net_device *dev; /* the ipvlan net_device that frames are delivered to */ | ||
63 | struct list_head pnode; /* linkage in ipvl_port.ipvlans */ | ||
64 | struct ipvl_port *port; /* port whose address hash this device uses */ | ||
65 | struct net_device *phy_dev; /* device that outbound frames are handed to */ | ||
66 | struct list_head addrs; /* ipvl_addr entries of this device, linked via anode */ | ||
67 | int ipv4cnt; /* presumably the number of IPv4 entries on addrs - TODO confirm */ | ||
68 | int ipv6cnt; /* presumably the number of IPv6 entries on addrs - TODO confirm */ | ||
69 | struct ipvl_pcpu_stats *pcpu_stats; /* per-CPU counters, accessed via this_cpu_ptr() */ | ||
70 | DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); /* indexed by ipvlan_mac_hash(dest MAC); a clear bit skips multicast delivery */ | ||
71 | netdev_features_t sfeatures; | ||
72 | u32 msg_enable; | ||
73 | u16 mtu_adj; /* subtracted from the lower device MTU in ipvlan_adjust_mtu() */ | ||
74 | }; | ||
75 | |||
76 | struct ipvl_addr { /* One IPv4 or IPv6 address owned by an ipvlan device */ | ||
77 | struct ipvl_dev *master; /* Back pointer to the owning ipvlan device (struct ipvl_dev) */ | ||
78 | union { | ||
79 | struct in6_addr ip6; /* IPv6 address on logical interface */ | ||
80 | struct in_addr ip4; /* IPv4 address on logical interface */ | ||
81 | } ipu; | ||
82 | #define ip6addr ipu.ip6 | ||
83 | #define ip4addr ipu.ip4 | ||
84 | struct hlist_node hlnode; /* Linkage in the per-port hash table (ipvl_port.hlhead) */ | ||
85 | struct list_head anode; /* Linkage in the owning device's addrs list */ | ||
86 | struct rcu_head rcu; | ||
87 | ipvl_hdr_type atype; /* IPVL_IPV6 or IPVL_IPV4: selects which union member is valid */ | ||
88 | }; | ||
89 | |||
90 | struct ipvl_port { /* State attached to a lower device (stored as its rx_handler_data) */ | ||
91 | struct net_device *dev; /* the lower device this port was created on */ | ||
92 | struct hlist_head hlhead[IPVLAN_HASH_SIZE]; /* address hash: buckets of ipvl_addr linked via hlnode */ | ||
93 | struct list_head ipvlans; /* ipvl_dev instances on this port, linked via pnode */ | ||
94 | struct rcu_head rcu; /* used by kfree_rcu() when the port is freed */ | ||
95 | int count; /* presumably the number of attached ipvlan devices - TODO confirm */ | ||
96 | u16 mode; /* IPVLAN_MODE_L2 or IPVLAN_MODE_L3 */ | ||
97 | }; | ||
98 | |||
99 | static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) | ||
100 | { | ||
101 | return rcu_dereference(d->rx_handler_data); | ||
102 | } | ||
103 | |||
104 | static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) | ||
105 | { | ||
106 | return rtnl_dereference(d->rx_handler_data); | ||
107 | } | ||
108 | |||
109 | static inline bool ipvlan_dev_master(struct net_device *d) | ||
110 | { | ||
111 | return d->priv_flags & IFF_IPVLAN_MASTER; | ||
112 | } | ||
113 | |||
114 | static inline bool ipvlan_dev_slave(struct net_device *d) | ||
115 | { | ||
116 | return d->priv_flags & IFF_IPVLAN_SLAVE; | ||
117 | } | ||
118 | |||
119 | void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev); | ||
120 | void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval); | ||
121 | void ipvlan_init_secret(void); | ||
122 | unsigned int ipvlan_mac_hash(const unsigned char *addr); | ||
123 | rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); | ||
124 | int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev); | ||
125 | void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); | ||
126 | bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6); | ||
127 | struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, | ||
128 | const void *iaddr, bool is_v6); | ||
129 | void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync); | ||
130 | #endif /* __IPVLAN_H */ | ||
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c new file mode 100644 index 000000000000..a14d87783245 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_core.c | |||
@@ -0,0 +1,607 @@ | |||
1 | /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of the GNU General Public License as | ||
5 | * published by the Free Software Foundation; either version 2 of | ||
6 | * the License, or (at your option) any later version. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include "ipvlan.h" | ||
11 | |||
12 | static u32 ipvlan_jhash_secret; | ||
13 | |||
14 | void ipvlan_init_secret(void) /* Fill the jhash seed used by the address and MAC hash helpers */ | ||
15 | { | ||
16 | net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); /* random bytes; per its name, generated only once */ | ||
17 | } | ||
18 | |||
19 | static void ipvlan_count_rx(const struct ipvl_dev *ipvlan, | ||
20 | unsigned int len, bool success, bool mcast) | ||
21 | { | ||
22 | if (!ipvlan) | ||
23 | return; | ||
24 | |||
25 | if (likely(success)) { | ||
26 | struct ipvl_pcpu_stats *pcptr; | ||
27 | |||
28 | pcptr = this_cpu_ptr(ipvlan->pcpu_stats); | ||
29 | u64_stats_update_begin(&pcptr->syncp); | ||
30 | pcptr->rx_pkts++; | ||
31 | pcptr->rx_bytes += len; | ||
32 | if (mcast) | ||
33 | pcptr->rx_mcast++; | ||
34 | u64_stats_update_end(&pcptr->syncp); | ||
35 | } else { | ||
36 | this_cpu_inc(ipvlan->pcpu_stats->rx_errs); | ||
37 | } | ||
38 | } | ||
39 | |||
40 | static u8 ipvlan_get_v6_hash(const void *iaddr) | ||
41 | { | ||
42 | const struct in6_addr *ip6_addr = iaddr; | ||
43 | |||
44 | return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & | ||
45 | IPVLAN_HASH_MASK; | ||
46 | } | ||
47 | |||
48 | static u8 ipvlan_get_v4_hash(const void *iaddr) | ||
49 | { | ||
50 | const struct in_addr *ip4_addr = iaddr; | ||
51 | |||
52 | return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) & | ||
53 | IPVLAN_HASH_MASK; | ||
54 | } | ||
55 | |||
56 | struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, | ||
57 | const void *iaddr, bool is_v6) | ||
58 | { | ||
59 | struct ipvl_addr *addr; | ||
60 | u8 hash; | ||
61 | |||
62 | hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : | ||
63 | ipvlan_get_v4_hash(iaddr); | ||
64 | hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) { | ||
65 | if (is_v6 && addr->atype == IPVL_IPV6 && | ||
66 | ipv6_addr_equal(&addr->ip6addr, iaddr)) | ||
67 | return addr; | ||
68 | else if (!is_v6 && addr->atype == IPVL_IPV4 && | ||
69 | addr->ip4addr.s_addr == | ||
70 | ((struct in_addr *)iaddr)->s_addr) | ||
71 | return addr; | ||
72 | } | ||
73 | return NULL; | ||
74 | } | ||
75 | |||
76 | void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) | ||
77 | { | ||
78 | struct ipvl_port *port = ipvlan->port; | ||
79 | u8 hash; | ||
80 | |||
81 | hash = (addr->atype == IPVL_IPV6) ? | ||
82 | ipvlan_get_v6_hash(&addr->ip6addr) : | ||
83 | ipvlan_get_v4_hash(&addr->ip4addr); | ||
84 | hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); | ||
85 | } | ||
86 | |||
87 | void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync) | ||
88 | { | ||
89 | hlist_del_rcu(&addr->hlnode); | ||
90 | if (sync) | ||
91 | synchronize_rcu(); | ||
92 | } | ||
93 | |||
94 | bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) | ||
95 | { | ||
96 | struct ipvl_port *port = ipvlan->port; | ||
97 | struct ipvl_addr *addr; | ||
98 | |||
99 | list_for_each_entry(addr, &ipvlan->addrs, anode) { | ||
100 | if ((is_v6 && addr->atype == IPVL_IPV6 && | ||
101 | ipv6_addr_equal(&addr->ip6addr, iaddr)) || | ||
102 | (!is_v6 && addr->atype == IPVL_IPV4 && | ||
103 | addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr)) | ||
104 | return true; | ||
105 | } | ||
106 | |||
107 | if (ipvlan_ht_addr_lookup(port, iaddr, is_v6)) | ||
108 | return true; | ||
109 | |||
110 | return false; | ||
111 | } | ||
112 | |||
113 | static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type) | ||
114 | { | ||
115 | void *lyr3h = NULL; | ||
116 | |||
117 | switch (skb->protocol) { | ||
118 | case htons(ETH_P_ARP): { | ||
119 | struct arphdr *arph; | ||
120 | |||
121 | if (unlikely(!pskb_may_pull(skb, sizeof(*arph)))) | ||
122 | return NULL; | ||
123 | |||
124 | arph = arp_hdr(skb); | ||
125 | *type = IPVL_ARP; | ||
126 | lyr3h = arph; | ||
127 | break; | ||
128 | } | ||
129 | case htons(ETH_P_IP): { | ||
130 | u32 pktlen; | ||
131 | struct iphdr *ip4h; | ||
132 | |||
133 | if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) | ||
134 | return NULL; | ||
135 | |||
136 | ip4h = ip_hdr(skb); | ||
137 | pktlen = ntohs(ip4h->tot_len); | ||
138 | if (ip4h->ihl < 5 || ip4h->version != 4) | ||
139 | return NULL; | ||
140 | if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) | ||
141 | return NULL; | ||
142 | |||
143 | *type = IPVL_IPV4; | ||
144 | lyr3h = ip4h; | ||
145 | break; | ||
146 | } | ||
147 | case htons(ETH_P_IPV6): { | ||
148 | struct ipv6hdr *ip6h; | ||
149 | |||
150 | if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) | ||
151 | return NULL; | ||
152 | |||
153 | ip6h = ipv6_hdr(skb); | ||
154 | if (ip6h->version != 6) | ||
155 | return NULL; | ||
156 | |||
157 | *type = IPVL_IPV6; | ||
158 | lyr3h = ip6h; | ||
159 | /* Only Neighbour Solicitation pkts need different treatment */ | ||
160 | if (ipv6_addr_any(&ip6h->saddr) && | ||
161 | ip6h->nexthdr == NEXTHDR_ICMP) { | ||
162 | *type = IPVL_ICMPV6; | ||
163 | lyr3h = ip6h + 1; | ||
164 | } | ||
165 | break; | ||
166 | } | ||
167 | default: | ||
168 | return NULL; | ||
169 | } | ||
170 | |||
171 | return lyr3h; | ||
172 | } | ||
173 | |||
174 | unsigned int ipvlan_mac_hash(const unsigned char *addr) | ||
175 | { | ||
176 | u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2), | ||
177 | ipvlan_jhash_secret); | ||
178 | |||
179 | return hash & IPVLAN_MAC_FILTER_MASK; | ||
180 | } | ||
181 | |||
182 | static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb, | ||
183 | const struct ipvl_dev *in_dev, bool local) | ||
184 | { | ||
185 | struct ethhdr *eth = eth_hdr(skb); | ||
186 | struct ipvl_dev *ipvlan; | ||
187 | struct sk_buff *nskb; | ||
188 | unsigned int len; | ||
189 | unsigned int mac_hash; | ||
190 | int ret; | ||
191 | |||
192 | if (skb->protocol == htons(ETH_P_PAUSE)) | ||
193 | return; | ||
194 | |||
195 | list_for_each_entry(ipvlan, &port->ipvlans, pnode) { | ||
196 | if (local && (ipvlan == in_dev)) | ||
197 | continue; | ||
198 | |||
199 | mac_hash = ipvlan_mac_hash(eth->h_dest); | ||
200 | if (!test_bit(mac_hash, ipvlan->mac_filters)) | ||
201 | continue; | ||
202 | |||
203 | ret = NET_RX_DROP; | ||
204 | len = skb->len + ETH_HLEN; | ||
205 | nskb = skb_clone(skb, GFP_ATOMIC); | ||
206 | if (!nskb) | ||
207 | goto mcast_acct; | ||
208 | |||
209 | if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast)) | ||
210 | nskb->pkt_type = PACKET_BROADCAST; | ||
211 | else | ||
212 | nskb->pkt_type = PACKET_MULTICAST; | ||
213 | |||
214 | nskb->dev = ipvlan->dev; | ||
215 | if (local) | ||
216 | ret = dev_forward_skb(ipvlan->dev, nskb); | ||
217 | else | ||
218 | ret = netif_rx(nskb); | ||
219 | mcast_acct: | ||
220 | ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); | ||
221 | } | ||
222 | |||
223 | /* Locally generated? ...Forward a copy to the main-device as | ||
224 | * well. On the RX side we'll ignore it (wont give it to any | ||
225 | * of the virtual devices. | ||
226 | */ | ||
227 | if (local) { | ||
228 | nskb = skb_clone(skb, GFP_ATOMIC); | ||
229 | if (nskb) { | ||
230 | if (ether_addr_equal(eth->h_dest, port->dev->broadcast)) | ||
231 | nskb->pkt_type = PACKET_BROADCAST; | ||
232 | else | ||
233 | nskb->pkt_type = PACKET_MULTICAST; | ||
234 | |||
235 | dev_forward_skb(port->dev, nskb); | ||
236 | } | ||
237 | } | ||
238 | } | ||
239 | |||
240 | static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb, /* hand skb to the ipvlan device that owns addr */ | ||
241 | bool local) /* local: true when called from the xmit paths, false from the rx handlers */ | ||
242 | { | ||
243 | struct ipvl_dev *ipvlan = addr->master; | ||
244 | struct net_device *dev = ipvlan->dev; | ||
245 | unsigned int len; | ||
246 | rx_handler_result_t ret = RX_HANDLER_CONSUMED; /* every path except a non-local delivery consumes or frees the skb */ | ||
247 | bool success = false; | ||
248 | |||
249 | len = skb->len + ETH_HLEN; /* captured up front: skb may be gone by the time stats are updated */ | ||
250 | if (unlikely(!(dev->flags & IFF_UP))) { /* target device is down: drop, counted as an RX error */ | ||
251 | kfree_skb(skb); | ||
252 | goto out; | ||
253 | } | ||
254 | |||
255 | skb = skb_share_check(skb, GFP_ATOMIC); /* NOTE(review): may return a different skb, but the caller's pointer is not updated - confirm this is safe on the RX_HANDLER_ANOTHER path */ | ||
256 | if (!skb) | ||
257 | goto out; | ||
258 | |||
259 | skb->dev = dev; /* retarget the frame at the ipvlan device */ | ||
260 | skb->pkt_type = PACKET_HOST; | ||
261 | |||
262 | if (local) { | ||
263 | if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) /* locally generated: inject into the target device's receive path */ | ||
264 | success = true; | ||
265 | } else { | ||
266 | ret = RX_HANDLER_ANOTHER; /* received frame: skb->dev was changed, so ask the core for another RX round */ | ||
267 | success = true; | ||
268 | } | ||
269 | |||
270 | out: | ||
271 | ipvlan_count_rx(ipvlan, len, success, false); | ||
272 | return ret; | ||
273 | } | ||
274 | |||
275 | static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, | ||
276 | void *lyr3h, int addr_type, | ||
277 | bool use_dest) | ||
278 | { | ||
279 | struct ipvl_addr *addr = NULL; | ||
280 | |||
281 | if (addr_type == IPVL_IPV6) { | ||
282 | struct ipv6hdr *ip6h; | ||
283 | struct in6_addr *i6addr; | ||
284 | |||
285 | ip6h = (struct ipv6hdr *)lyr3h; | ||
286 | i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; | ||
287 | addr = ipvlan_ht_addr_lookup(port, i6addr, true); | ||
288 | } else if (addr_type == IPVL_ICMPV6) { | ||
289 | struct nd_msg *ndmh; | ||
290 | struct in6_addr *i6addr; | ||
291 | |||
292 | /* Make sure that the NeighborSolicitation ICMPv6 packets | ||
293 | * are handled to avoid DAD issue. | ||
294 | */ | ||
295 | ndmh = (struct nd_msg *)lyr3h; | ||
296 | if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { | ||
297 | i6addr = &ndmh->target; | ||
298 | addr = ipvlan_ht_addr_lookup(port, i6addr, true); | ||
299 | } | ||
300 | } else if (addr_type == IPVL_IPV4) { | ||
301 | struct iphdr *ip4h; | ||
302 | __be32 *i4addr; | ||
303 | |||
304 | ip4h = (struct iphdr *)lyr3h; | ||
305 | i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; | ||
306 | addr = ipvlan_ht_addr_lookup(port, i4addr, false); | ||
307 | } else if (addr_type == IPVL_ARP) { | ||
308 | struct arphdr *arph; | ||
309 | unsigned char *arp_ptr; | ||
310 | __be32 dip; | ||
311 | |||
312 | arph = (struct arphdr *)lyr3h; | ||
313 | arp_ptr = (unsigned char *)(arph + 1); | ||
314 | if (use_dest) | ||
315 | arp_ptr += (2 * port->dev->addr_len) + 4; | ||
316 | else | ||
317 | arp_ptr += port->dev->addr_len; | ||
318 | |||
319 | memcpy(&dip, arp_ptr, 4); | ||
320 | addr = ipvlan_ht_addr_lookup(port, &dip, false); | ||
321 | } | ||
322 | |||
323 | return addr; | ||
324 | } | ||
325 | |||
326 | static int ipvlan_process_v4_outbound(struct sk_buff *skb) | ||
327 | { | ||
328 | const struct iphdr *ip4h = ip_hdr(skb); | ||
329 | struct net_device *dev = skb->dev; | ||
330 | struct rtable *rt; | ||
331 | int err, ret = NET_XMIT_DROP; | ||
332 | struct flowi4 fl4 = { | ||
333 | .flowi4_oif = dev->iflink, | ||
334 | .flowi4_tos = RT_TOS(ip4h->tos), | ||
335 | .flowi4_flags = FLOWI_FLAG_ANYSRC, | ||
336 | .daddr = ip4h->daddr, | ||
337 | .saddr = ip4h->saddr, | ||
338 | }; | ||
339 | |||
340 | rt = ip_route_output_flow(dev_net(dev), &fl4, NULL); | ||
341 | if (IS_ERR(rt)) | ||
342 | goto err; | ||
343 | |||
344 | if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { | ||
345 | ip_rt_put(rt); | ||
346 | goto err; | ||
347 | } | ||
348 | skb_dst_drop(skb); | ||
349 | skb_dst_set(skb, &rt->dst); | ||
350 | err = ip_local_out(skb); | ||
351 | if (unlikely(net_xmit_eval(err))) | ||
352 | dev->stats.tx_errors++; | ||
353 | else | ||
354 | ret = NET_XMIT_SUCCESS; | ||
355 | goto out; | ||
356 | err: | ||
357 | dev->stats.tx_errors++; | ||
358 | kfree_skb(skb); | ||
359 | out: | ||
360 | return ret; | ||
361 | } | ||
362 | |||
363 | static int ipvlan_process_v6_outbound(struct sk_buff *skb) | ||
364 | { | ||
365 | const struct ipv6hdr *ip6h = ipv6_hdr(skb); | ||
366 | struct net_device *dev = skb->dev; | ||
367 | struct dst_entry *dst; | ||
368 | int err, ret = NET_XMIT_DROP; | ||
369 | struct flowi6 fl6 = { | ||
370 | .flowi6_iif = skb->dev->ifindex, | ||
371 | .daddr = ip6h->daddr, | ||
372 | .saddr = ip6h->saddr, | ||
373 | .flowi6_flags = FLOWI_FLAG_ANYSRC, | ||
374 | .flowlabel = ip6_flowinfo(ip6h), | ||
375 | .flowi6_mark = skb->mark, | ||
376 | .flowi6_proto = ip6h->nexthdr, | ||
377 | }; | ||
378 | |||
379 | dst = ip6_route_output(dev_net(dev), NULL, &fl6); | ||
380 | if (IS_ERR(dst)) | ||
381 | goto err; | ||
382 | |||
383 | skb_dst_drop(skb); | ||
384 | skb_dst_set(skb, dst); | ||
385 | err = ip6_local_out(skb); | ||
386 | if (unlikely(net_xmit_eval(err))) | ||
387 | dev->stats.tx_errors++; | ||
388 | else | ||
389 | ret = NET_XMIT_SUCCESS; | ||
390 | goto out; | ||
391 | err: | ||
392 | dev->stats.tx_errors++; | ||
393 | kfree_skb(skb); | ||
394 | out: | ||
395 | return ret; | ||
396 | } | ||
397 | |||
398 | static int ipvlan_process_outbound(struct sk_buff *skb, | ||
399 | const struct ipvl_dev *ipvlan) | ||
400 | { | ||
401 | struct ethhdr *ethh = eth_hdr(skb); | ||
402 | int ret = NET_XMIT_DROP; | ||
403 | |||
404 | /* In this mode we dont care about multicast and broadcast traffic */ | ||
405 | if (is_multicast_ether_addr(ethh->h_dest)) { | ||
406 | pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", | ||
407 | ntohs(skb->protocol)); | ||
408 | kfree_skb(skb); | ||
409 | goto out; | ||
410 | } | ||
411 | |||
412 | /* The ipvlan is a pseudo-L2 device, so the packets that we receive | ||
413 | * will have L2; which need to discarded and processed further | ||
414 | * in the net-ns of the main-device. | ||
415 | */ | ||
416 | if (skb_mac_header_was_set(skb)) { | ||
417 | skb_pull(skb, sizeof(*ethh)); | ||
418 | skb->mac_header = (typeof(skb->mac_header))~0U; | ||
419 | skb_reset_network_header(skb); | ||
420 | } | ||
421 | |||
422 | if (skb->protocol == htons(ETH_P_IPV6)) | ||
423 | ret = ipvlan_process_v6_outbound(skb); | ||
424 | else if (skb->protocol == htons(ETH_P_IP)) | ||
425 | ret = ipvlan_process_v4_outbound(skb); | ||
426 | else { | ||
427 | pr_warn_ratelimited("Dropped outbound packet type=%x\n", | ||
428 | ntohs(skb->protocol)); | ||
429 | kfree_skb(skb); | ||
430 | } | ||
431 | out: | ||
432 | return ret; | ||
433 | } | ||
434 | |||
435 | static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) | ||
436 | { | ||
437 | const struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
438 | void *lyr3h; | ||
439 | struct ipvl_addr *addr; | ||
440 | int addr_type; | ||
441 | |||
442 | lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); | ||
443 | if (!lyr3h) | ||
444 | goto out; | ||
445 | |||
446 | addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); | ||
447 | if (addr) | ||
448 | return ipvlan_rcv_frame(addr, skb, true); | ||
449 | |||
450 | out: | ||
451 | skb->dev = ipvlan->phy_dev; | ||
452 | return ipvlan_process_outbound(skb, ipvlan); | ||
453 | } | ||
454 | |||
455 | static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) | ||
456 | { | ||
457 | const struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
458 | struct ethhdr *eth = eth_hdr(skb); | ||
459 | struct ipvl_addr *addr; | ||
460 | void *lyr3h; | ||
461 | int addr_type; | ||
462 | |||
463 | if (ether_addr_equal(eth->h_dest, eth->h_source)) { | ||
464 | lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); | ||
465 | if (lyr3h) { | ||
466 | addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); | ||
467 | if (addr) | ||
468 | return ipvlan_rcv_frame(addr, skb, true); | ||
469 | } | ||
470 | skb = skb_share_check(skb, GFP_ATOMIC); | ||
471 | if (!skb) | ||
472 | return NET_XMIT_DROP; | ||
473 | |||
474 | /* Packet definitely does not belong to any of the | ||
475 | * virtual devices, but the dest is local. So forward | ||
476 | * the skb for the main-dev. At the RX side we just return | ||
477 | * RX_PASS for it to be processed further on the stack. | ||
478 | */ | ||
479 | return dev_forward_skb(ipvlan->phy_dev, skb); | ||
480 | |||
481 | } else if (is_multicast_ether_addr(eth->h_dest)) { | ||
482 | u8 ip_summed = skb->ip_summed; | ||
483 | |||
484 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
485 | ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true); | ||
486 | skb->ip_summed = ip_summed; | ||
487 | } | ||
488 | |||
489 | skb->dev = ipvlan->phy_dev; | ||
490 | return dev_queue_xmit(skb); | ||
491 | } | ||
492 | |||
493 | int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) | ||
494 | { | ||
495 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
496 | struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev); | ||
497 | |||
498 | if (!port) | ||
499 | goto out; | ||
500 | |||
501 | if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) | ||
502 | goto out; | ||
503 | |||
504 | switch(port->mode) { | ||
505 | case IPVLAN_MODE_L2: | ||
506 | return ipvlan_xmit_mode_l2(skb, dev); | ||
507 | case IPVLAN_MODE_L3: | ||
508 | return ipvlan_xmit_mode_l3(skb, dev); | ||
509 | } | ||
510 | |||
511 | /* Should not reach here */ | ||
512 | WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n", | ||
513 | port->mode); | ||
514 | out: | ||
515 | kfree_skb(skb); | ||
516 | return NET_XMIT_DROP; | ||
517 | } | ||
518 | |||
519 | static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) | ||
520 | { | ||
521 | struct ethhdr *eth = eth_hdr(skb); | ||
522 | struct ipvl_addr *addr; | ||
523 | void *lyr3h; | ||
524 | int addr_type; | ||
525 | |||
526 | if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { | ||
527 | lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); | ||
528 | if (!lyr3h) | ||
529 | return true; | ||
530 | |||
531 | addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); | ||
532 | if (addr) | ||
533 | return false; | ||
534 | } | ||
535 | |||
536 | return true; | ||
537 | } | ||
538 | |||
539 | static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, | ||
540 | struct ipvl_port *port) | ||
541 | { | ||
542 | void *lyr3h; | ||
543 | int addr_type; | ||
544 | struct ipvl_addr *addr; | ||
545 | struct sk_buff *skb = *pskb; | ||
546 | rx_handler_result_t ret = RX_HANDLER_PASS; | ||
547 | |||
548 | lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); | ||
549 | if (!lyr3h) | ||
550 | goto out; | ||
551 | |||
552 | addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); | ||
553 | if (addr) | ||
554 | ret = ipvlan_rcv_frame(addr, skb, false); | ||
555 | |||
556 | out: | ||
557 | return ret; | ||
558 | } | ||
559 | |||
560 | static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, | ||
561 | struct ipvl_port *port) | ||
562 | { | ||
563 | struct sk_buff *skb = *pskb; | ||
564 | struct ethhdr *eth = eth_hdr(skb); | ||
565 | rx_handler_result_t ret = RX_HANDLER_PASS; | ||
566 | void *lyr3h; | ||
567 | int addr_type; | ||
568 | |||
569 | if (is_multicast_ether_addr(eth->h_dest)) { | ||
570 | if (ipvlan_external_frame(skb, port)) | ||
571 | ipvlan_multicast_frame(port, skb, NULL, false); | ||
572 | } else { | ||
573 | struct ipvl_addr *addr; | ||
574 | |||
575 | lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); | ||
576 | if (!lyr3h) | ||
577 | return ret; | ||
578 | |||
579 | addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); | ||
580 | if (addr) | ||
581 | ret = ipvlan_rcv_frame(addr, skb, false); | ||
582 | } | ||
583 | |||
584 | return ret; | ||
585 | } | ||
586 | |||
587 | rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) | ||
588 | { | ||
589 | struct sk_buff *skb = *pskb; | ||
590 | struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); | ||
591 | |||
592 | if (!port) | ||
593 | return RX_HANDLER_PASS; | ||
594 | |||
595 | switch (port->mode) { | ||
596 | case IPVLAN_MODE_L2: | ||
597 | return ipvlan_handle_mode_l2(pskb, port); | ||
598 | case IPVLAN_MODE_L3: | ||
599 | return ipvlan_handle_mode_l3(pskb, port); | ||
600 | } | ||
601 | |||
602 | /* Should not reach here */ | ||
603 | WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n", | ||
604 | port->mode); | ||
605 | kfree_skb(skb); | ||
606 | return NET_RX_DROP; | ||
607 | } | ||
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c new file mode 100644 index 000000000000..c3df84bd2857 --- /dev/null +++ b/drivers/net/ipvlan/ipvlan_main.c | |||
@@ -0,0 +1,789 @@ | |||
1 | /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of the GNU General Public License as | ||
5 | * published by the Free Software Foundation; either version 2 of | ||
6 | * the License, or (at your option) any later version. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include "ipvlan.h" | ||
11 | |||
12 | void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) | ||
13 | { | ||
14 | ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; | ||
15 | } | ||
16 | |||
17 | void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval) | ||
18 | { | ||
19 | struct ipvl_dev *ipvlan; | ||
20 | |||
21 | if (port->mode != nval) { | ||
22 | list_for_each_entry(ipvlan, &port->ipvlans, pnode) { | ||
23 | if (nval == IPVLAN_MODE_L3) | ||
24 | ipvlan->dev->flags |= IFF_NOARP; | ||
25 | else | ||
26 | ipvlan->dev->flags &= ~IFF_NOARP; | ||
27 | } | ||
28 | port->mode = nval; | ||
29 | } | ||
30 | } | ||
31 | |||
32 | static int ipvlan_port_create(struct net_device *dev) | ||
33 | { | ||
34 | struct ipvl_port *port; | ||
35 | int err, idx; | ||
36 | |||
37 | if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) { | ||
38 | netdev_err(dev, "Master is either lo or non-ether device\n"); | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); | ||
42 | if (!port) | ||
43 | return -ENOMEM; | ||
44 | |||
45 | port->dev = dev; | ||
46 | port->mode = IPVLAN_MODE_L3; | ||
47 | INIT_LIST_HEAD(&port->ipvlans); | ||
48 | for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) | ||
49 | INIT_HLIST_HEAD(&port->hlhead[idx]); | ||
50 | |||
51 | err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); | ||
52 | if (err) | ||
53 | goto err; | ||
54 | |||
55 | dev->priv_flags |= IFF_IPVLAN_MASTER; | ||
56 | return 0; | ||
57 | |||
58 | err: | ||
59 | kfree_rcu(port, rcu); | ||
60 | return err; | ||
61 | } | ||
62 | |||
63 | static void ipvlan_port_destroy(struct net_device *dev) | ||
64 | { | ||
65 | struct ipvl_port *port = ipvlan_port_get_rtnl(dev); | ||
66 | |||
67 | dev->priv_flags &= ~IFF_IPVLAN_MASTER; | ||
68 | netdev_rx_handler_unregister(dev); | ||
69 | kfree_rcu(port, rcu); | ||
70 | } | ||
71 | |||
72 | /* ipvlan network devices have devices nesting below them and are a special | ||
73 | * "super class" of normal network devices; split their locks off into a | ||
74 | * separate class since they always nest. | ||
75 | */ | ||
76 | static struct lock_class_key ipvlan_netdev_xmit_lock_key; | ||
77 | static struct lock_class_key ipvlan_netdev_addr_lock_key; | ||
78 | |||
/* Feature bits taken over from the master device
 * (see ipvlan_init() and the NETDEV_FEAT_CHANGE handler).
 */
#define IPVLAN_FEATURES \
	(NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | \
	 NETIF_F_FRAGLIST | NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | \
	 NETIF_F_GSO_ROBUST | NETIF_F_TSO_ECN | NETIF_F_TSO6 | \
	 NETIF_F_GRO | NETIF_F_RXCSUM | \
	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)

/* Link-state bits copied from the master device in ipvlan_init(). */
#define IPVLAN_STATE_MASK \
	((1 << __LINK_STATE_NOCARRIER) | (1 << __LINK_STATE_DORMANT))
87 | |||
88 | static void ipvlan_set_lockdep_class_one(struct net_device *dev, | ||
89 | struct netdev_queue *txq, | ||
90 | void *_unused) | ||
91 | { | ||
92 | lockdep_set_class(&txq->_xmit_lock, &ipvlan_netdev_xmit_lock_key); | ||
93 | } | ||
94 | |||
95 | static void ipvlan_set_lockdep_class(struct net_device *dev) | ||
96 | { | ||
97 | lockdep_set_class(&dev->addr_list_lock, &ipvlan_netdev_addr_lock_key); | ||
98 | netdev_for_each_tx_queue(dev, ipvlan_set_lockdep_class_one, NULL); | ||
99 | } | ||
100 | |||
101 | static int ipvlan_init(struct net_device *dev) | ||
102 | { | ||
103 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
104 | const struct net_device *phy_dev = ipvlan->phy_dev; | ||
105 | |||
106 | dev->state = (dev->state & ~IPVLAN_STATE_MASK) | | ||
107 | (phy_dev->state & IPVLAN_STATE_MASK); | ||
108 | dev->features = phy_dev->features & IPVLAN_FEATURES; | ||
109 | dev->features |= NETIF_F_LLTX; | ||
110 | dev->gso_max_size = phy_dev->gso_max_size; | ||
111 | dev->iflink = phy_dev->ifindex; | ||
112 | dev->hard_header_len = phy_dev->hard_header_len; | ||
113 | |||
114 | ipvlan_set_lockdep_class(dev); | ||
115 | |||
116 | ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats); | ||
117 | if (!ipvlan->pcpu_stats) | ||
118 | return -ENOMEM; | ||
119 | |||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | static void ipvlan_uninit(struct net_device *dev) | ||
124 | { | ||
125 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
126 | struct ipvl_port *port = ipvlan->port; | ||
127 | |||
128 | if (ipvlan->pcpu_stats) | ||
129 | free_percpu(ipvlan->pcpu_stats); | ||
130 | |||
131 | port->count -= 1; | ||
132 | if (!port->count) | ||
133 | ipvlan_port_destroy(port->dev); | ||
134 | } | ||
135 | |||
136 | static int ipvlan_open(struct net_device *dev) | ||
137 | { | ||
138 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
139 | struct net_device *phy_dev = ipvlan->phy_dev; | ||
140 | struct ipvl_addr *addr; | ||
141 | |||
142 | if (ipvlan->port->mode == IPVLAN_MODE_L3) | ||
143 | dev->flags |= IFF_NOARP; | ||
144 | else | ||
145 | dev->flags &= ~IFF_NOARP; | ||
146 | |||
147 | if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { | ||
148 | list_for_each_entry(addr, &ipvlan->addrs, anode) | ||
149 | ipvlan_ht_addr_add(ipvlan, addr); | ||
150 | } | ||
151 | return dev_uc_add(phy_dev, phy_dev->dev_addr); | ||
152 | } | ||
153 | |||
154 | static int ipvlan_stop(struct net_device *dev) | ||
155 | { | ||
156 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
157 | struct net_device *phy_dev = ipvlan->phy_dev; | ||
158 | struct ipvl_addr *addr; | ||
159 | |||
160 | dev_uc_unsync(phy_dev, dev); | ||
161 | dev_mc_unsync(phy_dev, dev); | ||
162 | |||
163 | dev_uc_del(phy_dev, phy_dev->dev_addr); | ||
164 | |||
165 | if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { | ||
166 | list_for_each_entry(addr, &ipvlan->addrs, anode) | ||
167 | ipvlan_ht_addr_del(addr, !dev->dismantle); | ||
168 | } | ||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) | ||
173 | { | ||
174 | const struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
175 | int skblen = skb->len; | ||
176 | int ret; | ||
177 | |||
178 | ret = ipvlan_queue_xmit(skb, dev); | ||
179 | if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { | ||
180 | struct ipvl_pcpu_stats *pcptr; | ||
181 | |||
182 | pcptr = this_cpu_ptr(ipvlan->pcpu_stats); | ||
183 | |||
184 | u64_stats_update_begin(&pcptr->syncp); | ||
185 | pcptr->tx_pkts++; | ||
186 | pcptr->tx_bytes += skblen; | ||
187 | u64_stats_update_end(&pcptr->syncp); | ||
188 | } else { | ||
189 | this_cpu_inc(ipvlan->pcpu_stats->tx_drps); | ||
190 | } | ||
191 | return ret; | ||
192 | } | ||
193 | |||
194 | static netdev_features_t ipvlan_fix_features(struct net_device *dev, | ||
195 | netdev_features_t features) | ||
196 | { | ||
197 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
198 | |||
199 | return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES); | ||
200 | } | ||
201 | |||
202 | static void ipvlan_change_rx_flags(struct net_device *dev, int change) | ||
203 | { | ||
204 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
205 | struct net_device *phy_dev = ipvlan->phy_dev; | ||
206 | |||
207 | if (change & IFF_ALLMULTI) | ||
208 | dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); | ||
209 | } | ||
210 | |||
211 | static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set) | ||
212 | { | ||
213 | struct net_device *dev = ipvlan->dev; | ||
214 | unsigned int hashbit = ipvlan_mac_hash(dev->broadcast); | ||
215 | |||
216 | if (set && !test_bit(hashbit, ipvlan->mac_filters)) | ||
217 | __set_bit(hashbit, ipvlan->mac_filters); | ||
218 | else if (!set && test_bit(hashbit, ipvlan->mac_filters)) | ||
219 | __clear_bit(hashbit, ipvlan->mac_filters); | ||
220 | } | ||
221 | |||
222 | static void ipvlan_set_multicast_mac_filter(struct net_device *dev) | ||
223 | { | ||
224 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
225 | |||
226 | if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { | ||
227 | bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE); | ||
228 | } else { | ||
229 | struct netdev_hw_addr *ha; | ||
230 | DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE); | ||
231 | |||
232 | bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE); | ||
233 | netdev_for_each_mc_addr(ha, dev) | ||
234 | __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); | ||
235 | |||
236 | bitmap_copy(ipvlan->mac_filters, mc_filters, | ||
237 | IPVLAN_MAC_FILTER_SIZE); | ||
238 | } | ||
239 | dev_uc_sync(ipvlan->phy_dev, dev); | ||
240 | dev_mc_sync(ipvlan->phy_dev, dev); | ||
241 | } | ||
242 | |||
243 | static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev, | ||
244 | struct rtnl_link_stats64 *s) | ||
245 | { | ||
246 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
247 | |||
248 | if (ipvlan->pcpu_stats) { | ||
249 | struct ipvl_pcpu_stats *pcptr; | ||
250 | u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes; | ||
251 | u32 rx_errs = 0, tx_drps = 0; | ||
252 | u32 strt; | ||
253 | int idx; | ||
254 | |||
255 | for_each_possible_cpu(idx) { | ||
256 | pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); | ||
257 | do { | ||
258 | strt= u64_stats_fetch_begin_irq(&pcptr->syncp); | ||
259 | rx_pkts = pcptr->rx_pkts; | ||
260 | rx_bytes = pcptr->rx_bytes; | ||
261 | rx_mcast = pcptr->rx_mcast; | ||
262 | tx_pkts = pcptr->tx_pkts; | ||
263 | tx_bytes = pcptr->tx_bytes; | ||
264 | } while (u64_stats_fetch_retry_irq(&pcptr->syncp, | ||
265 | strt)); | ||
266 | |||
267 | s->rx_packets += rx_pkts; | ||
268 | s->rx_bytes += rx_bytes; | ||
269 | s->multicast += rx_mcast; | ||
270 | s->tx_packets += tx_pkts; | ||
271 | s->tx_bytes += tx_bytes; | ||
272 | |||
273 | /* u32 values are updated without syncp protection. */ | ||
274 | rx_errs += pcptr->rx_errs; | ||
275 | tx_drps += pcptr->tx_drps; | ||
276 | } | ||
277 | s->rx_errors = rx_errs; | ||
278 | s->rx_dropped = rx_errs; | ||
279 | s->tx_dropped = tx_drps; | ||
280 | } | ||
281 | return s; | ||
282 | } | ||
283 | |||
284 | static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) | ||
285 | { | ||
286 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
287 | struct net_device *phy_dev = ipvlan->phy_dev; | ||
288 | |||
289 | return vlan_vid_add(phy_dev, proto, vid); | ||
290 | } | ||
291 | |||
292 | static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, | ||
293 | u16 vid) | ||
294 | { | ||
295 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
296 | struct net_device *phy_dev = ipvlan->phy_dev; | ||
297 | |||
298 | vlan_vid_del(phy_dev, proto, vid); | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | static const struct net_device_ops ipvlan_netdev_ops = { | ||
303 | .ndo_init = ipvlan_init, | ||
304 | .ndo_uninit = ipvlan_uninit, | ||
305 | .ndo_open = ipvlan_open, | ||
306 | .ndo_stop = ipvlan_stop, | ||
307 | .ndo_start_xmit = ipvlan_start_xmit, | ||
308 | .ndo_fix_features = ipvlan_fix_features, | ||
309 | .ndo_change_rx_flags = ipvlan_change_rx_flags, | ||
310 | .ndo_set_rx_mode = ipvlan_set_multicast_mac_filter, | ||
311 | .ndo_get_stats64 = ipvlan_get_stats64, | ||
312 | .ndo_vlan_rx_add_vid = ipvlan_vlan_rx_add_vid, | ||
313 | .ndo_vlan_rx_kill_vid = ipvlan_vlan_rx_kill_vid, | ||
314 | }; | ||
315 | |||
316 | static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, | ||
317 | unsigned short type, const void *daddr, | ||
318 | const void *saddr, unsigned len) | ||
319 | { | ||
320 | const struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
321 | struct net_device *phy_dev = ipvlan->phy_dev; | ||
322 | |||
323 | /* TODO Probably use a different field than dev_addr so that the | ||
324 | * mac-address on the virtual device is portable and can be carried | ||
325 | * while the packets use the mac-addr on the physical device. | ||
326 | */ | ||
327 | return dev_hard_header(skb, phy_dev, type, daddr, | ||
328 | saddr ? : dev->dev_addr, len); | ||
329 | } | ||
330 | |||
331 | static const struct header_ops ipvlan_header_ops = { | ||
332 | .create = ipvlan_hard_header, | ||
333 | .rebuild = eth_rebuild_header, | ||
334 | .parse = eth_header_parse, | ||
335 | .cache = eth_header_cache, | ||
336 | .cache_update = eth_header_cache_update, | ||
337 | }; | ||
338 | |||
339 | static int ipvlan_ethtool_get_settings(struct net_device *dev, | ||
340 | struct ethtool_cmd *cmd) | ||
341 | { | ||
342 | const struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
343 | |||
344 | return __ethtool_get_settings(ipvlan->phy_dev, cmd); | ||
345 | } | ||
346 | |||
347 | static void ipvlan_ethtool_get_drvinfo(struct net_device *dev, | ||
348 | struct ethtool_drvinfo *drvinfo) | ||
349 | { | ||
350 | strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver)); | ||
351 | strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version)); | ||
352 | } | ||
353 | |||
354 | static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev) | ||
355 | { | ||
356 | const struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
357 | |||
358 | return ipvlan->msg_enable; | ||
359 | } | ||
360 | |||
361 | static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value) | ||
362 | { | ||
363 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
364 | |||
365 | ipvlan->msg_enable = value; | ||
366 | } | ||
367 | |||
368 | static const struct ethtool_ops ipvlan_ethtool_ops = { | ||
369 | .get_link = ethtool_op_get_link, | ||
370 | .get_settings = ipvlan_ethtool_get_settings, | ||
371 | .get_drvinfo = ipvlan_ethtool_get_drvinfo, | ||
372 | .get_msglevel = ipvlan_ethtool_get_msglevel, | ||
373 | .set_msglevel = ipvlan_ethtool_set_msglevel, | ||
374 | }; | ||
375 | |||
376 | static int ipvlan_nl_changelink(struct net_device *dev, | ||
377 | struct nlattr *tb[], struct nlattr *data[]) | ||
378 | { | ||
379 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
380 | struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); | ||
381 | |||
382 | if (data && data[IFLA_IPVLAN_MODE]) { | ||
383 | u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); | ||
384 | |||
385 | ipvlan_set_port_mode(port, nmode); | ||
386 | } | ||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | static size_t ipvlan_nl_getsize(const struct net_device *dev) | ||
391 | { | ||
392 | return (0 | ||
393 | + nla_total_size(2) /* IFLA_IPVLAN_MODE */ | ||
394 | ); | ||
395 | } | ||
396 | |||
397 | static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[]) | ||
398 | { | ||
399 | if (data && data[IFLA_IPVLAN_MODE]) { | ||
400 | u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); | ||
401 | |||
402 | if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX) | ||
403 | return -EINVAL; | ||
404 | } | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | static int ipvlan_nl_fillinfo(struct sk_buff *skb, | ||
409 | const struct net_device *dev) | ||
410 | { | ||
411 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
412 | struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); | ||
413 | int ret = -EINVAL; | ||
414 | |||
415 | if (!port) | ||
416 | goto err; | ||
417 | |||
418 | ret = -EMSGSIZE; | ||
419 | if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) | ||
420 | goto err; | ||
421 | |||
422 | return 0; | ||
423 | |||
424 | err: | ||
425 | return ret; | ||
426 | } | ||
427 | |||
428 | static int ipvlan_link_new(struct net *src_net, struct net_device *dev, | ||
429 | struct nlattr *tb[], struct nlattr *data[]) | ||
430 | { | ||
431 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
432 | struct ipvl_port *port; | ||
433 | struct net_device *phy_dev; | ||
434 | int err; | ||
435 | |||
436 | if (!tb[IFLA_LINK]) | ||
437 | return -EINVAL; | ||
438 | |||
439 | phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); | ||
440 | if (!phy_dev) | ||
441 | return -ENODEV; | ||
442 | |||
443 | if (ipvlan_dev_slave(phy_dev)) { | ||
444 | struct ipvl_dev *tmp = netdev_priv(phy_dev); | ||
445 | |||
446 | phy_dev = tmp->phy_dev; | ||
447 | } else if (!ipvlan_dev_master(phy_dev)) { | ||
448 | err = ipvlan_port_create(phy_dev); | ||
449 | if (err < 0) | ||
450 | return err; | ||
451 | } | ||
452 | |||
453 | port = ipvlan_port_get_rtnl(phy_dev); | ||
454 | if (data && data[IFLA_IPVLAN_MODE]) | ||
455 | port->mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); | ||
456 | |||
457 | ipvlan->phy_dev = phy_dev; | ||
458 | ipvlan->dev = dev; | ||
459 | ipvlan->port = port; | ||
460 | ipvlan->sfeatures = IPVLAN_FEATURES; | ||
461 | INIT_LIST_HEAD(&ipvlan->addrs); | ||
462 | ipvlan->ipv4cnt = 0; | ||
463 | ipvlan->ipv6cnt = 0; | ||
464 | |||
465 | /* TODO Probably put random address here to be presented to the | ||
466 | * world but keep using the physical-dev address for the outgoing | ||
467 | * packets. | ||
468 | */ | ||
469 | memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN); | ||
470 | |||
471 | dev->priv_flags |= IFF_IPVLAN_SLAVE; | ||
472 | |||
473 | port->count += 1; | ||
474 | err = register_netdevice(dev); | ||
475 | if (err < 0) | ||
476 | goto ipvlan_destroy_port; | ||
477 | |||
478 | err = netdev_upper_dev_link(phy_dev, dev); | ||
479 | if (err) | ||
480 | goto ipvlan_destroy_port; | ||
481 | |||
482 | list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); | ||
483 | netif_stacked_transfer_operstate(phy_dev, dev); | ||
484 | return 0; | ||
485 | |||
486 | ipvlan_destroy_port: | ||
487 | port->count -= 1; | ||
488 | if (!port->count) | ||
489 | ipvlan_port_destroy(phy_dev); | ||
490 | |||
491 | return err; | ||
492 | } | ||
493 | |||
494 | static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) | ||
495 | { | ||
496 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
497 | struct ipvl_addr *addr, *next; | ||
498 | |||
499 | if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { | ||
500 | list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { | ||
501 | ipvlan_ht_addr_del(addr, !dev->dismantle); | ||
502 | list_del_rcu(&addr->anode); | ||
503 | } | ||
504 | } | ||
505 | list_del_rcu(&ipvlan->pnode); | ||
506 | unregister_netdevice_queue(dev, head); | ||
507 | netdev_upper_dev_unlink(ipvlan->phy_dev, dev); | ||
508 | } | ||
509 | |||
510 | static void ipvlan_link_setup(struct net_device *dev) | ||
511 | { | ||
512 | ether_setup(dev); | ||
513 | |||
514 | dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); | ||
515 | dev->priv_flags |= IFF_UNICAST_FLT; | ||
516 | dev->netdev_ops = &ipvlan_netdev_ops; | ||
517 | dev->destructor = free_netdev; | ||
518 | dev->header_ops = &ipvlan_header_ops; | ||
519 | dev->ethtool_ops = &ipvlan_ethtool_ops; | ||
520 | dev->tx_queue_len = 0; | ||
521 | } | ||
522 | |||
523 | static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = | ||
524 | { | ||
525 | [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, | ||
526 | }; | ||
527 | |||
528 | static struct rtnl_link_ops ipvlan_link_ops = { | ||
529 | .kind = "ipvlan", | ||
530 | .priv_size = sizeof(struct ipvl_dev), | ||
531 | |||
532 | .get_size = ipvlan_nl_getsize, | ||
533 | .policy = ipvlan_nl_policy, | ||
534 | .validate = ipvlan_nl_validate, | ||
535 | .fill_info = ipvlan_nl_fillinfo, | ||
536 | .changelink = ipvlan_nl_changelink, | ||
537 | .maxtype = IFLA_IPVLAN_MAX, | ||
538 | |||
539 | .setup = ipvlan_link_setup, | ||
540 | .newlink = ipvlan_link_new, | ||
541 | .dellink = ipvlan_link_delete, | ||
542 | }; | ||
543 | |||
/* Register @ops with rtnetlink; returns the result of
 * rtnl_link_register().
 */
int ipvlan_link_register(struct rtnl_link_ops *ops)
{
	int ret = rtnl_link_register(ops);

	return ret;
}
548 | |||
549 | static int ipvlan_device_event(struct notifier_block *unused, | ||
550 | unsigned long event, void *ptr) | ||
551 | { | ||
552 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | ||
553 | struct ipvl_dev *ipvlan, *next; | ||
554 | struct ipvl_port *port; | ||
555 | LIST_HEAD(lst_kill); | ||
556 | |||
557 | if (!ipvlan_dev_master(dev)) | ||
558 | return NOTIFY_DONE; | ||
559 | |||
560 | port = ipvlan_port_get_rtnl(dev); | ||
561 | |||
562 | switch (event) { | ||
563 | case NETDEV_CHANGE: | ||
564 | list_for_each_entry(ipvlan, &port->ipvlans, pnode) | ||
565 | netif_stacked_transfer_operstate(ipvlan->phy_dev, | ||
566 | ipvlan->dev); | ||
567 | break; | ||
568 | |||
569 | case NETDEV_UNREGISTER: | ||
570 | if (dev->reg_state != NETREG_UNREGISTERING) | ||
571 | break; | ||
572 | |||
573 | list_for_each_entry_safe(ipvlan, next, &port->ipvlans, | ||
574 | pnode) | ||
575 | ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, | ||
576 | &lst_kill); | ||
577 | unregister_netdevice_many(&lst_kill); | ||
578 | break; | ||
579 | |||
580 | case NETDEV_FEAT_CHANGE: | ||
581 | list_for_each_entry(ipvlan, &port->ipvlans, pnode) { | ||
582 | ipvlan->dev->features = dev->features & IPVLAN_FEATURES; | ||
583 | ipvlan->dev->gso_max_size = dev->gso_max_size; | ||
584 | netdev_features_change(ipvlan->dev); | ||
585 | } | ||
586 | break; | ||
587 | |||
588 | case NETDEV_CHANGEMTU: | ||
589 | list_for_each_entry(ipvlan, &port->ipvlans, pnode) | ||
590 | ipvlan_adjust_mtu(ipvlan, dev); | ||
591 | break; | ||
592 | |||
593 | case NETDEV_PRE_TYPE_CHANGE: | ||
594 | /* Forbid underlying device to change its type. */ | ||
595 | return NOTIFY_BAD; | ||
596 | } | ||
597 | return NOTIFY_DONE; | ||
598 | } | ||
599 | |||
600 | static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) | ||
601 | { | ||
602 | struct ipvl_addr *addr; | ||
603 | |||
604 | if (ipvlan_addr_busy(ipvlan, ip6_addr, true)) { | ||
605 | netif_err(ipvlan, ifup, ipvlan->dev, | ||
606 | "Failed to add IPv6=%pI6c addr for %s intf\n", | ||
607 | ip6_addr, ipvlan->dev->name); | ||
608 | return -EINVAL; | ||
609 | } | ||
610 | addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); | ||
611 | if (!addr) | ||
612 | return -ENOMEM; | ||
613 | |||
614 | addr->master = ipvlan; | ||
615 | memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr)); | ||
616 | addr->atype = IPVL_IPV6; | ||
617 | list_add_tail_rcu(&addr->anode, &ipvlan->addrs); | ||
618 | ipvlan->ipv6cnt++; | ||
619 | ipvlan_ht_addr_add(ipvlan, addr); | ||
620 | |||
621 | return 0; | ||
622 | } | ||
623 | |||
624 | static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) | ||
625 | { | ||
626 | struct ipvl_addr *addr; | ||
627 | |||
628 | addr = ipvlan_ht_addr_lookup(ipvlan->port, ip6_addr, true); | ||
629 | if (!addr) | ||
630 | return; | ||
631 | |||
632 | ipvlan_ht_addr_del(addr, true); | ||
633 | list_del_rcu(&addr->anode); | ||
634 | ipvlan->ipv6cnt--; | ||
635 | WARN_ON(ipvlan->ipv6cnt < 0); | ||
636 | kfree_rcu(addr, rcu); | ||
637 | |||
638 | return; | ||
639 | } | ||
640 | |||
641 | static int ipvlan_addr6_event(struct notifier_block *unused, | ||
642 | unsigned long event, void *ptr) | ||
643 | { | ||
644 | struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; | ||
645 | struct net_device *dev = (struct net_device *)if6->idev->dev; | ||
646 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
647 | |||
648 | if (!ipvlan_dev_slave(dev)) | ||
649 | return NOTIFY_DONE; | ||
650 | |||
651 | if (!ipvlan || !ipvlan->port) | ||
652 | return NOTIFY_DONE; | ||
653 | |||
654 | switch (event) { | ||
655 | case NETDEV_UP: | ||
656 | if (ipvlan_add_addr6(ipvlan, &if6->addr)) | ||
657 | return NOTIFY_BAD; | ||
658 | break; | ||
659 | |||
660 | case NETDEV_DOWN: | ||
661 | ipvlan_del_addr6(ipvlan, &if6->addr); | ||
662 | break; | ||
663 | } | ||
664 | |||
665 | return NOTIFY_OK; | ||
666 | } | ||
667 | |||
668 | static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) | ||
669 | { | ||
670 | struct ipvl_addr *addr; | ||
671 | |||
672 | if (ipvlan_addr_busy(ipvlan, ip4_addr, false)) { | ||
673 | netif_err(ipvlan, ifup, ipvlan->dev, | ||
674 | "Failed to add IPv4=%pI4 on %s intf.\n", | ||
675 | ip4_addr, ipvlan->dev->name); | ||
676 | return -EINVAL; | ||
677 | } | ||
678 | addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL); | ||
679 | if (!addr) | ||
680 | return -ENOMEM; | ||
681 | |||
682 | addr->master = ipvlan; | ||
683 | memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr)); | ||
684 | addr->atype = IPVL_IPV4; | ||
685 | list_add_tail_rcu(&addr->anode, &ipvlan->addrs); | ||
686 | ipvlan->ipv4cnt++; | ||
687 | ipvlan_ht_addr_add(ipvlan, addr); | ||
688 | ipvlan_set_broadcast_mac_filter(ipvlan, true); | ||
689 | |||
690 | return 0; | ||
691 | } | ||
692 | |||
693 | static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) | ||
694 | { | ||
695 | struct ipvl_addr *addr; | ||
696 | |||
697 | addr = ipvlan_ht_addr_lookup(ipvlan->port, ip4_addr, false); | ||
698 | if (!addr) | ||
699 | return; | ||
700 | |||
701 | ipvlan_ht_addr_del(addr, true); | ||
702 | list_del_rcu(&addr->anode); | ||
703 | ipvlan->ipv4cnt--; | ||
704 | WARN_ON(ipvlan->ipv4cnt < 0); | ||
705 | if (!ipvlan->ipv4cnt) | ||
706 | ipvlan_set_broadcast_mac_filter(ipvlan, false); | ||
707 | kfree_rcu(addr, rcu); | ||
708 | |||
709 | return; | ||
710 | } | ||
711 | |||
712 | static int ipvlan_addr4_event(struct notifier_block *unused, | ||
713 | unsigned long event, void *ptr) | ||
714 | { | ||
715 | struct in_ifaddr *if4 = (struct in_ifaddr *)ptr; | ||
716 | struct net_device *dev = (struct net_device *)if4->ifa_dev->dev; | ||
717 | struct ipvl_dev *ipvlan = netdev_priv(dev); | ||
718 | struct in_addr ip4_addr; | ||
719 | |||
720 | if (!ipvlan_dev_slave(dev)) | ||
721 | return NOTIFY_DONE; | ||
722 | |||
723 | if (!ipvlan || !ipvlan->port) | ||
724 | return NOTIFY_DONE; | ||
725 | |||
726 | switch (event) { | ||
727 | case NETDEV_UP: | ||
728 | ip4_addr.s_addr = if4->ifa_address; | ||
729 | if (ipvlan_add_addr4(ipvlan, &ip4_addr)) | ||
730 | return NOTIFY_BAD; | ||
731 | break; | ||
732 | |||
733 | case NETDEV_DOWN: | ||
734 | ip4_addr.s_addr = if4->ifa_address; | ||
735 | ipvlan_del_addr4(ipvlan, &ip4_addr); | ||
736 | break; | ||
737 | } | ||
738 | |||
739 | return NOTIFY_OK; | ||
740 | } | ||
741 | |||
742 | static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { | ||
743 | .notifier_call = ipvlan_addr4_event, | ||
744 | }; | ||
745 | |||
746 | static struct notifier_block ipvlan_notifier_block __read_mostly = { | ||
747 | .notifier_call = ipvlan_device_event, | ||
748 | }; | ||
749 | |||
750 | static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { | ||
751 | .notifier_call = ipvlan_addr6_event, | ||
752 | }; | ||
753 | |||
754 | static int __init ipvlan_init_module(void) | ||
755 | { | ||
756 | int err; | ||
757 | |||
758 | ipvlan_init_secret(); | ||
759 | register_netdevice_notifier(&ipvlan_notifier_block); | ||
760 | register_inet6addr_notifier(&ipvlan_addr6_notifier_block); | ||
761 | register_inetaddr_notifier(&ipvlan_addr4_notifier_block); | ||
762 | |||
763 | err = ipvlan_link_register(&ipvlan_link_ops); | ||
764 | if (err < 0) | ||
765 | goto error; | ||
766 | |||
767 | return 0; | ||
768 | error: | ||
769 | unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); | ||
770 | unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); | ||
771 | unregister_netdevice_notifier(&ipvlan_notifier_block); | ||
772 | return err; | ||
773 | } | ||
774 | |||
775 | static void __exit ipvlan_cleanup_module(void) | ||
776 | { | ||
777 | rtnl_link_unregister(&ipvlan_link_ops); | ||
778 | unregister_netdevice_notifier(&ipvlan_notifier_block); | ||
779 | unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); | ||
780 | unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); | ||
781 | } | ||
782 | |||
783 | module_init(ipvlan_init_module); | ||
784 | module_exit(ipvlan_cleanup_module); | ||
785 | |||
786 | MODULE_LICENSE("GPL"); | ||
787 | MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); | ||
788 | MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); | ||
789 | MODULE_ALIAS_RTNL_LINK("ipvlan"); | ||
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5cd508787572..2cb772495f7a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -1230,6 +1230,8 @@ enum netdev_priv_flags { | |||
1230 | IFF_LIVE_ADDR_CHANGE = 1<<20, | 1230 | IFF_LIVE_ADDR_CHANGE = 1<<20, |
1231 | IFF_MACVLAN = 1<<21, | 1231 | IFF_MACVLAN = 1<<21, |
1232 | IFF_XMIT_DST_RELEASE_PERM = 1<<22, | 1232 | IFF_XMIT_DST_RELEASE_PERM = 1<<22, |
1233 | IFF_IPVLAN_MASTER = 1<<23, | ||
1234 | IFF_IPVLAN_SLAVE = 1<<24, | ||
1233 | }; | 1235 | }; |
1234 | 1236 | ||
1235 | #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN | 1237 | #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN |
@@ -1255,6 +1257,8 @@ enum netdev_priv_flags { | |||
1255 | #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE | 1257 | #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE |
1256 | #define IFF_MACVLAN IFF_MACVLAN | 1258 | #define IFF_MACVLAN IFF_MACVLAN |
1257 | #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM | 1259 | #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM |
1260 | #define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER | ||
1261 | #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE | ||
1258 | 1262 | ||
1259 | /** | 1263 | /** |
1260 | * struct net_device - The DEVICE structure. | 1264 | * struct net_device - The DEVICE structure. |
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7072d8325016..36bddc233633 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h | |||
@@ -330,6 +330,21 @@ enum macvlan_macaddr_mode { | |||
330 | 330 | ||
331 | #define MACVLAN_FLAG_NOPROMISC 1 | 331 | #define MACVLAN_FLAG_NOPROMISC 1 |
332 | 332 | ||
/* IPVLAN section */

/* Netlink attributes carried in the link-info data of an ipvlan link. */
enum {
	IFLA_IPVLAN_UNSPEC = 0,
	IFLA_IPVLAN_MODE = 1,	/* u16, one of enum ipvlan_mode */
	__IFLA_IPVLAN_MAX
};

/* Highest valid IFLA_IPVLAN_* attribute number. */
#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
341 | |||
/* Operating modes of an ipvlan port; IPVLAN_MODE_MAX is the exclusive
 * upper bound of the valid values.
 */
enum ipvlan_mode {
	IPVLAN_MODE_L2 = 0,
	IPVLAN_MODE_L3 = 1,
	IPVLAN_MODE_MAX
};
347 | |||
333 | /* VXLAN section */ | 348 | /* VXLAN section */ |
334 | enum { | 349 | enum { |
335 | IFLA_VXLAN_UNSPEC, | 350 | IFLA_VXLAN_UNSPEC, |