aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ipvlan
diff options
context:
space:
mode:
authorMahesh Bandewar <maheshb@google.com>2014-11-24 02:07:46 -0500
committerDavid S. Miller <davem@davemloft.net>2014-11-24 15:29:18 -0500
commit2ad7bf3638411cb547f2823df08166c13ab04269 (patch)
tree2d31c5a7f0278318d7f6aae805ca4c1296c50afb /drivers/net/ipvlan
parent2bbea0a885079a095c252a927a174a0bfec9e3e9 (diff)
ipvlan: Initial check-in of the IPVLAN driver.
This driver is very similar to the macvlan driver except that it uses L3 on the frame to determine the logical interface while functioning as packet dispatcher. It inherits L2 of the master device hence the packets on wire will have the same L2 for all the packets originating from all virtual devices off of the same master device. This driver was developed keeping the namespace use-case in mind. Hence most of the examples given here take that as the base setup where main-device belongs to the default-ns and virtual devices are assigned to the additional namespaces. The device operates in two different modes and the difference in these two modes is primarily in the TX side. (a) L2 mode : In this mode, the device behaves as a L2 device. TX processing up to L2 happens on the stack of the virtual device associated with (namespace). Packets are switched after that into the main device (default-ns) and queued for xmit. RX processing is simple and all multicast, broadcast (if applicable), and unicast belonging to the address(es) are delivered to the virtual devices. (b) L3 mode : In this mode, the device behaves like a L3 device. TX processing up to L3 happens on the stack of the virtual device associated with (namespace). Packets are switched to the main-device (default-ns) for the L2 processing. Hence the routing table of the default-ns will be used in this mode. RX processing is somewhat similar to the L2 mode except that in this mode only Unicast packets are delivered to the virtual device while main-dev will handle all other packets. 
The devices can be added using the "ip" command from the iproute2 package - ip link add link <master> <virtual> type ipvlan mode [ l2 | l3 ] Signed-off-by: Mahesh Bandewar <maheshb@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Maciej Żenczykowski <maze@google.com> Cc: Laurent Chavey <chavey@google.com> Cc: Tim Hockin <thockin@google.com> Cc: Brandon Philips <brandon.philips@coreos.com> Cc: Pavel Emelianov <xemul@parallels.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ipvlan')
-rw-r--r--drivers/net/ipvlan/Makefile7
-rw-r--r--drivers/net/ipvlan/ipvlan.h130
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c607
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c789
4 files changed, 1533 insertions, 0 deletions
diff --git a/drivers/net/ipvlan/Makefile b/drivers/net/ipvlan/Makefile
new file mode 100644
index 000000000000..df79910192d6
--- /dev/null
+++ b/drivers/net/ipvlan/Makefile
@@ -0,0 +1,7 @@
#
# Makefile for the Ethernet Ipvlan driver
#

obj-$(CONFIG_IPVLAN) += ipvlan.o

# The ipvlan module is built from the core (RX/TX datapath) and main
# (netdevice/rtnl glue) translation units.
ipvlan-objs := ipvlan_core.o ipvlan_main.o
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
new file mode 100644
index 000000000000..ab3e7614ed71
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -0,0 +1,130 @@
1/*
2 * Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of
7 * the License, or (at your option) any later version.
8 *
9 */
10#ifndef __IPVLAN_H
11#define __IPVLAN_H
12
13#include <linux/kernel.h>
14#include <linux/types.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/rculist.h>
18#include <linux/notifier.h>
19#include <linux/netdevice.h>
20#include <linux/etherdevice.h>
21#include <linux/if_arp.h>
22#include <linux/if_link.h>
23#include <linux/if_vlan.h>
24#include <linux/ip.h>
25#include <linux/inetdevice.h>
26#include <net/rtnetlink.h>
27#include <net/gre.h>
28#include <net/route.h>
29#include <net/addrconf.h>
30
#define IPVLAN_DRV	"ipvlan"
#define IPV_DRV_VER	"0.1"

/* L3 address hash table: 256 buckets (one per possible byte value). */
#define IPVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
#define IPVLAN_HASH_MASK	(IPVLAN_HASH_SIZE - 1)

/* Per-slave multicast MAC filter bitmap: 256 bits indexed by a hash of
 * the destination MAC address.
 */
#define IPVLAN_MAC_FILTER_BITS	8
#define IPVLAN_MAC_FILTER_SIZE	(1 << IPVLAN_MAC_FILTER_BITS)
#define IPVLAN_MAC_FILTER_MASK	(IPVLAN_MAC_FILTER_SIZE - 1)
40
/* Classification of the L3 header found in a frame; selects the
 * address-lookup strategy in the datapath.
 */
typedef enum {
	IPVL_IPV6 = 0,	/* IPv6 header */
	IPVL_ICMPV6,	/* ICMPv6 (Neighbour Solicitation special-case) */
	IPVL_IPV4,	/* IPv4 header */
	IPVL_ARP,	/* ARP header */
} ipvl_hdr_type;
47
/* Per-CPU traffic counters for one ipvlan slave. The 64-bit counters are
 * read under @syncp; the u32 error/drop counters are updated without it
 * (see ipvlan_get_stats64()).
 */
struct ipvl_pcpu_stats {
	u64			rx_pkts;
	u64			rx_bytes;
	u64			rx_mcast;
	u64			tx_pkts;
	u64			tx_bytes;
	struct u64_stats_sync	syncp;
	u32			rx_errs;
	u32			tx_drps;
};
58
59struct ipvl_port;
60
/* Private state of one ipvlan slave netdevice (netdev_priv(dev)). */
struct ipvl_dev {
	struct net_device	*dev;		/* this slave device */
	struct list_head	pnode;		/* linkage on port->ipvlans */
	struct ipvl_port	*port;		/* owning port (master state) */
	struct net_device	*phy_dev;	/* underlying master device */
	struct list_head	addrs;		/* list of ipvl_addr on slave */
	int			ipv4cnt;	/* number of IPv4 addrs */
	int			ipv6cnt;	/* number of IPv6 addrs */
	struct ipvl_pcpu_stats	*pcpu_stats;	/* per-CPU counters */
	/* Multicast MAC filter bitmap, indexed by ipvlan_mac_hash(). */
	DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
	netdev_features_t	sfeatures;	/* features set on the slave */
	u32			msg_enable;	/* ethtool msglevel */
	u16			mtu_adj;	/* subtracted from master MTU */
};
75
/* One L3 address assigned to a slave; hashed into the port's table for
 * RX demux and chained on the slave's address list.
 */
struct ipvl_addr {
	struct ipvl_dev		*master; /* Back pointer to master */
	union {
		struct in6_addr	ip6;	 /* IPv6 address on logical interface */
		struct in_addr	ip4;	 /* IPv4 address on logical interface */
	} ipu;
#define ip6addr	ipu.ip6
#define ip4addr	ipu.ip4
	struct hlist_node	hlnode;	 /* Hash-table linkage */
	struct list_head	anode;	 /* logical-interface linkage */
	struct rcu_head		rcu;	 /* deferred free */
	ipvl_hdr_type		atype;	 /* IPVL_IPV4 or IPVL_IPV6 */
};
89
/* Per-master state, installed as the master's rx_handler_data. */
struct ipvl_port {
	struct net_device	*dev;			/* master device */
	struct hlist_head	hlhead[IPVLAN_HASH_SIZE]; /* L3 addr hash */
	struct list_head	ipvlans;		/* attached slaves */
	struct rcu_head		rcu;			/* deferred free */
	int			count;			/* number of slaves */
	u16			mode;			/* IPVLAN_MODE_L2/L3 */
};
98
/* Fetch the port hung off the master's rx_handler_data; RCU read side. */
static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d)
{
	return rcu_dereference(d->rx_handler_data);
}

/* As above, but for callers holding RTNL instead of rcu_read_lock(). */
static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d)
{
	return rtnl_dereference(d->rx_handler_data);
}

/* True if @d is an ipvlan master (has a port attached). */
static inline bool ipvlan_dev_master(struct net_device *d)
{
	return d->priv_flags & IFF_IPVLAN_MASTER;
}

/* True if @d is an ipvlan slave device. */
static inline bool ipvlan_dev_slave(struct net_device *d)
{
	return d->priv_flags & IFF_IPVLAN_SLAVE;
}
118
/* Shared entry points implemented in ipvlan_core.c / ipvlan_main.c. */
void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev);
void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval);
void ipvlan_init_secret(void);
unsigned int ipvlan_mac_hash(const unsigned char *addr);
rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb);
int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6);
struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					const void *iaddr, bool is_v6);
void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync);
#endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
new file mode 100644
index 000000000000..a14d87783245
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -0,0 +1,607 @@
1/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License as
5 * published by the Free Software Foundation; either version 2 of
6 * the License, or (at your option) any later version.
7 *
8 */
9
10#include "ipvlan.h"
11
/* Random seed mixed into the address/MAC hashes to avoid predictable
 * bucket placement.
 */
static u32 ipvlan_jhash_secret;

/* Lazily initialize the hash seed once, on first use. */
void ipvlan_init_secret(void)
{
	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}
18
/* Account one received frame of @len bytes against @ipvlan's per-CPU
 * counters. @success selects pkt/byte (and @mcast) counters vs. the
 * error counter; a NULL @ipvlan is silently ignored.
 */
static void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
			    unsigned int len, bool success, bool mcast)
{
	if (!ipvlan)
		return;

	if (likely(success)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
		u64_stats_update_begin(&pcptr->syncp);
		pcptr->rx_pkts++;
		pcptr->rx_bytes += len;
		if (mcast)
			pcptr->rx_mcast++;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		/* u32 counter; updated without the syncp (see stats64). */
		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
	}
}
39
/* Hash an IPv6 address into one of IPVLAN_HASH_SIZE buckets. */
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	const struct in6_addr *ip6_addr = iaddr;

	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}
47
/* Hash an IPv4 address into one of IPVLAN_HASH_SIZE buckets. */
static u8 ipvlan_get_v4_hash(const void *iaddr)
{
	const struct in_addr *ip4_addr = iaddr;

	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}
55
/* Look up @iaddr (in6_addr or in_addr per @is_v6) in the port's address
 * hash table. Caller must be in an RCU read-side section. Returns the
 * matching entry or NULL.
 */
struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;
	u8 hash;

	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
	       ipvlan_get_v4_hash(iaddr);
	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
		if (is_v6 && addr->atype == IPVL_IPV6 &&
		    ipv6_addr_equal(&addr->ip6addr, iaddr))
			return addr;
		else if (!is_v6 && addr->atype == IPVL_IPV4 &&
			 addr->ip4addr.s_addr ==
				((struct in_addr *)iaddr)->s_addr)
			return addr;
	}
	return NULL;
}
75
/* Insert @addr into its port's hash table, bucketed by address family
 * specific hash of the stored address.
 */
void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
{
	struct ipvl_port *port = ipvlan->port;
	u8 hash;

	hash = (addr->atype == IPVL_IPV6) ?
	       ipvlan_get_v6_hash(&addr->ip6addr) :
	       ipvlan_get_v4_hash(&addr->ip4addr);
	hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
}
86
/* Unhash @addr; when @sync, wait for RCU readers before returning so the
 * caller may free it immediately.
 */
void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync)
{
	hlist_del_rcu(&addr->hlnode);
	if (sync)
		synchronize_rcu();
}
93
/* Return true if @iaddr is already in use, either on this slave's own
 * address list or anywhere on the port's hash table (another slave).
 */
bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
	struct ipvl_port *port = ipvlan->port;
	struct ipvl_addr *addr;

	list_for_each_entry(addr, &ipvlan->addrs, anode) {
		if ((is_v6 && addr->atype == IPVL_IPV6 &&
		     ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
		    (!is_v6 && addr->atype == IPVL_IPV4 &&
		     addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
			return true;
	}

	if (ipvlan_ht_addr_lookup(port, iaddr, is_v6))
		return true;

	return false;
}
112
113static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type)
114{
115 void *lyr3h = NULL;
116
117 switch (skb->protocol) {
118 case htons(ETH_P_ARP): {
119 struct arphdr *arph;
120
121 if (unlikely(!pskb_may_pull(skb, sizeof(*arph))))
122 return NULL;
123
124 arph = arp_hdr(skb);
125 *type = IPVL_ARP;
126 lyr3h = arph;
127 break;
128 }
129 case htons(ETH_P_IP): {
130 u32 pktlen;
131 struct iphdr *ip4h;
132
133 if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
134 return NULL;
135
136 ip4h = ip_hdr(skb);
137 pktlen = ntohs(ip4h->tot_len);
138 if (ip4h->ihl < 5 || ip4h->version != 4)
139 return NULL;
140 if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
141 return NULL;
142
143 *type = IPVL_IPV4;
144 lyr3h = ip4h;
145 break;
146 }
147 case htons(ETH_P_IPV6): {
148 struct ipv6hdr *ip6h;
149
150 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
151 return NULL;
152
153 ip6h = ipv6_hdr(skb);
154 if (ip6h->version != 6)
155 return NULL;
156
157 *type = IPVL_IPV6;
158 lyr3h = ip6h;
159 /* Only Neighbour Solicitation pkts need different treatment */
160 if (ipv6_addr_any(&ip6h->saddr) &&
161 ip6h->nexthdr == NEXTHDR_ICMP) {
162 *type = IPVL_ICMPV6;
163 lyr3h = ip6h + 1;
164 }
165 break;
166 }
167 default:
168 return NULL;
169 }
170
171 return lyr3h;
172}
173
/* Hash the last four bytes of a MAC address into the multicast-filter
 * bitmap index space (0..IPVLAN_MAC_FILTER_MASK).
 */
unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
			       ipvlan_jhash_secret);

	return hash & IPVLAN_MAC_FILTER_MASK;
}
181
/* Replicate a multicast/broadcast frame to every slave on @port whose MAC
 * filter matches. @local indicates the frame originated from a slave
 * (TX path); in that case @in_dev is skipped and a copy also goes to the
 * master device. Pause frames are never replicated.
 */
static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb,
				   const struct ipvl_dev *in_dev, bool local)
{
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_dev *ipvlan;
	struct sk_buff *nskb;
	unsigned int len;
	unsigned int mac_hash;
	int ret;

	if (skb->protocol == htons(ETH_P_PAUSE))
		return;

	list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
		if (local && (ipvlan == in_dev))
			continue;

		mac_hash = ipvlan_mac_hash(eth->h_dest);
		if (!test_bit(mac_hash, ipvlan->mac_filters))
			continue;

		ret = NET_RX_DROP;
		len = skb->len + ETH_HLEN;
		/* Each recipient gets its own clone; a failed clone is
		 * still accounted as a drop below.
		 */
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			goto mcast_acct;

		if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast))
			nskb->pkt_type = PACKET_BROADCAST;
		else
			nskb->pkt_type = PACKET_MULTICAST;

		nskb->dev = ipvlan->dev;
		if (local)
			ret = dev_forward_skb(ipvlan->dev, nskb);
		else
			ret = netif_rx(nskb);
mcast_acct:
		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
	}

	/* Locally generated? ...Forward a copy to the main-device as
	 * well. On the RX side we'll ignore it (won't give it to any
	 * of the virtual devices).
	 */
	if (local) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb) {
			if (ether_addr_equal(eth->h_dest, port->dev->broadcast))
				nskb->pkt_type = PACKET_BROADCAST;
			else
				nskb->pkt_type = PACKET_MULTICAST;

			dev_forward_skb(port->dev, nskb);
		}
	}
}
239
/* Deliver @skb to the slave owning @addr. For slave-to-slave traffic
 * (@local) the skb is injected via dev_forward_skb(); otherwise the skb
 * is retargeted at the slave and RX_HANDLER_ANOTHER tells the caller to
 * reprocess it. Frames for a downed slave are dropped and accounted.
 */
static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
			    bool local)
{
	struct ipvl_dev *ipvlan = addr->master;
	struct net_device *dev = ipvlan->dev;
	unsigned int len;
	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
	bool success = false;

	len = skb->len + ETH_HLEN;
	if (unlikely(!(dev->flags & IFF_UP))) {
		kfree_skb(skb);
		goto out;
	}

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
		goto out;

	skb->dev = dev;
	skb->pkt_type = PACKET_HOST;

	if (local) {
		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
			success = true;
	} else {
		/* Caller (rx_handler core) will re-run RX for the slave. */
		ret = RX_HANDLER_ANOTHER;
		success = true;
	}

out:
	ipvlan_count_rx(ipvlan, len, success, false);
	return ret;
}
274
/* Extract the relevant address from the L3 header @lyr3h (already
 * classified as @addr_type) and look it up in @port's hash table.
 * @use_dest selects destination vs. source address. Returns the
 * matching ipvl_addr or NULL.
 */
static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
					    void *lyr3h, int addr_type,
					    bool use_dest)
{
	struct ipvl_addr *addr = NULL;

	if (addr_type == IPVL_IPV6) {
		struct ipv6hdr *ip6h;
		struct in6_addr *i6addr;

		ip6h = (struct ipv6hdr *)lyr3h;
		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
	} else if (addr_type == IPVL_ICMPV6) {
		struct nd_msg *ndmh;
		struct in6_addr *i6addr;

		/* Make sure that the NeighborSolicitation ICMPv6 packets
		 * are handled to avoid DAD issue.
		 */
		ndmh = (struct nd_msg *)lyr3h;
		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
			i6addr = &ndmh->target;
			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		}
	} else if (addr_type == IPVL_IPV4) {
		struct iphdr *ip4h;
		__be32 *i4addr;

		ip4h = (struct iphdr *)lyr3h;
		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
	} else if (addr_type == IPVL_ARP) {
		struct arphdr *arph;
		unsigned char *arp_ptr;
		__be32 dip;

		arph = (struct arphdr *)lyr3h;
		arp_ptr = (unsigned char *)(arph + 1);
		/* Walk past sender-hw/sender-ip(/target-hw) fields to the
		 * requested IP address within the ARP payload.
		 */
		if (use_dest)
			arp_ptr += (2 * port->dev->addr_len) + 4;
		else
			arp_ptr += port->dev->addr_len;

		/* memcpy: the ARP payload is not necessarily aligned. */
		memcpy(&dip, arp_ptr, 4);
		addr = ipvlan_ht_addr_lookup(port, &dip, false);
	}

	return addr;
}
325
/* L3-mode IPv4 TX: route the packet in the master's namespace (oif pinned
 * to the master via dev->iflink) and send it with ip_local_out().
 * Consumes @skb on both success and failure paths.
 */
static int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
	const struct iphdr *ip4h = ip_hdr(skb);
	struct net_device *dev = skb->dev;
	struct rtable *rt;
	int err, ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		.flowi4_oif = dev->iflink,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};

	rt = ip_route_output_flow(dev_net(dev), &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	/* Only unicast/local routes make sense for this device. */
	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
		ip_rt_put(rt);
		goto err;
	}
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	err = ip_local_out(skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}
362
363static int ipvlan_process_v6_outbound(struct sk_buff *skb)
364{
365 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
366 struct net_device *dev = skb->dev;
367 struct dst_entry *dst;
368 int err, ret = NET_XMIT_DROP;
369 struct flowi6 fl6 = {
370 .flowi6_iif = skb->dev->ifindex,
371 .daddr = ip6h->daddr,
372 .saddr = ip6h->saddr,
373 .flowi6_flags = FLOWI_FLAG_ANYSRC,
374 .flowlabel = ip6_flowinfo(ip6h),
375 .flowi6_mark = skb->mark,
376 .flowi6_proto = ip6h->nexthdr,
377 };
378
379 dst = ip6_route_output(dev_net(dev), NULL, &fl6);
380 if (IS_ERR(dst))
381 goto err;
382
383 skb_dst_drop(skb);
384 skb_dst_set(skb, dst);
385 err = ip6_local_out(skb);
386 if (unlikely(net_xmit_eval(err)))
387 dev->stats.tx_errors++;
388 else
389 ret = NET_XMIT_SUCCESS;
390 goto out;
391err:
392 dev->stats.tx_errors++;
393 kfree_skb(skb);
394out:
395 return ret;
396}
397
/* L3-mode TX dispatcher: strip any L2 header, then hand the packet to the
 * v4/v6 output path in the master's namespace. Multicast/broadcast and
 * non-IP traffic are dropped (L3 mode carries unicast IP only).
 */
static int ipvlan_process_outbound(struct sk_buff *skb,
				   const struct ipvl_dev *ipvlan)
{
	struct ethhdr *ethh = eth_hdr(skb);
	int ret = NET_XMIT_DROP;

	/* In this mode we dont care about multicast and broadcast traffic */
	if (is_multicast_ether_addr(ethh->h_dest)) {
		pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
		goto out;
	}

	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
	 * will have L2; which need to discarded and processed further
	 * in the net-ns of the main-device.
	 */
	if (skb_mac_header_was_set(skb)) {
		skb_pull(skb, sizeof(*ethh));
		/* Invalidate the mac_header offset after pulling it off. */
		skb->mac_header = (typeof(skb->mac_header))~0U;
		skb_reset_network_header(skb);
	}

	if (skb->protocol == htons(ETH_P_IPV6))
		ret = ipvlan_process_v6_outbound(skb);
	else if (skb->protocol == htons(ETH_P_IP))
		ret = ipvlan_process_v4_outbound(skb);
	else {
		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
	}
out:
	return ret;
}
434
/* L3-mode TX: if the destination L3 address belongs to another slave on
 * the same port, short-circuit delivery locally; otherwise push the
 * packet out through the master's stack.
 */
static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	void *lyr3h;
	struct ipvl_addr *addr;
	int addr_type;

	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
	if (addr)
		return ipvlan_rcv_frame(addr, skb, true);

out:
	skb->dev = ipvlan->phy_dev;
	return ipvlan_process_outbound(skb, ipvlan);
}
454
/* L2-mode TX: frames addressed to our own MAC (dest == source, since all
 * slaves share the master's MAC) are delivered locally — to another slave
 * if the L3 address matches, else up the master's stack. Multicast is
 * replicated to matching slaves before going out on the wire. Everything
 * else is queued on the master device.
 */
static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_dest, eth->h_source)) {
		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
		if (lyr3h) {
			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
			if (addr)
				return ipvlan_rcv_frame(addr, skb, true);
		}
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return NET_XMIT_DROP;

		/* Packet definitely does not belong to any of the
		 * virtual devices, but the dest is local. So forward
		 * the skb for the main-dev. At the RX side we just return
		 * RX_PASS for it to be processed further on the stack.
		 */
		return dev_forward_skb(ipvlan->phy_dev, skb);

	} else if (is_multicast_ether_addr(eth->h_dest)) {
		u8 ip_summed = skb->ip_summed;

		/* Locally replicated copies need no checksum validation. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true);
		skb->ip_summed = ip_summed;
	}

	skb->dev = ipvlan->phy_dev;
	return dev_queue_xmit(skb);
}
492
/* TX entry point from ndo_start_xmit: dispatch to the mode-specific
 * transmit handler. Drops the frame if the port is gone or the frame is
 * too short to carry an Ethernet header.
 */
int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev);

	if (!port)
		goto out;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
		goto out;

	switch(port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_xmit_mode_l2(skb, dev);
	case IPVLAN_MODE_L3:
		return ipvlan_xmit_mode_l3(skb, dev);
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
			  port->mode);
out:
	kfree_skb(skb);
	return NET_XMIT_DROP;
}
518
/* Return true if @skb came from outside this port. A frame whose source
 * MAC is the master's and whose source L3 address belongs to one of our
 * slaves is a looped-back local transmission (not external).
 */
static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
{
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
		if (!lyr3h)
			return true;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
		if (addr)
			return false;
	}

	return true;
}
538
/* L3-mode RX: deliver the frame to the slave owning the destination L3
 * address; frames with no matching address (or unparseable L3 headers)
 * pass up the master's stack.
 */
static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	void *lyr3h;
	int addr_type;
	struct ipvl_addr *addr;
	struct sk_buff *skb = *pskb;
	rx_handler_result_t ret = RX_HANDLER_PASS;

	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
	if (addr)
		ret = ipvlan_rcv_frame(addr, skb, false);

out:
	return ret;
}
559
/* L2-mode RX: replicate external multicast to matching slaves (the frame
 * itself still passes up the master), and steer unicast to the slave
 * owning the destination L3 address.
 */
static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	struct sk_buff *skb = *pskb;
	struct ethhdr *eth = eth_hdr(skb);
	rx_handler_result_t ret = RX_HANDLER_PASS;
	void *lyr3h;
	int addr_type;

	if (is_multicast_ether_addr(eth->h_dest)) {
		/* Locally-originated copies were already replicated on TX. */
		if (ipvlan_external_frame(skb, port))
			ipvlan_multicast_frame(port, skb, NULL, false);
	} else {
		struct ipvl_addr *addr;

		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
		if (!lyr3h)
			return ret;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
		if (addr)
			ret = ipvlan_rcv_frame(addr, skb, false);
	}

	return ret;
}
586
587rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
588{
589 struct sk_buff *skb = *pskb;
590 struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);
591
592 if (!port)
593 return RX_HANDLER_PASS;
594
595 switch (port->mode) {
596 case IPVLAN_MODE_L2:
597 return ipvlan_handle_mode_l2(pskb, port);
598 case IPVLAN_MODE_L3:
599 return ipvlan_handle_mode_l3(pskb, port);
600 }
601
602 /* Should not reach here */
603 WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
604 port->mode);
605 kfree_skb(skb);
606 return NET_RX_DROP;
607}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
new file mode 100644
index 000000000000..c3df84bd2857
--- /dev/null
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -0,0 +1,789 @@
1/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License as
5 * published by the Free Software Foundation; either version 2 of
6 * the License, or (at your option) any later version.
7 *
8 */
9
10#include "ipvlan.h"
11
/* Track the master @dev's MTU on the slave, minus any per-slave
 * adjustment (mtu_adj).
 */
void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
{
	ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
}
16
/* Switch @port between L2 and L3 mode. In L3 mode ARP is meaningless on
 * the slaves, so IFF_NOARP is toggled on every attached device.
 */
void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval)
{
	struct ipvl_dev *ipvlan;

	if (port->mode != nval) {
		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
			if (nval == IPVLAN_MODE_L3)
				ipvlan->dev->flags |= IFF_NOARP;
			else
				ipvlan->dev->flags &= ~IFF_NOARP;
		}
		port->mode = nval;
	}
}
31
/* Allocate port state for master @dev and register the rx_handler.
 * Returns 0 or a negative errno (non-Ethernet/loopback masters are
 * rejected). Called under RTNL.
 */
static int ipvlan_port_create(struct net_device *dev)
{
	struct ipvl_port *port;
	int err, idx;

	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) {
		netdev_err(dev, "Master is either lo or non-ether device\n");
		return -EINVAL;
	}
	port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
	if (!port)
		return -ENOMEM;

	port->dev = dev;
	port->mode = IPVLAN_MODE_L3;	/* default mode */
	INIT_LIST_HEAD(&port->ipvlans);
	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
		INIT_HLIST_HEAD(&port->hlhead[idx]);

	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
	if (err)
		goto err;

	dev->priv_flags |= IFF_IPVLAN_MASTER;
	return 0;

err:
	/* NOTE(review): the port was never published, so a plain kfree()
	 * would suffice here; kfree_rcu() is merely over-cautious.
	 */
	kfree_rcu(port, rcu);
	return err;
}
62
/* Tear down the port on master @dev: unregister the rx_handler and free
 * the port after an RCU grace period. Called under RTNL.
 */
static void ipvlan_port_destroy(struct net_device *dev)
{
	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);

	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
	netdev_rx_handler_unregister(dev);
	kfree_rcu(port, rcu);
}
71
/* ipvlan network devices have devices nesting below it and are a special
 * "super class" of normal network devices; split their locks off into a
 * separate class since they always nest.
 */
static struct lock_class_key ipvlan_netdev_xmit_lock_key;
static struct lock_class_key ipvlan_netdev_addr_lock_key;

/* Features a slave may inherit from its master device. */
#define IPVLAN_FEATURES \
	(NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)

/* Link-state bits mirrored from the master onto the slave. */
#define IPVLAN_STATE_MASK \
	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
87
/* Per-queue callback: put the TX queue lock in the ipvlan lock class. */
static void ipvlan_set_lockdep_class_one(struct net_device *dev,
					 struct netdev_queue *txq,
					 void *_unused)
{
	lockdep_set_class(&txq->_xmit_lock, &ipvlan_netdev_xmit_lock_key);
}
94
/* Move @dev's addr-list lock and every TX-queue lock into the dedicated
 * ipvlan lock classes (avoids false lockdep nesting reports).
 */
static void ipvlan_set_lockdep_class(struct net_device *dev)
{
	lockdep_set_class(&dev->addr_list_lock, &ipvlan_netdev_addr_lock_key);
	netdev_for_each_tx_queue(dev, ipvlan_set_lockdep_class_one, NULL);
}
100
/* ndo_init: inherit link state, features, GSO limits and header length
 * from the master, and allocate the per-CPU stats. Returns 0 or -ENOMEM.
 */
static int ipvlan_init(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	const struct net_device *phy_dev = ipvlan->phy_dev;

	dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
		     (phy_dev->state & IPVLAN_STATE_MASK);
	dev->features = phy_dev->features & IPVLAN_FEATURES;
	/* Slave TX is lockless; real locking happens on the master. */
	dev->features |= NETIF_F_LLTX;
	dev->gso_max_size = phy_dev->gso_max_size;
	dev->iflink = phy_dev->ifindex;
	dev->hard_header_len = phy_dev->hard_header_len;

	ipvlan_set_lockdep_class(dev);

	ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats);
	if (!ipvlan->pcpu_stats)
		return -ENOMEM;

	return 0;
}
122
/* ndo_uninit: release per-CPU stats and, when the last slave goes away,
 * tear down the port on the master.
 */
static void ipvlan_uninit(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan->port;

	if (ipvlan->pcpu_stats)
		free_percpu(ipvlan->pcpu_stats);

	port->count -= 1;
	if (!port->count)
		ipvlan_port_destroy(port->dev);
}
135
/* ndo_open: set ARP policy per port mode, re-hash this slave's addresses
 * for RX demux, and subscribe the shared MAC on the master.
 */
static int ipvlan_open(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;
	struct ipvl_addr *addr;

	if (ipvlan->port->mode == IPVLAN_MODE_L3)
		dev->flags |= IFF_NOARP;
	else
		dev->flags &= ~IFF_NOARP;

	if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) {
		list_for_each_entry(addr, &ipvlan->addrs, anode)
			ipvlan_ht_addr_add(ipvlan, addr);
	}
	return dev_uc_add(phy_dev, phy_dev->dev_addr);
}
153
/* ndo_stop: unsync address filters from the master, drop the shared MAC
 * subscription, and unhash this slave's addresses (skipping the RCU sync
 * when the device is being dismantled anyway).
 */
static int ipvlan_stop(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;
	struct ipvl_addr *addr;

	dev_uc_unsync(phy_dev, dev);
	dev_mc_unsync(phy_dev, dev);

	dev_uc_del(phy_dev, phy_dev->dev_addr);

	if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) {
		list_for_each_entry(addr, &ipvlan->addrs, anode)
			ipvlan_ht_addr_del(addr, !dev->dismantle);
	}
	return 0;
}
171
/* ndo_start_xmit: hand the frame to the mode dispatcher and account the
 * result (length captured before the skb may be consumed).
 */
netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	int skblen = skb->len;
	int ret;

	ret = ipvlan_queue_xmit(skb, dev);
	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);

		u64_stats_update_begin(&pcptr->syncp);
		pcptr->tx_pkts++;
		pcptr->tx_bytes += skblen;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->tx_drps);
	}
	return ret;
}
193
194static netdev_features_t ipvlan_fix_features(struct net_device *dev,
195 netdev_features_t features)
196{
197 struct ipvl_dev *ipvlan = netdev_priv(dev);
198
199 return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES);
200}
201
/* ndo_change_rx_flags: propagate ALLMULTI refcount changes to the master
 * so multicast traffic actually reaches the wire.
 */
static void ipvlan_change_rx_flags(struct net_device *dev, int change)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;

	if (change & IFF_ALLMULTI)
		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
}
210
211static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set)
212{
213 struct net_device *dev = ipvlan->dev;
214 unsigned int hashbit = ipvlan_mac_hash(dev->broadcast);
215
216 if (set && !test_bit(hashbit, ipvlan->mac_filters))
217 __set_bit(hashbit, ipvlan->mac_filters);
218 else if (!set && test_bit(hashbit, ipvlan->mac_filters))
219 __clear_bit(hashbit, ipvlan->mac_filters);
220}
221
/* ndo_set_rx_mode: rebuild the slave's multicast MAC filter from its
 * multicast list (or open it fully for PROMISC/ALLMULTI), then sync the
 * unicast/multicast address lists down to the master.
 */
static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
		bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE);
	} else {
		struct netdev_hw_addr *ha;
		DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE);

		/* Build into a local bitmap first so readers never see a
		 * half-cleared filter.
		 */
		bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE);
		netdev_for_each_mc_addr(ha, dev)
			__set_bit(ipvlan_mac_hash(ha->addr), mc_filters);

		bitmap_copy(ipvlan->mac_filters, mc_filters,
			    IPVLAN_MAC_FILTER_SIZE);
	}
	dev_uc_sync(ipvlan->phy_dev, dev);
	dev_mc_sync(ipvlan->phy_dev, dev);
}
242
/* ndo_get_stats64: sum the per-CPU counters into @s. 64-bit counters are
 * read under the u64_stats seqlock; the u32 error/drop counters are read
 * without it (they are only ever incremented).
 */
static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev,
						    struct rtnl_link_stats64 *s)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	if (ipvlan->pcpu_stats) {
		struct ipvl_pcpu_stats *pcptr;
		u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes;
		u32 rx_errs = 0, tx_drps = 0;
		u32 strt;
		int idx;

		for_each_possible_cpu(idx) {
			pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
			do {
				strt= u64_stats_fetch_begin_irq(&pcptr->syncp);
				rx_pkts = pcptr->rx_pkts;
				rx_bytes = pcptr->rx_bytes;
				rx_mcast = pcptr->rx_mcast;
				tx_pkts = pcptr->tx_pkts;
				tx_bytes = pcptr->tx_bytes;
			} while (u64_stats_fetch_retry_irq(&pcptr->syncp,
							   strt));

			s->rx_packets += rx_pkts;
			s->rx_bytes += rx_bytes;
			s->multicast += rx_mcast;
			s->tx_packets += tx_pkts;
			s->tx_bytes += tx_bytes;

			/* u32 values are updated without syncp protection. */
			rx_errs += pcptr->rx_errs;
			tx_drps += pcptr->tx_drps;
		}
		s->rx_errors = rx_errs;
		s->rx_dropped = rx_errs;
		s->tx_dropped = tx_drps;
	}
	return s;
}
283
284static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
285{
286 struct ipvl_dev *ipvlan = netdev_priv(dev);
287 struct net_device *phy_dev = ipvlan->phy_dev;
288
289 return vlan_vid_add(phy_dev, proto, vid);
290}
291
292static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
293 u16 vid)
294{
295 struct ipvl_dev *ipvlan = netdev_priv(dev);
296 struct net_device *phy_dev = ipvlan->phy_dev;
297
298 vlan_vid_del(phy_dev, proto, vid);
299 return 0;
300}
301
/* netdev callbacks for ipvlan slave devices. */
static const struct net_device_ops ipvlan_netdev_ops = {
	.ndo_init		= ipvlan_init,
	.ndo_uninit		= ipvlan_uninit,
	.ndo_open		= ipvlan_open,
	.ndo_stop		= ipvlan_stop,
	.ndo_start_xmit		= ipvlan_start_xmit,
	.ndo_fix_features	= ipvlan_fix_features,
	.ndo_change_rx_flags	= ipvlan_change_rx_flags,
	.ndo_set_rx_mode	= ipvlan_set_multicast_mac_filter,
	.ndo_get_stats64	= ipvlan_get_stats64,
	.ndo_vlan_rx_add_vid	= ipvlan_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= ipvlan_vlan_rx_kill_vid,
};
315
316static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
317 unsigned short type, const void *daddr,
318 const void *saddr, unsigned len)
319{
320 const struct ipvl_dev *ipvlan = netdev_priv(dev);
321 struct net_device *phy_dev = ipvlan->phy_dev;
322
323 /* TODO Probably use a different field than dev_addr so that the
324 * mac-address on the virtual device is portable and can be carried
325 * while the packets use the mac-addr on the physical device.
326 */
327 return dev_hard_header(skb, phy_dev, type, daddr,
328 saddr ? : dev->dev_addr, len);
329}
330
/* Ethernet header ops; only .create is ipvlan-specific (it substitutes
 * the physical device), the rest reuse the stock ethernet helpers.
 */
static const struct header_ops ipvlan_header_ops = {
	.create  	= ipvlan_hard_header,
	.rebuild	= eth_rebuild_header,
	.parse		= eth_header_parse,
	.cache		= eth_header_cache,
	.cache_update	= eth_header_cache_update,
};
338
339static int ipvlan_ethtool_get_settings(struct net_device *dev,
340 struct ethtool_cmd *cmd)
341{
342 const struct ipvl_dev *ipvlan = netdev_priv(dev);
343
344 return __ethtool_get_settings(ipvlan->phy_dev, cmd);
345}
346
/* Fill driver name/version for `ethtool -i`; other drvinfo fields are
 * left as zeroed by the caller.
 */
static void ipvlan_ethtool_get_drvinfo(struct net_device *dev,
				       struct ethtool_drvinfo *drvinfo)
{
	strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version));
}
353
354static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev)
355{
356 const struct ipvl_dev *ipvlan = netdev_priv(dev);
357
358 return ipvlan->msg_enable;
359}
360
361static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value)
362{
363 struct ipvl_dev *ipvlan = netdev_priv(dev);
364
365 ipvlan->msg_enable = value;
366}
367
/* Minimal ethtool surface: link state and settings are proxied from the
 * physical device; msglevel is stored per slave.
 */
static const struct ethtool_ops ipvlan_ethtool_ops = {
	.get_link	= ethtool_op_get_link,
	.get_settings	= ipvlan_ethtool_get_settings,
	.get_drvinfo	= ipvlan_ethtool_get_drvinfo,
	.get_msglevel	= ipvlan_ethtool_get_msglevel,
	.set_msglevel	= ipvlan_ethtool_set_msglevel,
};
375
376static int ipvlan_nl_changelink(struct net_device *dev,
377 struct nlattr *tb[], struct nlattr *data[])
378{
379 struct ipvl_dev *ipvlan = netdev_priv(dev);
380 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
381
382 if (data && data[IFLA_IPVLAN_MODE]) {
383 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
384
385 ipvlan_set_port_mode(port, nmode);
386 }
387 return 0;
388}
389
390static size_t ipvlan_nl_getsize(const struct net_device *dev)
391{
392 return (0
393 + nla_total_size(2) /* IFLA_IPVLAN_MODE */
394 );
395}
396
397static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[])
398{
399 if (data && data[IFLA_IPVLAN_MODE]) {
400 u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
401
402 if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX)
403 return -EINVAL;
404 }
405 return 0;
406}
407
408static int ipvlan_nl_fillinfo(struct sk_buff *skb,
409 const struct net_device *dev)
410{
411 struct ipvl_dev *ipvlan = netdev_priv(dev);
412 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
413 int ret = -EINVAL;
414
415 if (!port)
416 goto err;
417
418 ret = -EMSGSIZE;
419 if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode))
420 goto err;
421
422 return 0;
423
424err:
425 return ret;
426}
427
428static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
429 struct nlattr *tb[], struct nlattr *data[])
430{
431 struct ipvl_dev *ipvlan = netdev_priv(dev);
432 struct ipvl_port *port;
433 struct net_device *phy_dev;
434 int err;
435
436 if (!tb[IFLA_LINK])
437 return -EINVAL;
438
439 phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
440 if (!phy_dev)
441 return -ENODEV;
442
443 if (ipvlan_dev_slave(phy_dev)) {
444 struct ipvl_dev *tmp = netdev_priv(phy_dev);
445
446 phy_dev = tmp->phy_dev;
447 } else if (!ipvlan_dev_master(phy_dev)) {
448 err = ipvlan_port_create(phy_dev);
449 if (err < 0)
450 return err;
451 }
452
453 port = ipvlan_port_get_rtnl(phy_dev);
454 if (data && data[IFLA_IPVLAN_MODE])
455 port->mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
456
457 ipvlan->phy_dev = phy_dev;
458 ipvlan->dev = dev;
459 ipvlan->port = port;
460 ipvlan->sfeatures = IPVLAN_FEATURES;
461 INIT_LIST_HEAD(&ipvlan->addrs);
462 ipvlan->ipv4cnt = 0;
463 ipvlan->ipv6cnt = 0;
464
465 /* TODO Probably put random address here to be presented to the
466 * world but keep using the physical-dev address for the outgoing
467 * packets.
468 */
469 memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
470
471 dev->priv_flags |= IFF_IPVLAN_SLAVE;
472
473 port->count += 1;
474 err = register_netdevice(dev);
475 if (err < 0)
476 goto ipvlan_destroy_port;
477
478 err = netdev_upper_dev_link(phy_dev, dev);
479 if (err)
480 goto ipvlan_destroy_port;
481
482 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
483 netif_stacked_transfer_operstate(phy_dev, dev);
484 return 0;
485
486ipvlan_destroy_port:
487 port->count -= 1;
488 if (!port->count)
489 ipvlan_port_destroy(phy_dev);
490
491 return err;
492}
493
/* dellink handler: tear down one ipvlan slave.
 *
 * Drops every address the slave still owns from the port hash (skipping
 * the RCU grace wait when the whole stack is being dismantled), removes
 * the slave from the port list, and queues the netdev for unregistration
 * on @head before unlinking it from the lower device.
 */
static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_addr *addr, *next;

	if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) {
		list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
			/* !dev->dismantle: only synchronize when the device
			 * is not already being torn down wholesale.
			 */
			ipvlan_ht_addr_del(addr, !dev->dismantle);
			list_del_rcu(&addr->anode);
		}
	}
	list_del_rcu(&ipvlan->pnode);
	unregister_netdevice_queue(dev, head);
	netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
}
509
/* rtnl_link_ops->setup: initialize a freshly allocated ipvlan netdev as
 * an ethernet device and install the ipvlan ops tables.
 */
static void ipvlan_link_setup(struct net_device *dev)
{
	ether_setup(dev);

	/* Virtual device: no dst release on xmit, skbs are not shared,
	 * and unicast filtering is handled by the address hash.
	 */
	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
	dev->priv_flags |= IFF_UNICAST_FLT;
	dev->netdev_ops = &ipvlan_netdev_ops;
	dev->destructor = free_netdev;
	dev->header_ops = &ipvlan_header_ops;
	dev->ethtool_ops = &ipvlan_ethtool_ops;
	/* No queueing on the virtual device; the lower device queues. */
	dev->tx_queue_len = 0;
}
522
/* Netlink attribute policy: only the u16 mode attribute is accepted. */
static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
{
	[IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
};
527
/* rtnl_link_ops wiring for "ip link add ... type ipvlan". */
static struct rtnl_link_ops ipvlan_link_ops = {
	.kind		= "ipvlan",
	.priv_size	= sizeof(struct ipvl_dev),

	.get_size	= ipvlan_nl_getsize,
	.policy		= ipvlan_nl_policy,
	.validate	= ipvlan_nl_validate,
	.fill_info	= ipvlan_nl_fillinfo,
	.changelink	= ipvlan_nl_changelink,
	.maxtype	= IFLA_IPVLAN_MAX,

	.setup		= ipvlan_link_setup,
	.newlink	= ipvlan_link_new,
	.dellink	= ipvlan_link_delete,
};
543
/* Exported (non-static) wrapper around rtnl_link_register() so other
 * parts of the driver can register the ipvlan link ops.
 */
int ipvlan_link_register(struct rtnl_link_ops *ops)
{
	return rtnl_link_register(ops);
}
548
/* netdevice notifier: react to events on a master (lower) device and
 * propagate state to every ipvlan slave stacked on it.  Events on
 * devices that are not ipvlan masters are ignored.
 */
static int ipvlan_device_event(struct notifier_block *unused,
			       unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct ipvl_dev *ipvlan, *next;
	struct ipvl_port *port;
	LIST_HEAD(lst_kill);

	if (!ipvlan_dev_master(dev))
		return NOTIFY_DONE;

	port = ipvlan_port_get_rtnl(dev);

	switch (event) {
	case NETDEV_CHANGE:
		/* Mirror the lower device's operstate onto each slave. */
		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
			netif_stacked_transfer_operstate(ipvlan->phy_dev,
							 ipvlan->dev);
		break;

	case NETDEV_UNREGISTER:
		if (dev->reg_state != NETREG_UNREGISTERING)
			break;

		/* Lower device is going away: delete all slaves with it,
		 * batching the unregistrations.  _safe because dellink
		 * removes entries from the port list.
		 */
		list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
					 pnode)
			ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
							    &lst_kill);
		unregister_netdevice_many(&lst_kill);
		break;

	case NETDEV_FEAT_CHANGE:
		/* Slaves only offer what the lower device supports. */
		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
			ipvlan->dev->features = dev->features & IPVLAN_FEATURES;
			ipvlan->dev->gso_max_size = dev->gso_max_size;
			netdev_features_change(ipvlan->dev);
		}
		break;

	case NETDEV_CHANGEMTU:
		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
			ipvlan_adjust_mtu(ipvlan, dev);
		break;

	case NETDEV_PRE_TYPE_CHANGE:
		/* Forbid underlying device to change its type. */
		return NOTIFY_BAD;
	}
	return NOTIFY_DONE;
}
599
600static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
601{
602 struct ipvl_addr *addr;
603
604 if (ipvlan_addr_busy(ipvlan, ip6_addr, true)) {
605 netif_err(ipvlan, ifup, ipvlan->dev,
606 "Failed to add IPv6=%pI6c addr for %s intf\n",
607 ip6_addr, ipvlan->dev->name);
608 return -EINVAL;
609 }
610 addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
611 if (!addr)
612 return -ENOMEM;
613
614 addr->master = ipvlan;
615 memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr));
616 addr->atype = IPVL_IPV6;
617 list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
618 ipvlan->ipv6cnt++;
619 ipvlan_ht_addr_add(ipvlan, addr);
620
621 return 0;
622}
623
624static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
625{
626 struct ipvl_addr *addr;
627
628 addr = ipvlan_ht_addr_lookup(ipvlan->port, ip6_addr, true);
629 if (!addr)
630 return;
631
632 ipvlan_ht_addr_del(addr, true);
633 list_del_rcu(&addr->anode);
634 ipvlan->ipv6cnt--;
635 WARN_ON(ipvlan->ipv6cnt < 0);
636 kfree_rcu(addr, rcu);
637
638 return;
639}
640
/* inet6addr notifier: track IPv6 addresses configured on ipvlan slaves
 * so the port hash can dispatch incoming traffic.  Events on non-ipvlan
 * devices are ignored.
 */
static int ipvlan_addr6_event(struct notifier_block *unused,
			      unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
	struct net_device *dev = (struct net_device *)if6->idev->dev;
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	if (!ipvlan_dev_slave(dev))
		return NOTIFY_DONE;

	if (!ipvlan || !ipvlan->port)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		if (ipvlan_add_addr6(ipvlan, &if6->addr))
			return NOTIFY_BAD;
		break;

	case NETDEV_DOWN:
		ipvlan_del_addr6(ipvlan, &if6->addr);
		break;
	}

	return NOTIFY_OK;
}
667
668static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
669{
670 struct ipvl_addr *addr;
671
672 if (ipvlan_addr_busy(ipvlan, ip4_addr, false)) {
673 netif_err(ipvlan, ifup, ipvlan->dev,
674 "Failed to add IPv4=%pI4 on %s intf.\n",
675 ip4_addr, ipvlan->dev->name);
676 return -EINVAL;
677 }
678 addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL);
679 if (!addr)
680 return -ENOMEM;
681
682 addr->master = ipvlan;
683 memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr));
684 addr->atype = IPVL_IPV4;
685 list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
686 ipvlan->ipv4cnt++;
687 ipvlan_ht_addr_add(ipvlan, addr);
688 ipvlan_set_broadcast_mac_filter(ipvlan, true);
689
690 return 0;
691}
692
693static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
694{
695 struct ipvl_addr *addr;
696
697 addr = ipvlan_ht_addr_lookup(ipvlan->port, ip4_addr, false);
698 if (!addr)
699 return;
700
701 ipvlan_ht_addr_del(addr, true);
702 list_del_rcu(&addr->anode);
703 ipvlan->ipv4cnt--;
704 WARN_ON(ipvlan->ipv4cnt < 0);
705 if (!ipvlan->ipv4cnt)
706 ipvlan_set_broadcast_mac_filter(ipvlan, false);
707 kfree_rcu(addr, rcu);
708
709 return;
710}
711
712static int ipvlan_addr4_event(struct notifier_block *unused,
713 unsigned long event, void *ptr)
714{
715 struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
716 struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
717 struct ipvl_dev *ipvlan = netdev_priv(dev);
718 struct in_addr ip4_addr;
719
720 if (!ipvlan_dev_slave(dev))
721 return NOTIFY_DONE;
722
723 if (!ipvlan || !ipvlan->port)
724 return NOTIFY_DONE;
725
726 switch (event) {
727 case NETDEV_UP:
728 ip4_addr.s_addr = if4->ifa_address;
729 if (ipvlan_add_addr4(ipvlan, &ip4_addr))
730 return NOTIFY_BAD;
731 break;
732
733 case NETDEV_DOWN:
734 ip4_addr.s_addr = if4->ifa_address;
735 ipvlan_del_addr4(ipvlan, &ip4_addr);
736 break;
737 }
738
739 return NOTIFY_OK;
740}
741
/* Notifier hookup: IPv4 addresses, netdevice events, IPv6 addresses. */
static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr4_event,
};

static struct notifier_block ipvlan_notifier_block __read_mostly = {
	.notifier_call = ipvlan_device_event,
};

static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr6_event,
};
753
/* Module init: seed the hash secret, hook the device/address notifiers,
 * then register the rtnl link ops; notifiers are unhooked again (in
 * reverse order) if link registration fails.
 *
 * NOTE(review): the register_*_notifier() return values are ignored —
 * presumably these cannot fail here, but worth confirming.
 */
static int __init ipvlan_init_module(void)
{
	int err;

	ipvlan_init_secret();
	register_netdevice_notifier(&ipvlan_notifier_block);
	register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
	register_inetaddr_notifier(&ipvlan_addr4_notifier_block);

	err = ipvlan_link_register(&ipvlan_link_ops);
	if (err < 0)
		goto error;

	return 0;
error:
	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
	unregister_netdevice_notifier(&ipvlan_notifier_block);
	return err;
}
774
/* Module exit: unregister the link ops first (tearing down remaining
 * devices), then unhook the notifiers.
 */
static void __exit ipvlan_cleanup_module(void)
{
	rtnl_link_unregister(&ipvlan_link_ops);
	unregister_netdevice_notifier(&ipvlan_notifier_block);
	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
}
782
/* Module entry points and metadata. */
module_init(ipvlan_init_module);
module_exit(ipvlan_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs");
MODULE_ALIAS_RTNL_LINK("ipvlan");