author     Stephen Hemminger <stephen@networkplumber.org>    2013-06-25 20:02:49 -0400
committer  Stephen Hemminger <stephen@networkplumber.org>    2013-06-25 20:02:49 -0400
commit     3f5d6af0948a33a58001182de9cbb6b3e674ea14 (patch)
tree       73a4ccde21718f3890f594d8622df73495a43853
parent     8599b52e14a1611dcb563289421bee76751f1d53 (diff)
parent     537f7f8494be4219eb0ef47121ea16a6f9f0f49e (diff)
Merge ../vxlan-x
-rw-r--r--  drivers/net/vxlan.c   | 562
-rw-r--r--  net/bridge/br_fdb.c   |   5
-rw-r--r--  net/core/rtnetlink.c  |   8
3 files changed, 359 insertions, 216 deletions
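
In outline, the vxlan.c side of this merge replaces the single `remote_next` pointer in `struct vxlan_fdb` with an RCU-protected list of remote destinations, stores the device's default destination as an all-zeros-MAC forwarding entry, makes the per-port UDP socket refcounted and shared (created and released via a `vxlan` workqueue, with IGMP join/leave moved into a work item), and relocates the zero-MAC sanity check from rtnetlink into the bridge so VXLAN can accept such entries. Below is a minimal sketch of the remotes-list pattern the patch adopts; the structure and function names here are simplified stand-ins for illustration, not the driver's own code.

```c
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/types.h>

/* Simplified stand-ins for the structures touched below. */
struct rdst {
	__be32            remote_ip;
	struct list_head  list;
	struct rcu_head   rcu;
};

struct fdb {
	struct list_head  remotes;	/* replaces the old remote_next chain */
};

/* Append a destination unless an identical one is already present.
 * The caller holds the fdb hash lock, as the driver does.
 */
static int fdb_append(struct fdb *f, __be32 ip)
{
	struct rdst *rd;

	list_for_each_entry(rd, &f->remotes, list)
		if (rd->remote_ip == ip)
			return 0;

	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
	if (!rd)
		return -ENOBUFS;

	rd->remote_ip = ip;
	list_add_tail_rcu(&rd->list, &f->remotes);
	return 1;
}

/* Lockless readers take the first entry, which stands in for the
 * old f->remote field; they must run inside rcu_read_lock().
 */
static struct rdst *fdb_first_remote(struct fdb *f)
{
	return list_first_or_null_rcu(&f->remotes, struct rdst, list);
}
```

On deletion the driver unlinks an entry with list_del_rcu() and frees it from an RCU callback, so transmit-path readers never see a half-removed destination.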
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 284c6c00c353..3e75f9726c33 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -68,24 +68,26 @@ struct vxlanhdr { | |||
68 | 68 | ||
69 | /* UDP port for VXLAN traffic. | 69 | /* UDP port for VXLAN traffic. |
70 | * The IANA assigned port is 4789, but the Linux default is 8472 | 70 | * The IANA assigned port is 4789, but the Linux default is 8472 |
71 | * for compatability with early adopters. | 71 | * for compatibility with early adopters. |
72 | */ | 72 | */ |
73 | static unsigned int vxlan_port __read_mostly = 8472; | 73 | static unsigned short vxlan_port __read_mostly = 8472; |
74 | module_param_named(udp_port, vxlan_port, uint, 0444); | 74 | module_param_named(udp_port, vxlan_port, ushort, 0444); |
75 | MODULE_PARM_DESC(udp_port, "Destination UDP port"); | 75 | MODULE_PARM_DESC(udp_port, "Destination UDP port"); |
76 | 76 | ||
77 | static bool log_ecn_error = true; | 77 | static bool log_ecn_error = true; |
78 | module_param(log_ecn_error, bool, 0644); | 78 | module_param(log_ecn_error, bool, 0644); |
79 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); | 79 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); |
80 | 80 | ||
81 | static unsigned int vxlan_net_id; | 81 | static int vxlan_net_id; |
82 | |||
83 | static const u8 all_zeros_mac[ETH_ALEN]; | ||
82 | 84 | ||
83 | /* per UDP socket information */ | 85 | /* per UDP socket information */ |
84 | struct vxlan_sock { | 86 | struct vxlan_sock { |
85 | struct hlist_node hlist; | 87 | struct hlist_node hlist; |
86 | struct rcu_head rcu; | 88 | struct rcu_head rcu; |
87 | struct work_struct del_work; | 89 | struct work_struct del_work; |
88 | unsigned int refcnt; | 90 | atomic_t refcnt; |
89 | struct socket *sock; | 91 | struct socket *sock; |
90 | struct hlist_head vni_list[VNI_HASH_SIZE]; | 92 | struct hlist_head vni_list[VNI_HASH_SIZE]; |
91 | }; | 93 | }; |
@@ -94,6 +96,7 @@ struct vxlan_sock { | |||
94 | struct vxlan_net { | 96 | struct vxlan_net { |
95 | struct list_head vxlan_list; | 97 | struct list_head vxlan_list; |
96 | struct hlist_head sock_list[PORT_HASH_SIZE]; | 98 | struct hlist_head sock_list[PORT_HASH_SIZE]; |
99 | spinlock_t sock_lock; | ||
97 | }; | 100 | }; |
98 | 101 | ||
99 | struct vxlan_rdst { | 102 | struct vxlan_rdst { |
@@ -101,7 +104,8 @@ struct vxlan_rdst { | |||
101 | __be16 remote_port; | 104 | __be16 remote_port; |
102 | u32 remote_vni; | 105 | u32 remote_vni; |
103 | u32 remote_ifindex; | 106 | u32 remote_ifindex; |
104 | struct vxlan_rdst *remote_next; | 107 | struct list_head list; |
108 | struct rcu_head rcu; | ||
105 | }; | 109 | }; |
106 | 110 | ||
107 | /* Forwarding table entry */ | 111 | /* Forwarding table entry */ |
@@ -110,7 +114,7 @@ struct vxlan_fdb { | |||
110 | struct rcu_head rcu; | 114 | struct rcu_head rcu; |
111 | unsigned long updated; /* jiffies */ | 115 | unsigned long updated; /* jiffies */ |
112 | unsigned long used; | 116 | unsigned long used; |
113 | struct vxlan_rdst remote; | 117 | struct list_head remotes; |
114 | u16 state; /* see ndm_state */ | 118 | u16 state; /* see ndm_state */ |
115 | u8 flags; /* see ndm_flags */ | 119 | u8 flags; /* see ndm_flags */ |
116 | u8 eth_addr[ETH_ALEN]; | 120 | u8 eth_addr[ETH_ALEN]; |
@@ -131,6 +135,9 @@ struct vxlan_dev { | |||
131 | __u8 ttl; | 135 | __u8 ttl; |
132 | u32 flags; /* VXLAN_F_* below */ | 136 | u32 flags; /* VXLAN_F_* below */ |
133 | 137 | ||
138 | struct work_struct sock_work; | ||
139 | struct work_struct igmp_work; | ||
140 | |||
134 | unsigned long age_interval; | 141 | unsigned long age_interval; |
135 | struct timer_list age_timer; | 142 | struct timer_list age_timer; |
136 | spinlock_t hash_lock; | 143 | spinlock_t hash_lock; |
@@ -148,6 +155,9 @@ struct vxlan_dev { | |||
148 | 155 | ||
149 | /* salt for hash table */ | 156 | /* salt for hash table */ |
150 | static u32 vxlan_salt __read_mostly; | 157 | static u32 vxlan_salt __read_mostly; |
158 | static struct workqueue_struct *vxlan_wq; | ||
159 | |||
160 | static void vxlan_sock_work(struct work_struct *work); | ||
151 | 161 | ||
152 | /* Virtual Network hash table head */ | 162 | /* Virtual Network hash table head */ |
153 | static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) | 163 | static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) |
@@ -163,6 +173,14 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port) | |||
163 | return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; | 173 | return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; |
164 | } | 174 | } |
165 | 175 | ||
176 | /* First remote destination for a forwarding entry. | ||
177 | * Guaranteed to be non-NULL because remotes are never deleted. | ||
178 | */ | ||
179 | static inline struct vxlan_rdst *first_remote(struct vxlan_fdb *fdb) | ||
180 | { | ||
181 | return list_first_or_null_rcu(&fdb->remotes, struct vxlan_rdst, list); | ||
182 | } | ||
183 | |||
166 | /* Find VXLAN socket based on network namespace and UDP port */ | 184 | /* Find VXLAN socket based on network namespace and UDP port */ |
167 | static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port) | 185 | static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port) |
168 | { | 186 | { |
@@ -195,9 +213,9 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port) | |||
195 | 213 | ||
196 | /* Fill in neighbour message in skbuff. */ | 214 | /* Fill in neighbour message in skbuff. */ |
197 | static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, | 215 | static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, |
198 | const struct vxlan_fdb *fdb, | 216 | const struct vxlan_fdb *fdb, |
199 | u32 portid, u32 seq, int type, unsigned int flags, | 217 | u32 portid, u32 seq, int type, unsigned int flags, |
200 | const struct vxlan_rdst *rdst) | 218 | const struct vxlan_rdst *rdst) |
201 | { | 219 | { |
202 | unsigned long now = jiffies; | 220 | unsigned long now = jiffies; |
203 | struct nda_cacheinfo ci; | 221 | struct nda_cacheinfo ci; |
@@ -235,7 +253,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, | |||
235 | nla_put_be16(skb, NDA_PORT, rdst->remote_port)) | 253 | nla_put_be16(skb, NDA_PORT, rdst->remote_port)) |
236 | goto nla_put_failure; | 254 | goto nla_put_failure; |
237 | if (rdst->remote_vni != vxlan->default_dst.remote_vni && | 255 | if (rdst->remote_vni != vxlan->default_dst.remote_vni && |
238 | nla_put_be32(skb, NDA_VNI, rdst->remote_vni)) | 256 | nla_put_u32(skb, NDA_VNI, rdst->remote_vni)) |
239 | goto nla_put_failure; | 257 | goto nla_put_failure; |
240 | if (rdst->remote_ifindex && | 258 | if (rdst->remote_ifindex && |
241 | nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) | 259 | nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) |
@@ -268,7 +286,7 @@ static inline size_t vxlan_nlmsg_size(void) | |||
268 | } | 286 | } |
269 | 287 | ||
270 | static void vxlan_fdb_notify(struct vxlan_dev *vxlan, | 288 | static void vxlan_fdb_notify(struct vxlan_dev *vxlan, |
271 | const struct vxlan_fdb *fdb, int type) | 289 | struct vxlan_fdb *fdb, int type) |
272 | { | 290 | { |
273 | struct net *net = dev_net(vxlan->dev); | 291 | struct net *net = dev_net(vxlan->dev); |
274 | struct sk_buff *skb; | 292 | struct sk_buff *skb; |
@@ -278,7 +296,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan, | |||
278 | if (skb == NULL) | 296 | if (skb == NULL) |
279 | goto errout; | 297 | goto errout; |
280 | 298 | ||
281 | err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote); | 299 | err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, first_remote(fdb)); |
282 | if (err < 0) { | 300 | if (err < 0) { |
283 | /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ | 301 | /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ |
284 | WARN_ON(err == -EMSGSIZE); | 302 | WARN_ON(err == -EMSGSIZE); |
@@ -296,22 +314,27 @@ errout: | |||
296 | static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) | 314 | static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) |
297 | { | 315 | { |
298 | struct vxlan_dev *vxlan = netdev_priv(dev); | 316 | struct vxlan_dev *vxlan = netdev_priv(dev); |
299 | struct vxlan_fdb f; | 317 | struct vxlan_fdb f = { |
318 | .state = NUD_STALE, | ||
319 | }; | ||
320 | struct vxlan_rdst remote = { | ||
321 | .remote_ip = ipa, /* goes to NDA_DST */ | ||
322 | .remote_vni = VXLAN_N_VID, | ||
323 | }; | ||
300 | 324 | ||
301 | memset(&f, 0, sizeof f); | 325 | INIT_LIST_HEAD(&f.remotes); |
302 | f.state = NUD_STALE; | 326 | list_add_rcu(&remote.list, &f.remotes); |
303 | f.remote.remote_ip = ipa; /* goes to NDA_DST */ | ||
304 | f.remote.remote_vni = VXLAN_N_VID; | ||
305 | 327 | ||
306 | vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); | 328 | vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); |
307 | } | 329 | } |
308 | 330 | ||
309 | static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) | 331 | static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) |
310 | { | 332 | { |
311 | struct vxlan_fdb f; | 333 | struct vxlan_fdb f = { |
334 | .state = NUD_STALE, | ||
335 | }; | ||
312 | 336 | ||
313 | memset(&f, 0, sizeof f); | 337 | INIT_LIST_HEAD(&f.remotes); |
314 | f.state = NUD_STALE; | ||
315 | memcpy(f.eth_addr, eth_addr, ETH_ALEN); | 338 | memcpy(f.eth_addr, eth_addr, ETH_ALEN); |
316 | 339 | ||
317 | vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); | 340 | vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); |
@@ -366,21 +389,34 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, | |||
366 | return f; | 389 | return f; |
367 | } | 390 | } |
368 | 391 | ||
369 | /* Add/update destinations for multicast */ | 392 | /* caller should hold vxlan->hash_lock */ |
370 | static int vxlan_fdb_append(struct vxlan_fdb *f, | 393 | static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, |
371 | __be32 ip, __be16 port, __u32 vni, __u32 ifindex) | 394 | __be32 ip, __be16 port, |
395 | __u32 vni, __u32 ifindex) | ||
372 | { | 396 | { |
373 | struct vxlan_rdst *rd_prev, *rd; | 397 | struct vxlan_rdst *rd; |
374 | 398 | ||
375 | rd_prev = NULL; | 399 | list_for_each_entry(rd, &f->remotes, list) { |
376 | for (rd = &f->remote; rd; rd = rd->remote_next) { | ||
377 | if (rd->remote_ip == ip && | 400 | if (rd->remote_ip == ip && |
378 | rd->remote_port == port && | 401 | rd->remote_port == port && |
379 | rd->remote_vni == vni && | 402 | rd->remote_vni == vni && |
380 | rd->remote_ifindex == ifindex) | 403 | rd->remote_ifindex == ifindex) |
381 | return 0; | 404 | return rd; |
382 | rd_prev = rd; | ||
383 | } | 405 | } |
406 | |||
407 | return NULL; | ||
408 | } | ||
409 | |||
410 | /* Add/update destinations for multicast */ | ||
411 | static int vxlan_fdb_append(struct vxlan_fdb *f, | ||
412 | __be32 ip, __be16 port, __u32 vni, __u32 ifindex) | ||
413 | { | ||
414 | struct vxlan_rdst *rd; | ||
415 | |||
416 | rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex); | ||
417 | if (rd) | ||
418 | return 0; | ||
419 | |||
384 | rd = kmalloc(sizeof(*rd), GFP_ATOMIC); | 420 | rd = kmalloc(sizeof(*rd), GFP_ATOMIC); |
385 | if (rd == NULL) | 421 | if (rd == NULL) |
386 | return -ENOBUFS; | 422 | return -ENOBUFS; |
@@ -388,8 +424,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, | |||
388 | rd->remote_port = port; | 424 | rd->remote_port = port; |
389 | rd->remote_vni = vni; | 425 | rd->remote_vni = vni; |
390 | rd->remote_ifindex = ifindex; | 426 | rd->remote_ifindex = ifindex; |
391 | rd->remote_next = NULL; | 427 | |
392 | rd_prev->remote_next = rd; | 428 | list_add_tail_rcu(&rd->list, &f->remotes); |
429 | |||
393 | return 1; | 430 | return 1; |
394 | } | 431 | } |
395 | 432 | ||
@@ -421,7 +458,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, | |||
421 | notify = 1; | 458 | notify = 1; |
422 | } | 459 | } |
423 | if ((flags & NLM_F_APPEND) && | 460 | if ((flags & NLM_F_APPEND) && |
424 | is_multicast_ether_addr(f->eth_addr)) { | 461 | (is_multicast_ether_addr(f->eth_addr) || |
462 | is_zero_ether_addr(f->eth_addr))) { | ||
425 | int rc = vxlan_fdb_append(f, ip, port, vni, ifindex); | 463 | int rc = vxlan_fdb_append(f, ip, port, vni, ifindex); |
426 | 464 | ||
427 | if (rc < 0) | 465 | if (rc < 0) |
@@ -441,16 +479,14 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, | |||
441 | return -ENOMEM; | 479 | return -ENOMEM; |
442 | 480 | ||
443 | notify = 1; | 481 | notify = 1; |
444 | f->remote.remote_ip = ip; | ||
445 | f->remote.remote_port = port; | ||
446 | f->remote.remote_vni = vni; | ||
447 | f->remote.remote_ifindex = ifindex; | ||
448 | f->remote.remote_next = NULL; | ||
449 | f->state = state; | 482 | f->state = state; |
450 | f->flags = ndm_flags; | 483 | f->flags = ndm_flags; |
451 | f->updated = f->used = jiffies; | 484 | f->updated = f->used = jiffies; |
485 | INIT_LIST_HEAD(&f->remotes); | ||
452 | memcpy(f->eth_addr, mac, ETH_ALEN); | 486 | memcpy(f->eth_addr, mac, ETH_ALEN); |
453 | 487 | ||
488 | vxlan_fdb_append(f, ip, port, vni, ifindex); | ||
489 | |||
454 | ++vxlan->addrcnt; | 490 | ++vxlan->addrcnt; |
455 | hlist_add_head_rcu(&f->hlist, | 491 | hlist_add_head_rcu(&f->hlist, |
456 | vxlan_fdb_head(vxlan, mac)); | 492 | vxlan_fdb_head(vxlan, mac)); |
@@ -462,16 +498,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, | |||
462 | return 0; | 498 | return 0; |
463 | } | 499 | } |
464 | 500 | ||
501 | static void vxlan_fdb_free_rdst(struct rcu_head *head) | ||
502 | { | ||
503 | struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); | ||
504 | kfree(rd); | ||
505 | } | ||
506 | |||
465 | static void vxlan_fdb_free(struct rcu_head *head) | 507 | static void vxlan_fdb_free(struct rcu_head *head) |
466 | { | 508 | { |
467 | struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); | 509 | struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); |
510 | struct vxlan_rdst *rd, *nd; | ||
468 | 511 | ||
469 | while (f->remote.remote_next) { | 512 | list_for_each_entry_safe(rd, nd, &f->remotes, list) |
470 | struct vxlan_rdst *rd = f->remote.remote_next; | ||
471 | |||
472 | f->remote.remote_next = rd->remote_next; | ||
473 | kfree(rd); | 513 | kfree(rd); |
474 | } | ||
475 | kfree(f); | 514 | kfree(f); |
476 | } | 515 | } |
477 | 516 | ||
@@ -487,58 +526,77 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) | |||
487 | call_rcu(&f->rcu, vxlan_fdb_free); | 526 | call_rcu(&f->rcu, vxlan_fdb_free); |
488 | } | 527 | } |
489 | 528 | ||
490 | /* Add static entry (via netlink) */ | 529 | static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, |
491 | static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], | 530 | __be32 *ip, __be16 *port, u32 *vni, u32 *ifindex) |
492 | struct net_device *dev, | ||
493 | const unsigned char *addr, u16 flags) | ||
494 | { | 531 | { |
495 | struct vxlan_dev *vxlan = netdev_priv(dev); | ||
496 | struct net *net = dev_net(vxlan->dev); | 532 | struct net *net = dev_net(vxlan->dev); |
497 | __be32 ip; | ||
498 | __be16 port; | ||
499 | u32 vni, ifindex; | ||
500 | int err; | ||
501 | |||
502 | if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { | ||
503 | pr_info("RTM_NEWNEIGH with invalid state %#x\n", | ||
504 | ndm->ndm_state); | ||
505 | return -EINVAL; | ||
506 | } | ||
507 | |||
508 | if (tb[NDA_DST] == NULL) | ||
509 | return -EINVAL; | ||
510 | 533 | ||
511 | if (nla_len(tb[NDA_DST]) != sizeof(__be32)) | 534 | if (tb[NDA_DST]) { |
512 | return -EAFNOSUPPORT; | 535 | if (nla_len(tb[NDA_DST]) != sizeof(__be32)) |
536 | return -EAFNOSUPPORT; | ||
513 | 537 | ||
514 | ip = nla_get_be32(tb[NDA_DST]); | 538 | *ip = nla_get_be32(tb[NDA_DST]); |
539 | } else { | ||
540 | *ip = htonl(INADDR_ANY); | ||
541 | } | ||
515 | 542 | ||
516 | if (tb[NDA_PORT]) { | 543 | if (tb[NDA_PORT]) { |
517 | if (nla_len(tb[NDA_PORT]) != sizeof(__be16)) | 544 | if (nla_len(tb[NDA_PORT]) != sizeof(__be16)) |
518 | return -EINVAL; | 545 | return -EINVAL; |
519 | port = nla_get_be16(tb[NDA_PORT]); | 546 | *port = nla_get_be16(tb[NDA_PORT]); |
520 | } else | 547 | } else { |
521 | port = vxlan->dst_port; | 548 | *port = vxlan->dst_port; |
549 | } | ||
522 | 550 | ||
523 | if (tb[NDA_VNI]) { | 551 | if (tb[NDA_VNI]) { |
524 | if (nla_len(tb[NDA_VNI]) != sizeof(u32)) | 552 | if (nla_len(tb[NDA_VNI]) != sizeof(u32)) |
525 | return -EINVAL; | 553 | return -EINVAL; |
526 | vni = nla_get_u32(tb[NDA_VNI]); | 554 | *vni = nla_get_u32(tb[NDA_VNI]); |
527 | } else | 555 | } else { |
528 | vni = vxlan->default_dst.remote_vni; | 556 | *vni = vxlan->default_dst.remote_vni; |
557 | } | ||
529 | 558 | ||
530 | if (tb[NDA_IFINDEX]) { | 559 | if (tb[NDA_IFINDEX]) { |
531 | struct net_device *tdev; | 560 | struct net_device *tdev; |
532 | 561 | ||
533 | if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) | 562 | if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) |
534 | return -EINVAL; | 563 | return -EINVAL; |
535 | ifindex = nla_get_u32(tb[NDA_IFINDEX]); | 564 | *ifindex = nla_get_u32(tb[NDA_IFINDEX]); |
536 | tdev = dev_get_by_index(net, ifindex); | 565 | tdev = dev_get_by_index(net, *ifindex); |
537 | if (!tdev) | 566 | if (!tdev) |
538 | return -EADDRNOTAVAIL; | 567 | return -EADDRNOTAVAIL; |
539 | dev_put(tdev); | 568 | dev_put(tdev); |
540 | } else | 569 | } else { |
541 | ifindex = 0; | 570 | *ifindex = 0; |
571 | } | ||
572 | |||
573 | return 0; | ||
574 | } | ||
575 | |||
576 | /* Add static entry (via netlink) */ | ||
577 | static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], | ||
578 | struct net_device *dev, | ||
579 | const unsigned char *addr, u16 flags) | ||
580 | { | ||
581 | struct vxlan_dev *vxlan = netdev_priv(dev); | ||
582 | /* struct net *net = dev_net(vxlan->dev); */ | ||
583 | __be32 ip; | ||
584 | __be16 port; | ||
585 | u32 vni, ifindex; | ||
586 | int err; | ||
587 | |||
588 | if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { | ||
589 | pr_info("RTM_NEWNEIGH with invalid state %#x\n", | ||
590 | ndm->ndm_state); | ||
591 | return -EINVAL; | ||
592 | } | ||
593 | |||
594 | if (tb[NDA_DST] == NULL) | ||
595 | return -EINVAL; | ||
596 | |||
597 | err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); | ||
598 | if (err) | ||
599 | return err; | ||
542 | 600 | ||
543 | spin_lock_bh(&vxlan->hash_lock); | 601 | spin_lock_bh(&vxlan->hash_lock); |
544 | err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, | 602 | err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, |
@@ -555,14 +613,43 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], | |||
555 | { | 613 | { |
556 | struct vxlan_dev *vxlan = netdev_priv(dev); | 614 | struct vxlan_dev *vxlan = netdev_priv(dev); |
557 | struct vxlan_fdb *f; | 615 | struct vxlan_fdb *f; |
558 | int err = -ENOENT; | 616 | struct vxlan_rdst *rd = NULL; |
617 | __be32 ip; | ||
618 | __be16 port; | ||
619 | u32 vni, ifindex; | ||
620 | int err; | ||
621 | |||
622 | err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); | ||
623 | if (err) | ||
624 | return err; | ||
625 | |||
626 | err = -ENOENT; | ||
559 | 627 | ||
560 | spin_lock_bh(&vxlan->hash_lock); | 628 | spin_lock_bh(&vxlan->hash_lock); |
561 | f = vxlan_find_mac(vxlan, addr); | 629 | f = vxlan_find_mac(vxlan, addr); |
562 | if (f) { | 630 | if (!f) |
563 | vxlan_fdb_destroy(vxlan, f); | 631 | goto out; |
564 | err = 0; | 632 | |
633 | if (ip != htonl(INADDR_ANY)) { | ||
634 | rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex); | ||
635 | if (!rd) | ||
636 | goto out; | ||
565 | } | 637 | } |
638 | |||
639 | err = 0; | ||
640 | |||
641 | /* remove a destination if it's not the only one on the list, | ||
642 | * otherwise destroy the fdb entry | ||
643 | */ | ||
644 | if (rd && !list_is_singular(&f->remotes)) { | ||
645 | list_del_rcu(&rd->list); | ||
646 | call_rcu(&rd->rcu, vxlan_fdb_free_rdst); | ||
647 | goto out; | ||
648 | } | ||
649 | |||
650 | vxlan_fdb_destroy(vxlan, f); | ||
651 | |||
652 | out: | ||
566 | spin_unlock_bh(&vxlan->hash_lock); | 653 | spin_unlock_bh(&vxlan->hash_lock); |
567 | 654 | ||
568 | return err; | 655 | return err; |
@@ -581,23 +668,24 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, | |||
581 | 668 | ||
582 | hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { | 669 | hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { |
583 | struct vxlan_rdst *rd; | 670 | struct vxlan_rdst *rd; |
584 | for (rd = &f->remote; rd; rd = rd->remote_next) { | ||
585 | if (idx < cb->args[0]) | ||
586 | goto skip; | ||
587 | 671 | ||
672 | if (idx < cb->args[0]) | ||
673 | goto skip; | ||
674 | |||
675 | list_for_each_entry_rcu(rd, &f->remotes, list) { | ||
588 | err = vxlan_fdb_info(skb, vxlan, f, | 676 | err = vxlan_fdb_info(skb, vxlan, f, |
589 | NETLINK_CB(cb->skb).portid, | 677 | NETLINK_CB(cb->skb).portid, |
590 | cb->nlh->nlmsg_seq, | 678 | cb->nlh->nlmsg_seq, |
591 | RTM_NEWNEIGH, | 679 | RTM_NEWNEIGH, |
592 | NLM_F_MULTI, rd); | 680 | NLM_F_MULTI, rd); |
593 | if (err < 0) | 681 | if (err < 0) |
594 | break; | 682 | goto out; |
595 | skip: | ||
596 | ++idx; | ||
597 | } | 683 | } |
684 | skip: | ||
685 | ++idx; | ||
598 | } | 686 | } |
599 | } | 687 | } |
600 | 688 | out: | |
601 | return idx; | 689 | return idx; |
602 | } | 690 | } |
603 | 691 | ||
@@ -613,7 +701,9 @@ static bool vxlan_snoop(struct net_device *dev, | |||
613 | 701 | ||
614 | f = vxlan_find_mac(vxlan, src_mac); | 702 | f = vxlan_find_mac(vxlan, src_mac); |
615 | if (likely(f)) { | 703 | if (likely(f)) { |
616 | if (likely(f->remote.remote_ip == src_ip)) | 704 | struct vxlan_rdst *rdst = first_remote(f); |
705 | |||
706 | if (likely(rdst->remote_ip == src_ip)) | ||
617 | return false; | 707 | return false; |
618 | 708 | ||
619 | /* Don't migrate static entries, drop packets */ | 709 | /* Don't migrate static entries, drop packets */ |
@@ -623,10 +713,11 @@ static bool vxlan_snoop(struct net_device *dev, | |||
623 | if (net_ratelimit()) | 713 | if (net_ratelimit()) |
624 | netdev_info(dev, | 714 | netdev_info(dev, |
625 | "%pM migrated from %pI4 to %pI4\n", | 715 | "%pM migrated from %pI4 to %pI4\n", |
626 | src_mac, &f->remote.remote_ip, &src_ip); | 716 | src_mac, &rdst->remote_ip, &src_ip); |
627 | 717 | ||
628 | f->remote.remote_ip = src_ip; | 718 | rdst->remote_ip = src_ip; |
629 | f->updated = jiffies; | 719 | f->updated = jiffies; |
720 | vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH); | ||
630 | } else { | 721 | } else { |
631 | /* learned new entry */ | 722 | /* learned new entry */ |
632 | spin_lock(&vxlan->hash_lock); | 723 | spin_lock(&vxlan->hash_lock); |
@@ -647,76 +738,61 @@ static bool vxlan_snoop(struct net_device *dev, | |||
647 | 738 | ||
648 | 739 | ||
649 | /* See if multicast group is already in use by other ID */ | 740 | /* See if multicast group is already in use by other ID */ |
650 | static bool vxlan_group_used(struct vxlan_net *vn, | 741 | static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip) |
651 | const struct vxlan_dev *this) | ||
652 | { | 742 | { |
653 | struct vxlan_dev *vxlan; | 743 | struct vxlan_dev *vxlan; |
654 | 744 | ||
655 | list_for_each_entry(vxlan, &vn->vxlan_list, next) { | 745 | list_for_each_entry(vxlan, &vn->vxlan_list, next) { |
656 | if (vxlan == this) | ||
657 | continue; | ||
658 | |||
659 | if (!netif_running(vxlan->dev)) | 746 | if (!netif_running(vxlan->dev)) |
660 | continue; | 747 | continue; |
661 | 748 | ||
662 | if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip) | 749 | if (vxlan->default_dst.remote_ip == remote_ip) |
663 | return true; | 750 | return true; |
664 | } | 751 | } |
665 | 752 | ||
666 | return false; | 753 | return false; |
667 | } | 754 | } |
668 | 755 | ||
669 | /* kernel equivalent to IP_ADD_MEMBERSHIP */ | 756 | static void vxlan_sock_hold(struct vxlan_sock *vs) |
670 | static int vxlan_join_group(struct net_device *dev) | ||
671 | { | 757 | { |
672 | struct vxlan_dev *vxlan = netdev_priv(dev); | 758 | atomic_inc(&vs->refcnt); |
673 | struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); | 759 | } |
674 | struct sock *sk = vxlan->vn_sock->sock->sk; | ||
675 | struct ip_mreqn mreq = { | ||
676 | .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, | ||
677 | .imr_ifindex = vxlan->default_dst.remote_ifindex, | ||
678 | }; | ||
679 | int err; | ||
680 | 760 | ||
681 | /* Already a member of group */ | 761 | static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs) |
682 | if (vxlan_group_used(vn, vxlan)) | 762 | { |
683 | return 0; | 763 | if (!atomic_dec_and_test(&vs->refcnt)) |
764 | return; | ||
684 | 765 | ||
685 | /* Need to drop RTNL to call multicast join */ | 766 | spin_lock(&vn->sock_lock); |
686 | rtnl_unlock(); | 767 | hlist_del_rcu(&vs->hlist); |
687 | lock_sock(sk); | 768 | spin_unlock(&vn->sock_lock); |
688 | err = ip_mc_join_group(sk, &mreq); | ||
689 | release_sock(sk); | ||
690 | rtnl_lock(); | ||
691 | 769 | ||
692 | return err; | 770 | queue_work(vxlan_wq, &vs->del_work); |
693 | } | 771 | } |
694 | 772 | ||
695 | 773 | /* Callback to update multicast group membership. | |
696 | /* kernel equivalent to IP_DROP_MEMBERSHIP */ | 774 | * Scheduled when vxlan goes up/down. |
697 | static int vxlan_leave_group(struct net_device *dev) | 775 | */ |
776 | static void vxlan_igmp_work(struct work_struct *work) | ||
698 | { | 777 | { |
699 | struct vxlan_dev *vxlan = netdev_priv(dev); | 778 | struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_work); |
700 | struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); | 779 | struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id); |
701 | int err = 0; | 780 | struct vxlan_sock *vs = vxlan->vn_sock; |
702 | struct sock *sk = vxlan->vn_sock->sock->sk; | 781 | struct sock *sk = vs->sock->sk; |
703 | struct ip_mreqn mreq = { | 782 | struct ip_mreqn mreq = { |
704 | .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, | 783 | .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, |
705 | .imr_ifindex = vxlan->default_dst.remote_ifindex, | 784 | .imr_ifindex = vxlan->default_dst.remote_ifindex, |
706 | }; | 785 | }; |
707 | 786 | ||
708 | /* Only leave group when last vxlan is done. */ | ||
709 | if (vxlan_group_used(vn, vxlan)) | ||
710 | return 0; | ||
711 | |||
712 | /* Need to drop RTNL to call multicast leave */ | ||
713 | rtnl_unlock(); | ||
714 | lock_sock(sk); | 787 | lock_sock(sk); |
715 | err = ip_mc_leave_group(sk, &mreq); | 788 | if (vxlan_group_used(vn, vxlan->default_dst.remote_ip)) |
789 | ip_mc_join_group(sk, &mreq); | ||
790 | else | ||
791 | ip_mc_leave_group(sk, &mreq); | ||
716 | release_sock(sk); | 792 | release_sock(sk); |
717 | rtnl_lock(); | ||
718 | 793 | ||
719 | return err; | 794 | vxlan_sock_release(vn, vs); |
795 | dev_put(vxlan->dev); | ||
720 | } | 796 | } |
721 | 797 | ||
722 | /* Callback from net/ipv4/udp.c to receive packets */ | 798 | /* Callback from net/ipv4/udp.c to receive packets */ |
@@ -873,7 +949,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) | |||
873 | } | 949 | } |
874 | 950 | ||
875 | f = vxlan_find_mac(vxlan, n->ha); | 951 | f = vxlan_find_mac(vxlan, n->ha); |
876 | if (f && f->remote.remote_ip == htonl(INADDR_ANY)) { | 952 | if (f && first_remote(f)->remote_ip == htonl(INADDR_ANY)) { |
877 | /* bridge-local neighbor */ | 953 | /* bridge-local neighbor */ |
878 | neigh_release(n); | 954 | neigh_release(n); |
879 | goto out; | 955 | goto out; |
@@ -1015,8 +1091,8 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, | |||
1015 | } | 1091 | } |
1016 | } | 1092 | } |
1017 | 1093 | ||
1018 | static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | 1094 | static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, |
1019 | struct vxlan_rdst *rdst, bool did_rsc) | 1095 | struct vxlan_rdst *rdst, bool did_rsc) |
1020 | { | 1096 | { |
1021 | struct vxlan_dev *vxlan = netdev_priv(dev); | 1097 | struct vxlan_dev *vxlan = netdev_priv(dev); |
1022 | struct rtable *rt; | 1098 | struct rtable *rt; |
@@ -1026,7 +1102,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1026 | struct flowi4 fl4; | 1102 | struct flowi4 fl4; |
1027 | __be32 dst; | 1103 | __be32 dst; |
1028 | __be16 src_port, dst_port; | 1104 | __be16 src_port, dst_port; |
1029 | u32 vni; | 1105 | u32 vni; |
1030 | __be16 df = 0; | 1106 | __be16 df = 0; |
1031 | __u8 tos, ttl; | 1107 | __u8 tos, ttl; |
1032 | int err; | 1108 | int err; |
@@ -1039,7 +1115,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1039 | if (did_rsc) { | 1115 | if (did_rsc) { |
1040 | /* short-circuited back to local bridge */ | 1116 | /* short-circuited back to local bridge */ |
1041 | vxlan_encap_bypass(skb, vxlan, vxlan); | 1117 | vxlan_encap_bypass(skb, vxlan, vxlan); |
1042 | return NETDEV_TX_OK; | 1118 | return; |
1043 | } | 1119 | } |
1044 | goto drop; | 1120 | goto drop; |
1045 | } | 1121 | } |
@@ -1095,7 +1171,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1095 | if (!dst_vxlan) | 1171 | if (!dst_vxlan) |
1096 | goto tx_error; | 1172 | goto tx_error; |
1097 | vxlan_encap_bypass(skb, vxlan, dst_vxlan); | 1173 | vxlan_encap_bypass(skb, vxlan, dst_vxlan); |
1098 | return NETDEV_TX_OK; | 1174 | return; |
1099 | } | 1175 | } |
1100 | vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); | 1176 | vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); |
1101 | vxh->vx_flags = htonl(VXLAN_FLAGS); | 1177 | vxh->vx_flags = htonl(VXLAN_FLAGS); |
@@ -1123,7 +1199,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1123 | IPPROTO_UDP, tos, ttl, df); | 1199 | IPPROTO_UDP, tos, ttl, df); |
1124 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); | 1200 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); |
1125 | 1201 | ||
1126 | return NETDEV_TX_OK; | 1202 | return; |
1127 | 1203 | ||
1128 | drop: | 1204 | drop: |
1129 | dev->stats.tx_dropped++; | 1205 | dev->stats.tx_dropped++; |
@@ -1133,7 +1209,6 @@ tx_error: | |||
1133 | dev->stats.tx_errors++; | 1209 | dev->stats.tx_errors++; |
1134 | tx_free: | 1210 | tx_free: |
1135 | dev_kfree_skb(skb); | 1211 | dev_kfree_skb(skb); |
1136 | return NETDEV_TX_OK; | ||
1137 | } | 1212 | } |
1138 | 1213 | ||
1139 | /* Transmit local packets over Vxlan | 1214 | /* Transmit local packets over Vxlan |
@@ -1147,9 +1222,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | |||
1147 | struct vxlan_dev *vxlan = netdev_priv(dev); | 1222 | struct vxlan_dev *vxlan = netdev_priv(dev); |
1148 | struct ethhdr *eth; | 1223 | struct ethhdr *eth; |
1149 | bool did_rsc = false; | 1224 | bool did_rsc = false; |
1150 | struct vxlan_rdst *rdst0, *rdst; | 1225 | struct vxlan_rdst *rdst; |
1151 | struct vxlan_fdb *f; | 1226 | struct vxlan_fdb *f; |
1152 | int rc1, rc; | ||
1153 | 1227 | ||
1154 | skb_reset_mac_header(skb); | 1228 | skb_reset_mac_header(skb); |
1155 | eth = eth_hdr(skb); | 1229 | eth = eth_hdr(skb); |
@@ -1168,33 +1242,28 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | |||
1168 | } | 1242 | } |
1169 | 1243 | ||
1170 | if (f == NULL) { | 1244 | if (f == NULL) { |
1171 | rdst0 = &vxlan->default_dst; | 1245 | f = vxlan_find_mac(vxlan, all_zeros_mac); |
1172 | 1246 | if (f == NULL) { | |
1173 | if (rdst0->remote_ip == htonl(INADDR_ANY) && | 1247 | if ((vxlan->flags & VXLAN_F_L2MISS) && |
1174 | (vxlan->flags & VXLAN_F_L2MISS) && | 1248 | !is_multicast_ether_addr(eth->h_dest)) |
1175 | !is_multicast_ether_addr(eth->h_dest)) | 1249 | vxlan_fdb_miss(vxlan, eth->h_dest); |
1176 | vxlan_fdb_miss(vxlan, eth->h_dest); | 1250 | |
1177 | } else | 1251 | dev->stats.tx_dropped++; |
1178 | rdst0 = &f->remote; | 1252 | dev_kfree_skb(skb); |
1179 | 1253 | return NETDEV_TX_OK; | |
1180 | rc = NETDEV_TX_OK; | 1254 | } |
1255 | } | ||
1181 | 1256 | ||
1182 | /* if there are multiple destinations, send copies */ | 1257 | list_for_each_entry_rcu(rdst, &f->remotes, list) { |
1183 | for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) { | ||
1184 | struct sk_buff *skb1; | 1258 | struct sk_buff *skb1; |
1185 | 1259 | ||
1186 | skb1 = skb_clone(skb, GFP_ATOMIC); | 1260 | skb1 = skb_clone(skb, GFP_ATOMIC); |
1187 | if (skb1) { | 1261 | if (skb1) |
1188 | rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc); | 1262 | vxlan_xmit_one(skb1, dev, rdst, did_rsc); |
1189 | if (rc == NETDEV_TX_OK) | ||
1190 | rc = rc1; | ||
1191 | } | ||
1192 | } | 1263 | } |
1193 | 1264 | ||
1194 | rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc); | 1265 | dev_kfree_skb(skb); |
1195 | if (rc == NETDEV_TX_OK) | 1266 | return NETDEV_TX_OK; |
1196 | rc = rc1; | ||
1197 | return rc; | ||
1198 | } | 1267 | } |
1199 | 1268 | ||
1200 | /* Walk the forwarding table and purge stale entries */ | 1269 | /* Walk the forwarding table and purge stale entries */ |
@@ -1237,23 +1306,70 @@ static void vxlan_cleanup(unsigned long arg) | |||
1237 | /* Setup stats when device is created */ | 1306 | /* Setup stats when device is created */ |
1238 | static int vxlan_init(struct net_device *dev) | 1307 | static int vxlan_init(struct net_device *dev) |
1239 | { | 1308 | { |
1309 | struct vxlan_dev *vxlan = netdev_priv(dev); | ||
1310 | struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); | ||
1311 | struct vxlan_sock *vs; | ||
1312 | __u32 vni = vxlan->default_dst.remote_vni; | ||
1313 | |||
1240 | dev->tstats = alloc_percpu(struct pcpu_tstats); | 1314 | dev->tstats = alloc_percpu(struct pcpu_tstats); |
1241 | if (!dev->tstats) | 1315 | if (!dev->tstats) |
1242 | return -ENOMEM; | 1316 | return -ENOMEM; |
1243 | 1317 | ||
1318 | spin_lock(&vn->sock_lock); | ||
1319 | vs = vxlan_find_port(dev_net(dev), vxlan->dst_port); | ||
1320 | if (vs) { | ||
1321 | /* If we have a socket with same port already, reuse it */ | ||
1322 | atomic_inc(&vs->refcnt); | ||
1323 | vxlan->vn_sock = vs; | ||
1324 | hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); | ||
1325 | } else { | ||
1326 | /* otherwise make new socket outside of RTNL */ | ||
1327 | dev_hold(dev); | ||
1328 | queue_work(vxlan_wq, &vxlan->sock_work); | ||
1329 | } | ||
1330 | spin_unlock(&vn->sock_lock); | ||
1331 | |||
1244 | return 0; | 1332 | return 0; |
1245 | } | 1333 | } |
1246 | 1334 | ||
1335 | static void vxlan_fdb_delete_defualt(struct vxlan_dev *vxlan) | ||
1336 | { | ||
1337 | struct vxlan_fdb *f; | ||
1338 | |||
1339 | spin_lock_bh(&vxlan->hash_lock); | ||
1340 | f = __vxlan_find_mac(vxlan, all_zeros_mac); | ||
1341 | if (f) | ||
1342 | vxlan_fdb_destroy(vxlan, f); | ||
1343 | spin_unlock_bh(&vxlan->hash_lock); | ||
1344 | } | ||
1345 | |||
1346 | static void vxlan_uninit(struct net_device *dev) | ||
1347 | { | ||
1348 | struct vxlan_dev *vxlan = netdev_priv(dev); | ||
1349 | struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); | ||
1350 | struct vxlan_sock *vs = vxlan->vn_sock; | ||
1351 | |||
1352 | vxlan_fdb_delete_defualt(vxlan); | ||
1353 | |||
1354 | if (vs) | ||
1355 | vxlan_sock_release(vn, vs); | ||
1356 | free_percpu(dev->tstats); | ||
1357 | } | ||
1358 | |||
1247 | /* Start ageing timer and join group when device is brought up */ | 1359 | /* Start ageing timer and join group when device is brought up */ |
1248 | static int vxlan_open(struct net_device *dev) | 1360 | static int vxlan_open(struct net_device *dev) |
1249 | { | 1361 | { |
1250 | struct vxlan_dev *vxlan = netdev_priv(dev); | 1362 | struct vxlan_dev *vxlan = netdev_priv(dev); |
1251 | int err; | 1363 | struct vxlan_sock *vs = vxlan->vn_sock; |
1364 | |||
1365 | /* socket hasn't been created */ | ||
1366 | if (!vs) | ||
1367 | return -ENOTCONN; | ||
1252 | 1368 | ||
1253 | if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { | 1369 | if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { |
1254 | err = vxlan_join_group(dev); | 1370 | vxlan_sock_hold(vs); |
1255 | if (err) | 1371 | dev_hold(dev); |
1256 | return err; | 1372 | queue_work(vxlan_wq, &vxlan->igmp_work); |
1257 | } | 1373 | } |
1258 | 1374 | ||
1259 | if (vxlan->age_interval) | 1375 | if (vxlan->age_interval) |
@@ -1273,7 +1389,9 @@ static void vxlan_flush(struct vxlan_dev *vxlan) | |||
1273 | hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { | 1389 | hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { |
1274 | struct vxlan_fdb *f | 1390 | struct vxlan_fdb *f |
1275 | = container_of(p, struct vxlan_fdb, hlist); | 1391 | = container_of(p, struct vxlan_fdb, hlist); |
1276 | vxlan_fdb_destroy(vxlan, f); | 1392 | /* the all_zeros_mac entry is deleted at vxlan_uninit */ |
1393 | if (!is_zero_ether_addr(f->eth_addr)) | ||
1394 | vxlan_fdb_destroy(vxlan, f); | ||
1277 | } | 1395 | } |
1278 | } | 1396 | } |
1279 | spin_unlock_bh(&vxlan->hash_lock); | 1397 | spin_unlock_bh(&vxlan->hash_lock); |
@@ -1283,9 +1401,13 @@ static void vxlan_flush(struct vxlan_dev *vxlan) | |||
1283 | static int vxlan_stop(struct net_device *dev) | 1401 | static int vxlan_stop(struct net_device *dev) |
1284 | { | 1402 | { |
1285 | struct vxlan_dev *vxlan = netdev_priv(dev); | 1403 | struct vxlan_dev *vxlan = netdev_priv(dev); |
1404 | struct vxlan_sock *vs = vxlan->vn_sock; | ||
1286 | 1405 | ||
1287 | if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) | 1406 | if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { |
1288 | vxlan_leave_group(dev); | 1407 | vxlan_sock_hold(vs); |
1408 | dev_hold(dev); | ||
1409 | queue_work(vxlan_wq, &vxlan->igmp_work); | ||
1410 | } | ||
1289 | 1411 | ||
1290 | del_timer_sync(&vxlan->age_timer); | 1412 | del_timer_sync(&vxlan->age_timer); |
1291 | 1413 | ||
@@ -1301,6 +1423,7 @@ static void vxlan_set_multicast_list(struct net_device *dev) | |||
1301 | 1423 | ||
1302 | static const struct net_device_ops vxlan_netdev_ops = { | 1424 | static const struct net_device_ops vxlan_netdev_ops = { |
1303 | .ndo_init = vxlan_init, | 1425 | .ndo_init = vxlan_init, |
1426 | .ndo_uninit = vxlan_uninit, | ||
1304 | .ndo_open = vxlan_open, | 1427 | .ndo_open = vxlan_open, |
1305 | .ndo_stop = vxlan_stop, | 1428 | .ndo_stop = vxlan_stop, |
1306 | .ndo_start_xmit = vxlan_xmit, | 1429 | .ndo_start_xmit = vxlan_xmit, |
@@ -1319,12 +1442,6 @@ static struct device_type vxlan_type = { | |||
1319 | .name = "vxlan", | 1442 | .name = "vxlan", |
1320 | }; | 1443 | }; |
1321 | 1444 | ||
1322 | static void vxlan_free(struct net_device *dev) | ||
1323 | { | ||
1324 | free_percpu(dev->tstats); | ||
1325 | free_netdev(dev); | ||
1326 | } | ||
1327 | |||
1328 | /* Initialize the device structure. */ | 1445 | /* Initialize the device structure. */ |
1329 | static void vxlan_setup(struct net_device *dev) | 1446 | static void vxlan_setup(struct net_device *dev) |
1330 | { | 1447 | { |
@@ -1337,7 +1454,7 @@ static void vxlan_setup(struct net_device *dev) | |||
1337 | dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM; | 1454 | dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM; |
1338 | 1455 | ||
1339 | dev->netdev_ops = &vxlan_netdev_ops; | 1456 | dev->netdev_ops = &vxlan_netdev_ops; |
1340 | dev->destructor = vxlan_free; | 1457 | dev->destructor = free_netdev; |
1341 | SET_NETDEV_DEVTYPE(dev, &vxlan_type); | 1458 | SET_NETDEV_DEVTYPE(dev, &vxlan_type); |
1342 | 1459 | ||
1343 | dev->tx_queue_len = 0; | 1460 | dev->tx_queue_len = 0; |
@@ -1354,6 +1471,8 @@ static void vxlan_setup(struct net_device *dev) | |||
1354 | 1471 | ||
1355 | INIT_LIST_HEAD(&vxlan->next); | 1472 | INIT_LIST_HEAD(&vxlan->next); |
1356 | spin_lock_init(&vxlan->hash_lock); | 1473 | spin_lock_init(&vxlan->hash_lock); |
1474 | INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work); | ||
1475 | INIT_WORK(&vxlan->sock_work, vxlan_sock_work); | ||
1357 | 1476 | ||
1358 | init_timer_deferrable(&vxlan->age_timer); | 1477 | init_timer_deferrable(&vxlan->age_timer); |
1359 | vxlan->age_timer.function = vxlan_cleanup; | 1478 | vxlan->age_timer.function = vxlan_cleanup; |
@@ -1445,7 +1564,6 @@ static void vxlan_del_work(struct work_struct *work) | |||
1445 | kfree_rcu(vs, rcu); | 1564 | kfree_rcu(vs, rcu); |
1446 | } | 1565 | } |
1447 | 1566 | ||
1448 | /* Create new listen socket if needed */ | ||
1449 | static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) | 1567 | static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) |
1450 | { | 1568 | { |
1451 | struct vxlan_sock *vs; | 1569 | struct vxlan_sock *vs; |
@@ -1453,6 +1571,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) | |||
1453 | struct sockaddr_in vxlan_addr = { | 1571 | struct sockaddr_in vxlan_addr = { |
1454 | .sin_family = AF_INET, | 1572 | .sin_family = AF_INET, |
1455 | .sin_addr.s_addr = htonl(INADDR_ANY), | 1573 | .sin_addr.s_addr = htonl(INADDR_ANY), |
1574 | .sin_port = port, | ||
1456 | }; | 1575 | }; |
1457 | int rc; | 1576 | int rc; |
1458 | unsigned int h; | 1577 | unsigned int h; |
@@ -1478,8 +1597,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) | |||
1478 | sk = vs->sock->sk; | 1597 | sk = vs->sock->sk; |
1479 | sk_change_net(sk, net); | 1598 | sk_change_net(sk, net); |
1480 | 1599 | ||
1481 | vxlan_addr.sin_port = port; | ||
1482 | |||
1483 | rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr, | 1600 | rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr, |
1484 | sizeof(vxlan_addr)); | 1601 | sizeof(vxlan_addr)); |
1485 | if (rc < 0) { | 1602 | if (rc < 0) { |
@@ -1497,18 +1614,57 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) | |||
1497 | udp_sk(sk)->encap_type = 1; | 1614 | udp_sk(sk)->encap_type = 1; |
1498 | udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; | 1615 | udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; |
1499 | udp_encap_enable(); | 1616 | udp_encap_enable(); |
1617 | atomic_set(&vs->refcnt, 1); | ||
1500 | 1618 | ||
1501 | vs->refcnt = 1; | ||
1502 | return vs; | 1619 | return vs; |
1503 | } | 1620 | } |
1504 | 1621 | ||
1622 | /* Scheduled at device creation to bind to a socket */ | ||
1623 | static void vxlan_sock_work(struct work_struct *work) | ||
1624 | { | ||
1625 | struct vxlan_dev *vxlan | ||
1626 | = container_of(work, struct vxlan_dev, sock_work); | ||
1627 | struct net_device *dev = vxlan->dev; | ||
1628 | struct net *net = dev_net(dev); | ||
1629 | __u32 vni = vxlan->default_dst.remote_vni; | ||
1630 | __be16 port = vxlan->dst_port; | ||
1631 | struct vxlan_net *vn = net_generic(net, vxlan_net_id); | ||
1632 | struct vxlan_sock *nvs, *ovs; | ||
1633 | |||
1634 | nvs = vxlan_socket_create(net, port); | ||
1635 | if (IS_ERR(nvs)) { | ||
1636 | netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n", | ||
1637 | PTR_ERR(nvs)); | ||
1638 | goto out; | ||
1639 | } | ||
1640 | |||
1641 | spin_lock(&vn->sock_lock); | ||
1642 | /* Look again to see if can reuse socket */ | ||
1643 | ovs = vxlan_find_port(net, port); | ||
1644 | if (ovs) { | ||
1645 | atomic_inc(&ovs->refcnt); | ||
1646 | vxlan->vn_sock = ovs; | ||
1647 | hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni)); | ||
1648 | spin_unlock(&vn->sock_lock); | ||
1649 | |||
1650 | sk_release_kernel(nvs->sock->sk); | ||
1651 | kfree(nvs); | ||
1652 | } else { | ||
1653 | vxlan->vn_sock = nvs; | ||
1654 | hlist_add_head_rcu(&nvs->hlist, vs_head(net, port)); | ||
1655 | hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni)); | ||
1656 | spin_unlock(&vn->sock_lock); | ||
1657 | } | ||
1658 | out: | ||
1659 | dev_put(dev); | ||
1660 | } | ||
1661 | |||
1505 | static int vxlan_newlink(struct net *net, struct net_device *dev, | 1662 | static int vxlan_newlink(struct net *net, struct net_device *dev, |
1506 | struct nlattr *tb[], struct nlattr *data[]) | 1663 | struct nlattr *tb[], struct nlattr *data[]) |
1507 | { | 1664 | { |
1508 | struct vxlan_net *vn = net_generic(net, vxlan_net_id); | 1665 | struct vxlan_net *vn = net_generic(net, vxlan_net_id); |
1509 | struct vxlan_dev *vxlan = netdev_priv(dev); | 1666 | struct vxlan_dev *vxlan = netdev_priv(dev); |
1510 | struct vxlan_rdst *dst = &vxlan->default_dst; | 1667 | struct vxlan_rdst *dst = &vxlan->default_dst; |
1511 | struct vxlan_sock *vs; | ||
1512 | __u32 vni; | 1668 | __u32 vni; |
1513 | int err; | 1669 | int err; |
1514 | 1670 | ||
@@ -1586,36 +1742,25 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, | |||
1586 | return -EEXIST; | 1742 | return -EEXIST; |
1587 | } | 1743 | } |
1588 | 1744 | ||
1589 | vs = vxlan_find_port(net, vxlan->dst_port); | ||
1590 | if (vs) | ||
1591 | ++vs->refcnt; | ||
1592 | else { | ||
1593 | /* Drop lock because socket create acquires RTNL lock */ | ||
1594 | rtnl_unlock(); | ||
1595 | vs = vxlan_socket_create(net, vxlan->dst_port); | ||
1596 | rtnl_lock(); | ||
1597 | if (IS_ERR(vs)) | ||
1598 | return PTR_ERR(vs); | ||
1599 | |||
1600 | hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port)); | ||
1601 | } | ||
1602 | vxlan->vn_sock = vs; | ||
1603 | |||
1604 | SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); | 1745 | SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); |
1605 | 1746 | ||
1747 | /* create an fdb entry for default destination */ | ||
1748 | err = vxlan_fdb_create(vxlan, all_zeros_mac, | ||
1749 | vxlan->default_dst.remote_ip, | ||
1750 | NUD_REACHABLE|NUD_PERMANENT, | ||
1751 | NLM_F_EXCL|NLM_F_CREATE, | ||
1752 | vxlan->dst_port, vxlan->default_dst.remote_vni, | ||
1753 | vxlan->default_dst.remote_ifindex, NTF_SELF); | ||
1754 | if (err) | ||
1755 | return err; | ||
1756 | |||
1606 | err = register_netdevice(dev); | 1757 | err = register_netdevice(dev); |
1607 | if (err) { | 1758 | if (err) { |
1608 | if (--vs->refcnt == 0) { | 1759 | vxlan_fdb_delete_defualt(vxlan); |
1609 | rtnl_unlock(); | ||
1610 | sk_release_kernel(vs->sock->sk); | ||
1611 | kfree(vs); | ||
1612 | rtnl_lock(); | ||
1613 | } | ||
1614 | return err; | 1760 | return err; |
1615 | } | 1761 | } |
1616 | 1762 | ||
1617 | list_add(&vxlan->next, &vn->vxlan_list); | 1763 | list_add(&vxlan->next, &vn->vxlan_list); |
1618 | hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); | ||
1619 | 1764 | ||
1620 | return 0; | 1765 | return 0; |
1621 | } | 1766 | } |
@@ -1623,16 +1768,10 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, | |||
1623 | static void vxlan_dellink(struct net_device *dev, struct list_head *head) | 1768 | static void vxlan_dellink(struct net_device *dev, struct list_head *head) |
1624 | { | 1769 | { |
1625 | struct vxlan_dev *vxlan = netdev_priv(dev); | 1770 | struct vxlan_dev *vxlan = netdev_priv(dev); |
1626 | struct vxlan_sock *vs = vxlan->vn_sock; | ||
1627 | 1771 | ||
1628 | hlist_del_rcu(&vxlan->hlist); | 1772 | hlist_del_rcu(&vxlan->hlist); |
1629 | list_del(&vxlan->next); | 1773 | list_del(&vxlan->next); |
1630 | unregister_netdevice_queue(dev, head); | 1774 | unregister_netdevice_queue(dev, head); |
1631 | |||
1632 | if (--vs->refcnt == 0) { | ||
1633 | hlist_del_rcu(&vs->hlist); | ||
1634 | schedule_work(&vs->del_work); | ||
1635 | } | ||
1636 | } | 1775 | } |
1637 | 1776 | ||
1638 | static size_t vxlan_get_size(const struct net_device *dev) | 1777 | static size_t vxlan_get_size(const struct net_device *dev) |
@@ -1721,6 +1860,7 @@ static __net_init int vxlan_init_net(struct net *net) | |||
1721 | unsigned int h; | 1860 | unsigned int h; |
1722 | 1861 | ||
1723 | INIT_LIST_HEAD(&vn->vxlan_list); | 1862 | INIT_LIST_HEAD(&vn->vxlan_list); |
1863 | spin_lock_init(&vn->sock_lock); | ||
1724 | 1864 | ||
1725 | for (h = 0; h < PORT_HASH_SIZE; ++h) | 1865 | for (h = 0; h < PORT_HASH_SIZE; ++h) |
1726 | INIT_HLIST_HEAD(&vn->sock_list[h]); | 1866 | INIT_HLIST_HEAD(&vn->sock_list[h]); |
@@ -1750,6 +1890,10 @@ static int __init vxlan_init_module(void) | |||
1750 | { | 1890 | { |
1751 | int rc; | 1891 | int rc; |
1752 | 1892 | ||
1893 | vxlan_wq = alloc_workqueue("vxlan", 0, 0); | ||
1894 | if (!vxlan_wq) | ||
1895 | return -ENOMEM; | ||
1896 | |||
1753 | get_random_bytes(&vxlan_salt, sizeof(vxlan_salt)); | 1897 | get_random_bytes(&vxlan_salt, sizeof(vxlan_salt)); |
1754 | 1898 | ||
1755 | rc = register_pernet_device(&vxlan_net_ops); | 1899 | rc = register_pernet_device(&vxlan_net_ops); |
@@ -1765,14 +1909,16 @@ static int __init vxlan_init_module(void) | |||
1765 | out2: | 1909 | out2: |
1766 | unregister_pernet_device(&vxlan_net_ops); | 1910 | unregister_pernet_device(&vxlan_net_ops); |
1767 | out1: | 1911 | out1: |
1912 | destroy_workqueue(vxlan_wq); | ||
1768 | return rc; | 1913 | return rc; |
1769 | } | 1914 | } |
1770 | late_initcall(vxlan_init_module); | 1915 | late_initcall(vxlan_init_module); |
1771 | 1916 | ||
1772 | static void __exit vxlan_cleanup_module(void) | 1917 | static void __exit vxlan_cleanup_module(void) |
1773 | { | 1918 | { |
1774 | rtnl_link_unregister(&vxlan_link_ops); | ||
1775 | unregister_pernet_device(&vxlan_net_ops); | 1919 | unregister_pernet_device(&vxlan_net_ops); |
1920 | rtnl_link_unregister(&vxlan_link_ops); | ||
1921 | destroy_workqueue(vxlan_wq); | ||
1776 | rcu_barrier(); | 1922 | rcu_barrier(); |
1777 | } | 1923 | } |
1778 | module_exit(vxlan_cleanup_module); | 1924 | module_exit(vxlan_cleanup_module); |
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ebfa4443c69b..60aca9109a50 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -707,6 +707,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], | |||
707 | } | 707 | } |
708 | } | 708 | } |
709 | 709 | ||
710 | if (is_zero_ether_addr(addr)) { | ||
711 | pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); | ||
712 | return -EINVAL; | ||
713 | } | ||
714 | |||
710 | p = br_port_get_rtnl(dev); | 715 | p = br_port_get_rtnl(dev); |
711 | if (p == NULL) { | 716 | if (p == NULL) { |
712 | pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", | 717 | pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9007533867f0..3de740834d1f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2109,10 +2109,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
2109 | } | 2109 | } |
2110 | 2110 | ||
2111 | addr = nla_data(tb[NDA_LLADDR]); | 2111 | addr = nla_data(tb[NDA_LLADDR]); |
2112 | if (is_zero_ether_addr(addr)) { | ||
2113 | pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); | ||
2114 | return -EINVAL; | ||
2115 | } | ||
2116 | 2112 | ||
2117 | err = -EOPNOTSUPP; | 2113 | err = -EOPNOTSUPP; |
2118 | 2114 | ||
@@ -2210,10 +2206,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
2210 | } | 2206 | } |
2211 | 2207 | ||
2212 | addr = nla_data(tb[NDA_LLADDR]); | 2208 | addr = nla_data(tb[NDA_LLADDR]); |
2213 | if (is_zero_ether_addr(addr)) { | ||
2214 | pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); | ||
2215 | return -EINVAL; | ||
2216 | } | ||
2217 | 2209 | ||
2218 | err = -EOPNOTSUPP; | 2210 | err = -EOPNOTSUPP; |
2219 | 2211 | ||