aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <stephen@networkplumber.org>2013-06-25 20:02:49 -0400
committerStephen Hemminger <stephen@networkplumber.org>2013-06-25 20:02:49 -0400
commit3f5d6af0948a33a58001182de9cbb6b3e674ea14 (patch)
tree73a4ccde21718f3890f594d8622df73495a43853
parent8599b52e14a1611dcb563289421bee76751f1d53 (diff)
parent537f7f8494be4219eb0ef47121ea16a6f9f0f49e (diff)
Merge ../vxlan-x
-rw-r--r--drivers/net/vxlan.c562
-rw-r--r--net/bridge/br_fdb.c5
-rw-r--r--net/core/rtnetlink.c8
3 files changed, 359 insertions, 216 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 284c6c00c353..3e75f9726c33 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -68,24 +68,26 @@ struct vxlanhdr {
68 68
69/* UDP port for VXLAN traffic. 69/* UDP port for VXLAN traffic.
70 * The IANA assigned port is 4789, but the Linux default is 8472 70 * The IANA assigned port is 4789, but the Linux default is 8472
71 * for compatability with early adopters. 71 * for compatibility with early adopters.
72 */ 72 */
73static unsigned int vxlan_port __read_mostly = 8472; 73static unsigned short vxlan_port __read_mostly = 8472;
74module_param_named(udp_port, vxlan_port, uint, 0444); 74module_param_named(udp_port, vxlan_port, ushort, 0444);
75MODULE_PARM_DESC(udp_port, "Destination UDP port"); 75MODULE_PARM_DESC(udp_port, "Destination UDP port");
76 76
77static bool log_ecn_error = true; 77static bool log_ecn_error = true;
78module_param(log_ecn_error, bool, 0644); 78module_param(log_ecn_error, bool, 0644);
79MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 79MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
80 80
81static unsigned int vxlan_net_id; 81static int vxlan_net_id;
82
83static const u8 all_zeros_mac[ETH_ALEN];
82 84
83/* per UDP socket information */ 85/* per UDP socket information */
84struct vxlan_sock { 86struct vxlan_sock {
85 struct hlist_node hlist; 87 struct hlist_node hlist;
86 struct rcu_head rcu; 88 struct rcu_head rcu;
87 struct work_struct del_work; 89 struct work_struct del_work;
88 unsigned int refcnt; 90 atomic_t refcnt;
89 struct socket *sock; 91 struct socket *sock;
90 struct hlist_head vni_list[VNI_HASH_SIZE]; 92 struct hlist_head vni_list[VNI_HASH_SIZE];
91}; 93};
@@ -94,6 +96,7 @@ struct vxlan_sock {
94struct vxlan_net { 96struct vxlan_net {
95 struct list_head vxlan_list; 97 struct list_head vxlan_list;
96 struct hlist_head sock_list[PORT_HASH_SIZE]; 98 struct hlist_head sock_list[PORT_HASH_SIZE];
99 spinlock_t sock_lock;
97}; 100};
98 101
99struct vxlan_rdst { 102struct vxlan_rdst {
@@ -101,7 +104,8 @@ struct vxlan_rdst {
101 __be16 remote_port; 104 __be16 remote_port;
102 u32 remote_vni; 105 u32 remote_vni;
103 u32 remote_ifindex; 106 u32 remote_ifindex;
104 struct vxlan_rdst *remote_next; 107 struct list_head list;
108 struct rcu_head rcu;
105}; 109};
106 110
107/* Forwarding table entry */ 111/* Forwarding table entry */
@@ -110,7 +114,7 @@ struct vxlan_fdb {
110 struct rcu_head rcu; 114 struct rcu_head rcu;
111 unsigned long updated; /* jiffies */ 115 unsigned long updated; /* jiffies */
112 unsigned long used; 116 unsigned long used;
113 struct vxlan_rdst remote; 117 struct list_head remotes;
114 u16 state; /* see ndm_state */ 118 u16 state; /* see ndm_state */
115 u8 flags; /* see ndm_flags */ 119 u8 flags; /* see ndm_flags */
116 u8 eth_addr[ETH_ALEN]; 120 u8 eth_addr[ETH_ALEN];
@@ -131,6 +135,9 @@ struct vxlan_dev {
131 __u8 ttl; 135 __u8 ttl;
132 u32 flags; /* VXLAN_F_* below */ 136 u32 flags; /* VXLAN_F_* below */
133 137
138 struct work_struct sock_work;
139 struct work_struct igmp_work;
140
134 unsigned long age_interval; 141 unsigned long age_interval;
135 struct timer_list age_timer; 142 struct timer_list age_timer;
136 spinlock_t hash_lock; 143 spinlock_t hash_lock;
@@ -148,6 +155,9 @@ struct vxlan_dev {
148 155
149/* salt for hash table */ 156/* salt for hash table */
150static u32 vxlan_salt __read_mostly; 157static u32 vxlan_salt __read_mostly;
158static struct workqueue_struct *vxlan_wq;
159
160static void vxlan_sock_work(struct work_struct *work);
151 161
152/* Virtual Network hash table head */ 162/* Virtual Network hash table head */
153static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) 163static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
@@ -163,6 +173,14 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
163 return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; 173 return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
164} 174}
165 175
176/* First remote destination for a forwarding entry.
177 * Guaranteed to be non-NULL because remotes are never deleted.
178 */
179static inline struct vxlan_rdst *first_remote(struct vxlan_fdb *fdb)
180{
181 return list_first_or_null_rcu(&fdb->remotes, struct vxlan_rdst, list);
182}
183
166/* Find VXLAN socket based on network namespace and UDP port */ 184/* Find VXLAN socket based on network namespace and UDP port */
167static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port) 185static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
168{ 186{
@@ -195,9 +213,9 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
195 213
196/* Fill in neighbour message in skbuff. */ 214/* Fill in neighbour message in skbuff. */
197static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, 215static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
198 const struct vxlan_fdb *fdb, 216 const struct vxlan_fdb *fdb,
199 u32 portid, u32 seq, int type, unsigned int flags, 217 u32 portid, u32 seq, int type, unsigned int flags,
200 const struct vxlan_rdst *rdst) 218 const struct vxlan_rdst *rdst)
201{ 219{
202 unsigned long now = jiffies; 220 unsigned long now = jiffies;
203 struct nda_cacheinfo ci; 221 struct nda_cacheinfo ci;
@@ -235,7 +253,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
235 nla_put_be16(skb, NDA_PORT, rdst->remote_port)) 253 nla_put_be16(skb, NDA_PORT, rdst->remote_port))
236 goto nla_put_failure; 254 goto nla_put_failure;
237 if (rdst->remote_vni != vxlan->default_dst.remote_vni && 255 if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
238 nla_put_be32(skb, NDA_VNI, rdst->remote_vni)) 256 nla_put_u32(skb, NDA_VNI, rdst->remote_vni))
239 goto nla_put_failure; 257 goto nla_put_failure;
240 if (rdst->remote_ifindex && 258 if (rdst->remote_ifindex &&
241 nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) 259 nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
@@ -268,7 +286,7 @@ static inline size_t vxlan_nlmsg_size(void)
268} 286}
269 287
270static void vxlan_fdb_notify(struct vxlan_dev *vxlan, 288static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
271 const struct vxlan_fdb *fdb, int type) 289 struct vxlan_fdb *fdb, int type)
272{ 290{
273 struct net *net = dev_net(vxlan->dev); 291 struct net *net = dev_net(vxlan->dev);
274 struct sk_buff *skb; 292 struct sk_buff *skb;
@@ -278,7 +296,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
278 if (skb == NULL) 296 if (skb == NULL)
279 goto errout; 297 goto errout;
280 298
281 err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote); 299 err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, first_remote(fdb));
282 if (err < 0) { 300 if (err < 0) {
283 /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */ 301 /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
284 WARN_ON(err == -EMSGSIZE); 302 WARN_ON(err == -EMSGSIZE);
@@ -296,22 +314,27 @@ errout:
296static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) 314static void vxlan_ip_miss(struct net_device *dev, __be32 ipa)
297{ 315{
298 struct vxlan_dev *vxlan = netdev_priv(dev); 316 struct vxlan_dev *vxlan = netdev_priv(dev);
299 struct vxlan_fdb f; 317 struct vxlan_fdb f = {
318 .state = NUD_STALE,
319 };
320 struct vxlan_rdst remote = {
321 .remote_ip = ipa, /* goes to NDA_DST */
322 .remote_vni = VXLAN_N_VID,
323 };
300 324
301 memset(&f, 0, sizeof f); 325 INIT_LIST_HEAD(&f.remotes);
302 f.state = NUD_STALE; 326 list_add_rcu(&remote.list, &f.remotes);
303 f.remote.remote_ip = ipa; /* goes to NDA_DST */
304 f.remote.remote_vni = VXLAN_N_VID;
305 327
306 vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); 328 vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
307} 329}
308 330
309static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) 331static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
310{ 332{
311 struct vxlan_fdb f; 333 struct vxlan_fdb f = {
334 .state = NUD_STALE,
335 };
312 336
313 memset(&f, 0, sizeof f); 337 INIT_LIST_HEAD(&f.remotes);
314 f.state = NUD_STALE;
315 memcpy(f.eth_addr, eth_addr, ETH_ALEN); 338 memcpy(f.eth_addr, eth_addr, ETH_ALEN);
316 339
317 vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); 340 vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
@@ -366,21 +389,34 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
366 return f; 389 return f;
367} 390}
368 391
369/* Add/update destinations for multicast */ 392/* caller should hold vxlan->hash_lock */
370static int vxlan_fdb_append(struct vxlan_fdb *f, 393static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
371 __be32 ip, __be16 port, __u32 vni, __u32 ifindex) 394 __be32 ip, __be16 port,
395 __u32 vni, __u32 ifindex)
372{ 396{
373 struct vxlan_rdst *rd_prev, *rd; 397 struct vxlan_rdst *rd;
374 398
375 rd_prev = NULL; 399 list_for_each_entry(rd, &f->remotes, list) {
376 for (rd = &f->remote; rd; rd = rd->remote_next) {
377 if (rd->remote_ip == ip && 400 if (rd->remote_ip == ip &&
378 rd->remote_port == port && 401 rd->remote_port == port &&
379 rd->remote_vni == vni && 402 rd->remote_vni == vni &&
380 rd->remote_ifindex == ifindex) 403 rd->remote_ifindex == ifindex)
381 return 0; 404 return rd;
382 rd_prev = rd;
383 } 405 }
406
407 return NULL;
408}
409
410/* Add/update destinations for multicast */
411static int vxlan_fdb_append(struct vxlan_fdb *f,
412 __be32 ip, __be16 port, __u32 vni, __u32 ifindex)
413{
414 struct vxlan_rdst *rd;
415
416 rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
417 if (rd)
418 return 0;
419
384 rd = kmalloc(sizeof(*rd), GFP_ATOMIC); 420 rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
385 if (rd == NULL) 421 if (rd == NULL)
386 return -ENOBUFS; 422 return -ENOBUFS;
@@ -388,8 +424,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
388 rd->remote_port = port; 424 rd->remote_port = port;
389 rd->remote_vni = vni; 425 rd->remote_vni = vni;
390 rd->remote_ifindex = ifindex; 426 rd->remote_ifindex = ifindex;
391 rd->remote_next = NULL; 427
392 rd_prev->remote_next = rd; 428 list_add_tail_rcu(&rd->list, &f->remotes);
429
393 return 1; 430 return 1;
394} 431}
395 432
@@ -421,7 +458,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
421 notify = 1; 458 notify = 1;
422 } 459 }
423 if ((flags & NLM_F_APPEND) && 460 if ((flags & NLM_F_APPEND) &&
424 is_multicast_ether_addr(f->eth_addr)) { 461 (is_multicast_ether_addr(f->eth_addr) ||
462 is_zero_ether_addr(f->eth_addr))) {
425 int rc = vxlan_fdb_append(f, ip, port, vni, ifindex); 463 int rc = vxlan_fdb_append(f, ip, port, vni, ifindex);
426 464
427 if (rc < 0) 465 if (rc < 0)
@@ -441,16 +479,14 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
441 return -ENOMEM; 479 return -ENOMEM;
442 480
443 notify = 1; 481 notify = 1;
444 f->remote.remote_ip = ip;
445 f->remote.remote_port = port;
446 f->remote.remote_vni = vni;
447 f->remote.remote_ifindex = ifindex;
448 f->remote.remote_next = NULL;
449 f->state = state; 482 f->state = state;
450 f->flags = ndm_flags; 483 f->flags = ndm_flags;
451 f->updated = f->used = jiffies; 484 f->updated = f->used = jiffies;
485 INIT_LIST_HEAD(&f->remotes);
452 memcpy(f->eth_addr, mac, ETH_ALEN); 486 memcpy(f->eth_addr, mac, ETH_ALEN);
453 487
488 vxlan_fdb_append(f, ip, port, vni, ifindex);
489
454 ++vxlan->addrcnt; 490 ++vxlan->addrcnt;
455 hlist_add_head_rcu(&f->hlist, 491 hlist_add_head_rcu(&f->hlist,
456 vxlan_fdb_head(vxlan, mac)); 492 vxlan_fdb_head(vxlan, mac));
@@ -462,16 +498,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
462 return 0; 498 return 0;
463} 499}
464 500
501static void vxlan_fdb_free_rdst(struct rcu_head *head)
502{
503 struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
504 kfree(rd);
505}
506
465static void vxlan_fdb_free(struct rcu_head *head) 507static void vxlan_fdb_free(struct rcu_head *head)
466{ 508{
467 struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); 509 struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
510 struct vxlan_rdst *rd, *nd;
468 511
469 while (f->remote.remote_next) { 512 list_for_each_entry_safe(rd, nd, &f->remotes, list)
470 struct vxlan_rdst *rd = f->remote.remote_next;
471
472 f->remote.remote_next = rd->remote_next;
473 kfree(rd); 513 kfree(rd);
474 }
475 kfree(f); 514 kfree(f);
476} 515}
477 516
@@ -487,58 +526,77 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
487 call_rcu(&f->rcu, vxlan_fdb_free); 526 call_rcu(&f->rcu, vxlan_fdb_free);
488} 527}
489 528
490/* Add static entry (via netlink) */ 529static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
491static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 530 __be32 *ip, __be16 *port, u32 *vni, u32 *ifindex)
492 struct net_device *dev,
493 const unsigned char *addr, u16 flags)
494{ 531{
495 struct vxlan_dev *vxlan = netdev_priv(dev);
496 struct net *net = dev_net(vxlan->dev); 532 struct net *net = dev_net(vxlan->dev);
497 __be32 ip;
498 __be16 port;
499 u32 vni, ifindex;
500 int err;
501
502 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
503 pr_info("RTM_NEWNEIGH with invalid state %#x\n",
504 ndm->ndm_state);
505 return -EINVAL;
506 }
507
508 if (tb[NDA_DST] == NULL)
509 return -EINVAL;
510 533
511 if (nla_len(tb[NDA_DST]) != sizeof(__be32)) 534 if (tb[NDA_DST]) {
512 return -EAFNOSUPPORT; 535 if (nla_len(tb[NDA_DST]) != sizeof(__be32))
536 return -EAFNOSUPPORT;
513 537
514 ip = nla_get_be32(tb[NDA_DST]); 538 *ip = nla_get_be32(tb[NDA_DST]);
539 } else {
540 *ip = htonl(INADDR_ANY);
541 }
515 542
516 if (tb[NDA_PORT]) { 543 if (tb[NDA_PORT]) {
517 if (nla_len(tb[NDA_PORT]) != sizeof(__be16)) 544 if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
518 return -EINVAL; 545 return -EINVAL;
519 port = nla_get_be16(tb[NDA_PORT]); 546 *port = nla_get_be16(tb[NDA_PORT]);
520 } else 547 } else {
521 port = vxlan->dst_port; 548 *port = vxlan->dst_port;
549 }
522 550
523 if (tb[NDA_VNI]) { 551 if (tb[NDA_VNI]) {
524 if (nla_len(tb[NDA_VNI]) != sizeof(u32)) 552 if (nla_len(tb[NDA_VNI]) != sizeof(u32))
525 return -EINVAL; 553 return -EINVAL;
526 vni = nla_get_u32(tb[NDA_VNI]); 554 *vni = nla_get_u32(tb[NDA_VNI]);
527 } else 555 } else {
528 vni = vxlan->default_dst.remote_vni; 556 *vni = vxlan->default_dst.remote_vni;
557 }
529 558
530 if (tb[NDA_IFINDEX]) { 559 if (tb[NDA_IFINDEX]) {
531 struct net_device *tdev; 560 struct net_device *tdev;
532 561
533 if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32)) 562 if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
534 return -EINVAL; 563 return -EINVAL;
535 ifindex = nla_get_u32(tb[NDA_IFINDEX]); 564 *ifindex = nla_get_u32(tb[NDA_IFINDEX]);
536 tdev = dev_get_by_index(net, ifindex); 565 tdev = dev_get_by_index(net, *ifindex);
537 if (!tdev) 566 if (!tdev)
538 return -EADDRNOTAVAIL; 567 return -EADDRNOTAVAIL;
539 dev_put(tdev); 568 dev_put(tdev);
540 } else 569 } else {
541 ifindex = 0; 570 *ifindex = 0;
571 }
572
573 return 0;
574}
575
576/* Add static entry (via netlink) */
577static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
578 struct net_device *dev,
579 const unsigned char *addr, u16 flags)
580{
581 struct vxlan_dev *vxlan = netdev_priv(dev);
582 /* struct net *net = dev_net(vxlan->dev); */
583 __be32 ip;
584 __be16 port;
585 u32 vni, ifindex;
586 int err;
587
588 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
589 pr_info("RTM_NEWNEIGH with invalid state %#x\n",
590 ndm->ndm_state);
591 return -EINVAL;
592 }
593
594 if (tb[NDA_DST] == NULL)
595 return -EINVAL;
596
597 err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
598 if (err)
599 return err;
542 600
543 spin_lock_bh(&vxlan->hash_lock); 601 spin_lock_bh(&vxlan->hash_lock);
544 err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, 602 err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags,
@@ -555,14 +613,43 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
555{ 613{
556 struct vxlan_dev *vxlan = netdev_priv(dev); 614 struct vxlan_dev *vxlan = netdev_priv(dev);
557 struct vxlan_fdb *f; 615 struct vxlan_fdb *f;
558 int err = -ENOENT; 616 struct vxlan_rdst *rd = NULL;
617 __be32 ip;
618 __be16 port;
619 u32 vni, ifindex;
620 int err;
621
622 err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
623 if (err)
624 return err;
625
626 err = -ENOENT;
559 627
560 spin_lock_bh(&vxlan->hash_lock); 628 spin_lock_bh(&vxlan->hash_lock);
561 f = vxlan_find_mac(vxlan, addr); 629 f = vxlan_find_mac(vxlan, addr);
562 if (f) { 630 if (!f)
563 vxlan_fdb_destroy(vxlan, f); 631 goto out;
564 err = 0; 632
633 if (ip != htonl(INADDR_ANY)) {
634 rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
635 if (!rd)
636 goto out;
565 } 637 }
638
639 err = 0;
640
641 /* remove a destination if it's not the only one on the list,
642 * otherwise destroy the fdb entry
643 */
644 if (rd && !list_is_singular(&f->remotes)) {
645 list_del_rcu(&rd->list);
646 call_rcu(&rd->rcu, vxlan_fdb_free_rdst);
647 goto out;
648 }
649
650 vxlan_fdb_destroy(vxlan, f);
651
652out:
566 spin_unlock_bh(&vxlan->hash_lock); 653 spin_unlock_bh(&vxlan->hash_lock);
567 654
568 return err; 655 return err;
@@ -581,23 +668,24 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
581 668
582 hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { 669 hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
583 struct vxlan_rdst *rd; 670 struct vxlan_rdst *rd;
584 for (rd = &f->remote; rd; rd = rd->remote_next) {
585 if (idx < cb->args[0])
586 goto skip;
587 671
672 if (idx < cb->args[0])
673 goto skip;
674
675 list_for_each_entry_rcu(rd, &f->remotes, list) {
588 err = vxlan_fdb_info(skb, vxlan, f, 676 err = vxlan_fdb_info(skb, vxlan, f,
589 NETLINK_CB(cb->skb).portid, 677 NETLINK_CB(cb->skb).portid,
590 cb->nlh->nlmsg_seq, 678 cb->nlh->nlmsg_seq,
591 RTM_NEWNEIGH, 679 RTM_NEWNEIGH,
592 NLM_F_MULTI, rd); 680 NLM_F_MULTI, rd);
593 if (err < 0) 681 if (err < 0)
594 break; 682 goto out;
595skip:
596 ++idx;
597 } 683 }
684skip:
685 ++idx;
598 } 686 }
599 } 687 }
600 688out:
601 return idx; 689 return idx;
602} 690}
603 691
@@ -613,7 +701,9 @@ static bool vxlan_snoop(struct net_device *dev,
613 701
614 f = vxlan_find_mac(vxlan, src_mac); 702 f = vxlan_find_mac(vxlan, src_mac);
615 if (likely(f)) { 703 if (likely(f)) {
616 if (likely(f->remote.remote_ip == src_ip)) 704 struct vxlan_rdst *rdst = first_remote(f);
705
706 if (likely(rdst->remote_ip == src_ip))
617 return false; 707 return false;
618 708
619 /* Don't migrate static entries, drop packets */ 709 /* Don't migrate static entries, drop packets */
@@ -623,10 +713,11 @@ static bool vxlan_snoop(struct net_device *dev,
623 if (net_ratelimit()) 713 if (net_ratelimit())
624 netdev_info(dev, 714 netdev_info(dev,
625 "%pM migrated from %pI4 to %pI4\n", 715 "%pM migrated from %pI4 to %pI4\n",
626 src_mac, &f->remote.remote_ip, &src_ip); 716 src_mac, &rdst->remote_ip, &src_ip);
627 717
628 f->remote.remote_ip = src_ip; 718 rdst->remote_ip = src_ip;
629 f->updated = jiffies; 719 f->updated = jiffies;
720 vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH);
630 } else { 721 } else {
631 /* learned new entry */ 722 /* learned new entry */
632 spin_lock(&vxlan->hash_lock); 723 spin_lock(&vxlan->hash_lock);
@@ -647,76 +738,61 @@ static bool vxlan_snoop(struct net_device *dev,
647 738
648 739
649/* See if multicast group is already in use by other ID */ 740/* See if multicast group is already in use by other ID */
650static bool vxlan_group_used(struct vxlan_net *vn, 741static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip)
651 const struct vxlan_dev *this)
652{ 742{
653 struct vxlan_dev *vxlan; 743 struct vxlan_dev *vxlan;
654 744
655 list_for_each_entry(vxlan, &vn->vxlan_list, next) { 745 list_for_each_entry(vxlan, &vn->vxlan_list, next) {
656 if (vxlan == this)
657 continue;
658
659 if (!netif_running(vxlan->dev)) 746 if (!netif_running(vxlan->dev))
660 continue; 747 continue;
661 748
662 if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip) 749 if (vxlan->default_dst.remote_ip == remote_ip)
663 return true; 750 return true;
664 } 751 }
665 752
666 return false; 753 return false;
667} 754}
668 755
669/* kernel equivalent to IP_ADD_MEMBERSHIP */ 756static void vxlan_sock_hold(struct vxlan_sock *vs)
670static int vxlan_join_group(struct net_device *dev)
671{ 757{
672 struct vxlan_dev *vxlan = netdev_priv(dev); 758 atomic_inc(&vs->refcnt);
673 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 759}
674 struct sock *sk = vxlan->vn_sock->sock->sk;
675 struct ip_mreqn mreq = {
676 .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip,
677 .imr_ifindex = vxlan->default_dst.remote_ifindex,
678 };
679 int err;
680 760
681 /* Already a member of group */ 761static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
682 if (vxlan_group_used(vn, vxlan)) 762{
683 return 0; 763 if (!atomic_dec_and_test(&vs->refcnt))
764 return;
684 765
685 /* Need to drop RTNL to call multicast join */ 766 spin_lock(&vn->sock_lock);
686 rtnl_unlock(); 767 hlist_del_rcu(&vs->hlist);
687 lock_sock(sk); 768 spin_unlock(&vn->sock_lock);
688 err = ip_mc_join_group(sk, &mreq);
689 release_sock(sk);
690 rtnl_lock();
691 769
692 return err; 770 queue_work(vxlan_wq, &vs->del_work);
693} 771}
694 772
695 773/* Callback to update multicast group membership.
696/* kernel equivalent to IP_DROP_MEMBERSHIP */ 774 * Scheduled when vxlan goes up/down.
697static int vxlan_leave_group(struct net_device *dev) 775 */
776static void vxlan_igmp_work(struct work_struct *work)
698{ 777{
699 struct vxlan_dev *vxlan = netdev_priv(dev); 778 struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_work);
700 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 779 struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
701 int err = 0; 780 struct vxlan_sock *vs = vxlan->vn_sock;
702 struct sock *sk = vxlan->vn_sock->sock->sk; 781 struct sock *sk = vs->sock->sk;
703 struct ip_mreqn mreq = { 782 struct ip_mreqn mreq = {
704 .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, 783 .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip,
705 .imr_ifindex = vxlan->default_dst.remote_ifindex, 784 .imr_ifindex = vxlan->default_dst.remote_ifindex,
706 }; 785 };
707 786
708 /* Only leave group when last vxlan is done. */
709 if (vxlan_group_used(vn, vxlan))
710 return 0;
711
712 /* Need to drop RTNL to call multicast leave */
713 rtnl_unlock();
714 lock_sock(sk); 787 lock_sock(sk);
715 err = ip_mc_leave_group(sk, &mreq); 788 if (vxlan_group_used(vn, vxlan->default_dst.remote_ip))
789 ip_mc_join_group(sk, &mreq);
790 else
791 ip_mc_leave_group(sk, &mreq);
716 release_sock(sk); 792 release_sock(sk);
717 rtnl_lock();
718 793
719 return err; 794 vxlan_sock_release(vn, vs);
795 dev_put(vxlan->dev);
720} 796}
721 797
722/* Callback from net/ipv4/udp.c to receive packets */ 798/* Callback from net/ipv4/udp.c to receive packets */
@@ -873,7 +949,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
873 } 949 }
874 950
875 f = vxlan_find_mac(vxlan, n->ha); 951 f = vxlan_find_mac(vxlan, n->ha);
876 if (f && f->remote.remote_ip == htonl(INADDR_ANY)) { 952 if (f && first_remote(f)->remote_ip == htonl(INADDR_ANY)) {
877 /* bridge-local neighbor */ 953 /* bridge-local neighbor */
878 neigh_release(n); 954 neigh_release(n);
879 goto out; 955 goto out;
@@ -1015,8 +1091,8 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
1015 } 1091 }
1016} 1092}
1017 1093
1018static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, 1094static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1019 struct vxlan_rdst *rdst, bool did_rsc) 1095 struct vxlan_rdst *rdst, bool did_rsc)
1020{ 1096{
1021 struct vxlan_dev *vxlan = netdev_priv(dev); 1097 struct vxlan_dev *vxlan = netdev_priv(dev);
1022 struct rtable *rt; 1098 struct rtable *rt;
@@ -1026,7 +1102,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1026 struct flowi4 fl4; 1102 struct flowi4 fl4;
1027 __be32 dst; 1103 __be32 dst;
1028 __be16 src_port, dst_port; 1104 __be16 src_port, dst_port;
1029 u32 vni; 1105 u32 vni;
1030 __be16 df = 0; 1106 __be16 df = 0;
1031 __u8 tos, ttl; 1107 __u8 tos, ttl;
1032 int err; 1108 int err;
@@ -1039,7 +1115,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1039 if (did_rsc) { 1115 if (did_rsc) {
1040 /* short-circuited back to local bridge */ 1116 /* short-circuited back to local bridge */
1041 vxlan_encap_bypass(skb, vxlan, vxlan); 1117 vxlan_encap_bypass(skb, vxlan, vxlan);
1042 return NETDEV_TX_OK; 1118 return;
1043 } 1119 }
1044 goto drop; 1120 goto drop;
1045 } 1121 }
@@ -1095,7 +1171,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1095 if (!dst_vxlan) 1171 if (!dst_vxlan)
1096 goto tx_error; 1172 goto tx_error;
1097 vxlan_encap_bypass(skb, vxlan, dst_vxlan); 1173 vxlan_encap_bypass(skb, vxlan, dst_vxlan);
1098 return NETDEV_TX_OK; 1174 return;
1099 } 1175 }
1100 vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); 1176 vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
1101 vxh->vx_flags = htonl(VXLAN_FLAGS); 1177 vxh->vx_flags = htonl(VXLAN_FLAGS);
@@ -1123,7 +1199,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
1123 IPPROTO_UDP, tos, ttl, df); 1199 IPPROTO_UDP, tos, ttl, df);
1124 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 1200 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
1125 1201
1126 return NETDEV_TX_OK; 1202 return;
1127 1203
1128drop: 1204drop:
1129 dev->stats.tx_dropped++; 1205 dev->stats.tx_dropped++;
@@ -1133,7 +1209,6 @@ tx_error:
1133 dev->stats.tx_errors++; 1209 dev->stats.tx_errors++;
1134tx_free: 1210tx_free:
1135 dev_kfree_skb(skb); 1211 dev_kfree_skb(skb);
1136 return NETDEV_TX_OK;
1137} 1212}
1138 1213
1139/* Transmit local packets over Vxlan 1214/* Transmit local packets over Vxlan
@@ -1147,9 +1222,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
1147 struct vxlan_dev *vxlan = netdev_priv(dev); 1222 struct vxlan_dev *vxlan = netdev_priv(dev);
1148 struct ethhdr *eth; 1223 struct ethhdr *eth;
1149 bool did_rsc = false; 1224 bool did_rsc = false;
1150 struct vxlan_rdst *rdst0, *rdst; 1225 struct vxlan_rdst *rdst;
1151 struct vxlan_fdb *f; 1226 struct vxlan_fdb *f;
1152 int rc1, rc;
1153 1227
1154 skb_reset_mac_header(skb); 1228 skb_reset_mac_header(skb);
1155 eth = eth_hdr(skb); 1229 eth = eth_hdr(skb);
@@ -1168,33 +1242,28 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
1168 } 1242 }
1169 1243
1170 if (f == NULL) { 1244 if (f == NULL) {
1171 rdst0 = &vxlan->default_dst; 1245 f = vxlan_find_mac(vxlan, all_zeros_mac);
1172 1246 if (f == NULL) {
1173 if (rdst0->remote_ip == htonl(INADDR_ANY) && 1247 if ((vxlan->flags & VXLAN_F_L2MISS) &&
1174 (vxlan->flags & VXLAN_F_L2MISS) && 1248 !is_multicast_ether_addr(eth->h_dest))
1175 !is_multicast_ether_addr(eth->h_dest)) 1249 vxlan_fdb_miss(vxlan, eth->h_dest);
1176 vxlan_fdb_miss(vxlan, eth->h_dest); 1250
1177 } else 1251 dev->stats.tx_dropped++;
1178 rdst0 = &f->remote; 1252 dev_kfree_skb(skb);
1179 1253 return NETDEV_TX_OK;
1180 rc = NETDEV_TX_OK; 1254 }
1255 }
1181 1256
1182 /* if there are multiple destinations, send copies */ 1257 list_for_each_entry_rcu(rdst, &f->remotes, list) {
1183 for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) {
1184 struct sk_buff *skb1; 1258 struct sk_buff *skb1;
1185 1259
1186 skb1 = skb_clone(skb, GFP_ATOMIC); 1260 skb1 = skb_clone(skb, GFP_ATOMIC);
1187 if (skb1) { 1261 if (skb1)
1188 rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc); 1262 vxlan_xmit_one(skb1, dev, rdst, did_rsc);
1189 if (rc == NETDEV_TX_OK)
1190 rc = rc1;
1191 }
1192 } 1263 }
1193 1264
1194 rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc); 1265 dev_kfree_skb(skb);
1195 if (rc == NETDEV_TX_OK) 1266 return NETDEV_TX_OK;
1196 rc = rc1;
1197 return rc;
1198} 1267}
1199 1268
1200/* Walk the forwarding table and purge stale entries */ 1269/* Walk the forwarding table and purge stale entries */
@@ -1237,23 +1306,70 @@ static void vxlan_cleanup(unsigned long arg)
1237/* Setup stats when device is created */ 1306/* Setup stats when device is created */
1238static int vxlan_init(struct net_device *dev) 1307static int vxlan_init(struct net_device *dev)
1239{ 1308{
1309 struct vxlan_dev *vxlan = netdev_priv(dev);
1310 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
1311 struct vxlan_sock *vs;
1312 __u32 vni = vxlan->default_dst.remote_vni;
1313
1240 dev->tstats = alloc_percpu(struct pcpu_tstats); 1314 dev->tstats = alloc_percpu(struct pcpu_tstats);
1241 if (!dev->tstats) 1315 if (!dev->tstats)
1242 return -ENOMEM; 1316 return -ENOMEM;
1243 1317
1318 spin_lock(&vn->sock_lock);
1319 vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
1320 if (vs) {
1321 /* If we have a socket with same port already, reuse it */
1322 atomic_inc(&vs->refcnt);
1323 vxlan->vn_sock = vs;
1324 hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
1325 } else {
1326 /* otherwise make new socket outside of RTNL */
1327 dev_hold(dev);
1328 queue_work(vxlan_wq, &vxlan->sock_work);
1329 }
1330 spin_unlock(&vn->sock_lock);
1331
1244 return 0; 1332 return 0;
1245} 1333}
1246 1334
1335static void vxlan_fdb_delete_defualt(struct vxlan_dev *vxlan)
1336{
1337 struct vxlan_fdb *f;
1338
1339 spin_lock_bh(&vxlan->hash_lock);
1340 f = __vxlan_find_mac(vxlan, all_zeros_mac);
1341 if (f)
1342 vxlan_fdb_destroy(vxlan, f);
1343 spin_unlock_bh(&vxlan->hash_lock);
1344}
1345
1346static void vxlan_uninit(struct net_device *dev)
1347{
1348 struct vxlan_dev *vxlan = netdev_priv(dev);
1349 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
1350 struct vxlan_sock *vs = vxlan->vn_sock;
1351
1352 vxlan_fdb_delete_defualt(vxlan);
1353
1354 if (vs)
1355 vxlan_sock_release(vn, vs);
1356 free_percpu(dev->tstats);
1357}
1358
1247/* Start ageing timer and join group when device is brought up */ 1359/* Start ageing timer and join group when device is brought up */
1248static int vxlan_open(struct net_device *dev) 1360static int vxlan_open(struct net_device *dev)
1249{ 1361{
1250 struct vxlan_dev *vxlan = netdev_priv(dev); 1362 struct vxlan_dev *vxlan = netdev_priv(dev);
1251 int err; 1363 struct vxlan_sock *vs = vxlan->vn_sock;
1364
1365 /* socket hasn't been created */
1366 if (!vs)
1367 return -ENOTCONN;
1252 1368
1253 if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { 1369 if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
1254 err = vxlan_join_group(dev); 1370 vxlan_sock_hold(vs);
1255 if (err) 1371 dev_hold(dev);
1256 return err; 1372 queue_work(vxlan_wq, &vxlan->igmp_work);
1257 } 1373 }
1258 1374
1259 if (vxlan->age_interval) 1375 if (vxlan->age_interval)
@@ -1273,7 +1389,9 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
1273 hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) { 1389 hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
1274 struct vxlan_fdb *f 1390 struct vxlan_fdb *f
1275 = container_of(p, struct vxlan_fdb, hlist); 1391 = container_of(p, struct vxlan_fdb, hlist);
1276 vxlan_fdb_destroy(vxlan, f); 1392 /* the all_zeros_mac entry is deleted at vxlan_uninit */
1393 if (!is_zero_ether_addr(f->eth_addr))
1394 vxlan_fdb_destroy(vxlan, f);
1277 } 1395 }
1278 } 1396 }
1279 spin_unlock_bh(&vxlan->hash_lock); 1397 spin_unlock_bh(&vxlan->hash_lock);
@@ -1283,9 +1401,13 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
1283static int vxlan_stop(struct net_device *dev) 1401static int vxlan_stop(struct net_device *dev)
1284{ 1402{
1285 struct vxlan_dev *vxlan = netdev_priv(dev); 1403 struct vxlan_dev *vxlan = netdev_priv(dev);
1404 struct vxlan_sock *vs = vxlan->vn_sock;
1286 1405
1287 if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) 1406 if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
1288 vxlan_leave_group(dev); 1407 vxlan_sock_hold(vs);
1408 dev_hold(dev);
1409 queue_work(vxlan_wq, &vxlan->igmp_work);
1410 }
1289 1411
1290 del_timer_sync(&vxlan->age_timer); 1412 del_timer_sync(&vxlan->age_timer);
1291 1413
@@ -1301,6 +1423,7 @@ static void vxlan_set_multicast_list(struct net_device *dev)
1301 1423
1302static const struct net_device_ops vxlan_netdev_ops = { 1424static const struct net_device_ops vxlan_netdev_ops = {
1303 .ndo_init = vxlan_init, 1425 .ndo_init = vxlan_init,
1426 .ndo_uninit = vxlan_uninit,
1304 .ndo_open = vxlan_open, 1427 .ndo_open = vxlan_open,
1305 .ndo_stop = vxlan_stop, 1428 .ndo_stop = vxlan_stop,
1306 .ndo_start_xmit = vxlan_xmit, 1429 .ndo_start_xmit = vxlan_xmit,
@@ -1319,12 +1442,6 @@ static struct device_type vxlan_type = {
1319 .name = "vxlan", 1442 .name = "vxlan",
1320}; 1443};
1321 1444
1322static void vxlan_free(struct net_device *dev)
1323{
1324 free_percpu(dev->tstats);
1325 free_netdev(dev);
1326}
1327
1328/* Initialize the device structure. */ 1445/* Initialize the device structure. */
1329static void vxlan_setup(struct net_device *dev) 1446static void vxlan_setup(struct net_device *dev)
1330{ 1447{
@@ -1337,7 +1454,7 @@ static void vxlan_setup(struct net_device *dev)
1337 dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM; 1454 dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM;
1338 1455
1339 dev->netdev_ops = &vxlan_netdev_ops; 1456 dev->netdev_ops = &vxlan_netdev_ops;
1340 dev->destructor = vxlan_free; 1457 dev->destructor = free_netdev;
1341 SET_NETDEV_DEVTYPE(dev, &vxlan_type); 1458 SET_NETDEV_DEVTYPE(dev, &vxlan_type);
1342 1459
1343 dev->tx_queue_len = 0; 1460 dev->tx_queue_len = 0;
@@ -1354,6 +1471,8 @@ static void vxlan_setup(struct net_device *dev)
1354 1471
1355 INIT_LIST_HEAD(&vxlan->next); 1472 INIT_LIST_HEAD(&vxlan->next);
1356 spin_lock_init(&vxlan->hash_lock); 1473 spin_lock_init(&vxlan->hash_lock);
1474 INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
1475 INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
1357 1476
1358 init_timer_deferrable(&vxlan->age_timer); 1477 init_timer_deferrable(&vxlan->age_timer);
1359 vxlan->age_timer.function = vxlan_cleanup; 1478 vxlan->age_timer.function = vxlan_cleanup;
@@ -1445,7 +1564,6 @@ static void vxlan_del_work(struct work_struct *work)
1445 kfree_rcu(vs, rcu); 1564 kfree_rcu(vs, rcu);
1446} 1565}
1447 1566
1448/* Create new listen socket if needed */
1449static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) 1567static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
1450{ 1568{
1451 struct vxlan_sock *vs; 1569 struct vxlan_sock *vs;
@@ -1453,6 +1571,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
1453 struct sockaddr_in vxlan_addr = { 1571 struct sockaddr_in vxlan_addr = {
1454 .sin_family = AF_INET, 1572 .sin_family = AF_INET,
1455 .sin_addr.s_addr = htonl(INADDR_ANY), 1573 .sin_addr.s_addr = htonl(INADDR_ANY),
1574 .sin_port = port,
1456 }; 1575 };
1457 int rc; 1576 int rc;
1458 unsigned int h; 1577 unsigned int h;
@@ -1478,8 +1597,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
1478 sk = vs->sock->sk; 1597 sk = vs->sock->sk;
1479 sk_change_net(sk, net); 1598 sk_change_net(sk, net);
1480 1599
1481 vxlan_addr.sin_port = port;
1482
1483 rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr, 1600 rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr,
1484 sizeof(vxlan_addr)); 1601 sizeof(vxlan_addr));
1485 if (rc < 0) { 1602 if (rc < 0) {
@@ -1497,18 +1614,57 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
1497 udp_sk(sk)->encap_type = 1; 1614 udp_sk(sk)->encap_type = 1;
1498 udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; 1615 udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
1499 udp_encap_enable(); 1616 udp_encap_enable();
1617 atomic_set(&vs->refcnt, 1);
1500 1618
1501 vs->refcnt = 1;
1502 return vs; 1619 return vs;
1503} 1620}
1504 1621
1622/* Scheduled at device creation to bind to a socket */
1623static void vxlan_sock_work(struct work_struct *work)
1624{
1625 struct vxlan_dev *vxlan
1626 = container_of(work, struct vxlan_dev, sock_work);
1627 struct net_device *dev = vxlan->dev;
1628 struct net *net = dev_net(dev);
1629 __u32 vni = vxlan->default_dst.remote_vni;
1630 __be16 port = vxlan->dst_port;
1631 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1632 struct vxlan_sock *nvs, *ovs;
1633
1634 nvs = vxlan_socket_create(net, port);
1635 if (IS_ERR(nvs)) {
1636 netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
1637 PTR_ERR(nvs));
1638 goto out;
1639 }
1640
1641 spin_lock(&vn->sock_lock);
1642 /* Look again to see if can reuse socket */
1643 ovs = vxlan_find_port(net, port);
1644 if (ovs) {
1645 atomic_inc(&ovs->refcnt);
1646 vxlan->vn_sock = ovs;
1647 hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
1648 spin_unlock(&vn->sock_lock);
1649
1650 sk_release_kernel(nvs->sock->sk);
1651 kfree(nvs);
1652 } else {
1653 vxlan->vn_sock = nvs;
1654 hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
1655 hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
1656 spin_unlock(&vn->sock_lock);
1657 }
1658out:
1659 dev_put(dev);
1660}
1661
1505static int vxlan_newlink(struct net *net, struct net_device *dev, 1662static int vxlan_newlink(struct net *net, struct net_device *dev,
1506 struct nlattr *tb[], struct nlattr *data[]) 1663 struct nlattr *tb[], struct nlattr *data[])
1507{ 1664{
1508 struct vxlan_net *vn = net_generic(net, vxlan_net_id); 1665 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1509 struct vxlan_dev *vxlan = netdev_priv(dev); 1666 struct vxlan_dev *vxlan = netdev_priv(dev);
1510 struct vxlan_rdst *dst = &vxlan->default_dst; 1667 struct vxlan_rdst *dst = &vxlan->default_dst;
1511 struct vxlan_sock *vs;
1512 __u32 vni; 1668 __u32 vni;
1513 int err; 1669 int err;
1514 1670
@@ -1586,36 +1742,25 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
1586 return -EEXIST; 1742 return -EEXIST;
1587 } 1743 }
1588 1744
1589 vs = vxlan_find_port(net, vxlan->dst_port);
1590 if (vs)
1591 ++vs->refcnt;
1592 else {
1593 /* Drop lock because socket create acquires RTNL lock */
1594 rtnl_unlock();
1595 vs = vxlan_socket_create(net, vxlan->dst_port);
1596 rtnl_lock();
1597 if (IS_ERR(vs))
1598 return PTR_ERR(vs);
1599
1600 hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port));
1601 }
1602 vxlan->vn_sock = vs;
1603
1604 SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops); 1745 SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
1605 1746
1747 /* create an fdb entry for default destination */
1748 err = vxlan_fdb_create(vxlan, all_zeros_mac,
1749 vxlan->default_dst.remote_ip,
1750 NUD_REACHABLE|NUD_PERMANENT,
1751 NLM_F_EXCL|NLM_F_CREATE,
1752 vxlan->dst_port, vxlan->default_dst.remote_vni,
1753 vxlan->default_dst.remote_ifindex, NTF_SELF);
1754 if (err)
1755 return err;
1756
1606 err = register_netdevice(dev); 1757 err = register_netdevice(dev);
1607 if (err) { 1758 if (err) {
1608 if (--vs->refcnt == 0) { 1759 vxlan_fdb_delete_defualt(vxlan);
1609 rtnl_unlock();
1610 sk_release_kernel(vs->sock->sk);
1611 kfree(vs);
1612 rtnl_lock();
1613 }
1614 return err; 1760 return err;
1615 } 1761 }
1616 1762
1617 list_add(&vxlan->next, &vn->vxlan_list); 1763 list_add(&vxlan->next, &vn->vxlan_list);
1618 hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
1619 1764
1620 return 0; 1765 return 0;
1621} 1766}
@@ -1623,16 +1768,10 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
1623static void vxlan_dellink(struct net_device *dev, struct list_head *head) 1768static void vxlan_dellink(struct net_device *dev, struct list_head *head)
1624{ 1769{
1625 struct vxlan_dev *vxlan = netdev_priv(dev); 1770 struct vxlan_dev *vxlan = netdev_priv(dev);
1626 struct vxlan_sock *vs = vxlan->vn_sock;
1627 1771
1628 hlist_del_rcu(&vxlan->hlist); 1772 hlist_del_rcu(&vxlan->hlist);
1629 list_del(&vxlan->next); 1773 list_del(&vxlan->next);
1630 unregister_netdevice_queue(dev, head); 1774 unregister_netdevice_queue(dev, head);
1631
1632 if (--vs->refcnt == 0) {
1633 hlist_del_rcu(&vs->hlist);
1634 schedule_work(&vs->del_work);
1635 }
1636} 1775}
1637 1776
1638static size_t vxlan_get_size(const struct net_device *dev) 1777static size_t vxlan_get_size(const struct net_device *dev)
@@ -1721,6 +1860,7 @@ static __net_init int vxlan_init_net(struct net *net)
1721 unsigned int h; 1860 unsigned int h;
1722 1861
1723 INIT_LIST_HEAD(&vn->vxlan_list); 1862 INIT_LIST_HEAD(&vn->vxlan_list);
1863 spin_lock_init(&vn->sock_lock);
1724 1864
1725 for (h = 0; h < PORT_HASH_SIZE; ++h) 1865 for (h = 0; h < PORT_HASH_SIZE; ++h)
1726 INIT_HLIST_HEAD(&vn->sock_list[h]); 1866 INIT_HLIST_HEAD(&vn->sock_list[h]);
@@ -1750,6 +1890,10 @@ static int __init vxlan_init_module(void)
1750{ 1890{
1751 int rc; 1891 int rc;
1752 1892
1893 vxlan_wq = alloc_workqueue("vxlan", 0, 0);
1894 if (!vxlan_wq)
1895 return -ENOMEM;
1896
1753 get_random_bytes(&vxlan_salt, sizeof(vxlan_salt)); 1897 get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
1754 1898
1755 rc = register_pernet_device(&vxlan_net_ops); 1899 rc = register_pernet_device(&vxlan_net_ops);
@@ -1765,14 +1909,16 @@ static int __init vxlan_init_module(void)
1765out2: 1909out2:
1766 unregister_pernet_device(&vxlan_net_ops); 1910 unregister_pernet_device(&vxlan_net_ops);
1767out1: 1911out1:
1912 destroy_workqueue(vxlan_wq);
1768 return rc; 1913 return rc;
1769} 1914}
1770late_initcall(vxlan_init_module); 1915late_initcall(vxlan_init_module);
1771 1916
1772static void __exit vxlan_cleanup_module(void) 1917static void __exit vxlan_cleanup_module(void)
1773{ 1918{
1774 rtnl_link_unregister(&vxlan_link_ops);
1775 unregister_pernet_device(&vxlan_net_ops); 1919 unregister_pernet_device(&vxlan_net_ops);
1920 rtnl_link_unregister(&vxlan_link_ops);
1921 destroy_workqueue(vxlan_wq);
1776 rcu_barrier(); 1922 rcu_barrier();
1777} 1923}
1778module_exit(vxlan_cleanup_module); 1924module_exit(vxlan_cleanup_module);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ebfa4443c69b..60aca9109a50 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -707,6 +707,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
707 } 707 }
708 } 708 }
709 709
710 if (is_zero_ether_addr(addr)) {
711 pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
712 return -EINVAL;
713 }
714
710 p = br_port_get_rtnl(dev); 715 p = br_port_get_rtnl(dev);
711 if (p == NULL) { 716 if (p == NULL) {
712 pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", 717 pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9007533867f0..3de740834d1f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2109,10 +2109,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
2109 } 2109 }
2110 2110
2111 addr = nla_data(tb[NDA_LLADDR]); 2111 addr = nla_data(tb[NDA_LLADDR]);
2112 if (is_zero_ether_addr(addr)) {
2113 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
2114 return -EINVAL;
2115 }
2116 2112
2117 err = -EOPNOTSUPP; 2113 err = -EOPNOTSUPP;
2118 2114
@@ -2210,10 +2206,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
2210 } 2206 }
2211 2207
2212 addr = nla_data(tb[NDA_LLADDR]); 2208 addr = nla_data(tb[NDA_LLADDR]);
2213 if (is_zero_ether_addr(addr)) {
2214 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
2215 return -EINVAL;
2216 }
2217 2209
2218 err = -EOPNOTSUPP; 2210 err = -EOPNOTSUPP;
2219 2211