aboutsummaryrefslogtreecommitdiffstats
path: root/net/bridge
diff options
context:
space:
mode:
author John Fastabend <john.r.fastabend@intel.com> 2012-04-15 02:43:56 -0400
committer David S. Miller <davem@davemloft.net> 2012-04-15 13:06:04 -0400
commit 77162022ab26a1f99d3af30c03760a76f86e193d (patch)
tree dabae13dc00b91198c5bed91ebb790942d456081 /net/bridge
parent 136cd14e1ea2bfde66d212d8e18e81552c94e4e3 (diff)
net: add generic PF_BRIDGE:RTM_ FDB hooks
This adds two new flags, NTF_MASTER and NTF_SELF, that can now be used to
specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends
the commands to the 'dev->master' device for parsing. Typically this will
be the linux net/bridge, or open-vswitch devices. Also, without any flags
set the command will be handled by the master device as well, so that
current user space tools continue to work as expected.

The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the
basic example below the commands are then parsed and programmed in the
embedded bridge.

Note if both NTF_SELF and NTF_MASTER bits are set then the command will be
sent to both 'dev->master' and 'dev'; this allows user space to easily keep
the embedded bridge and software bridge in sync.

There is a slight complication in the case with both flags set when an
error occurs. To resolve this the rtnl handler clears the NTF_ flag in the
netlink ack to indicate which sets completed successfully. The add/del
handlers will abort as soon as any error occurs.

To support this, new net device ops were added to call into the device and
the existing bridging code was refactored to use these. There should be no
required changes in user space to support the current bridge behavior.

A basic setup with a SR-IOV enabled NIC looks like this,

          veth0  veth2
            |      |
          ------------
          |  bridge0 |   <---- software bridging
          ------------
               /
               /
      ethx.y  ethx
        VF     PF
         \     \         <---- propagate FDB entries to HW
          \     \
      --------------------
      | Embedded Bridge  |   <---- hardware offloaded switching
      --------------------

In this case the embedded bridge must be managed to allow 'veth0' to
communicate with 'ethx.y' correctly. At present drivers managing the
embedded bridge either send frames onto the network which then get dropped
by the switch OR the embedded bridge will flood these frames. With this
patch we have a mechanism to manage the embedded bridge correctly from
user space.
This example is specific to SR-IOV but replacing the VF with another PF or
dropping this into the DSA framework generates similar management issues.

Example session using the 'br'[1] tool to add, dump and then delete a mac
address with a new "embedded" option and enabled ixgbe driver:

# br fdb add 22:35:19:ac:60:59 dev eth3
# br fdb
port    mac addr                flags
veth0   22:35:19:ac:60:58       static
veth0   9a:5f:81:f7:f6:ec       local
eth3    00:1b:21:55:23:59       local
eth3    22:35:19:ac:60:59       static
veth0   22:35:19:ac:60:57       static
# br fdb add 22:35:19:ac:60:59 embedded dev eth3
# br fdb
port    mac addr                flags
veth0   22:35:19:ac:60:58       static
veth0   9a:5f:81:f7:f6:ec       local
eth3    00:1b:21:55:23:59       local
eth3    22:35:19:ac:60:59       static
veth0   22:35:19:ac:60:57       static
eth3    22:35:19:ac:60:59       local embedded
# br fdb del 22:35:19:ac:60:59 embedded dev eth3

I added a couple lines to 'br' to set the flags correctly is all. It is my
opinion that the merit of this patch is that embedded and SW bridges can
now both be modeled correctly in user space using very nearly the same
message passing.

[1] 'br' tool was published as an RFC here and will be renamed 'bridge'
    http://patchwork.ozlabs.org/patch/117664/

Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for
valuable feedback, suggestions, and review.

v2: fixed api descriptions and error case with both NTF_SELF and
    NTF_MASTER set plus updated patch description.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/bridge')
-rw-r--r-- net/bridge/br_device.c 3
-rw-r--r-- net/bridge/br_fdb.c 128
-rw-r--r-- net/bridge/br_netlink.c 12
-rw-r--r-- net/bridge/br_private.h 15
4 files changed, 46 insertions, 112 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ba829de84423..d6e5929458b1 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -317,6 +317,9 @@ static const struct net_device_ops br_netdev_ops = {
317 .ndo_add_slave = br_add_slave, 317 .ndo_add_slave = br_add_slave,
318 .ndo_del_slave = br_del_slave, 318 .ndo_del_slave = br_del_slave,
319 .ndo_fix_features = br_fix_features, 319 .ndo_fix_features = br_fix_features,
320 .ndo_fdb_add = br_fdb_add,
321 .ndo_fdb_del = br_fdb_delete,
322 .ndo_fdb_dump = br_fdb_dump,
320}; 323};
321 324
322static void br_dev_free(struct net_device *dev) 325static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 80dbce4974ce..5945c54bc2de 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -535,44 +535,38 @@ errout:
535} 535}
536 536
537/* Dump information about entries, in response to GETNEIGH */ 537/* Dump information about entries, in response to GETNEIGH */
538int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) 538int br_fdb_dump(struct sk_buff *skb,
539 struct netlink_callback *cb,
540 struct net_device *dev,
541 int idx)
539{ 542{
540 struct net *net = sock_net(skb->sk); 543 struct net_bridge *br = netdev_priv(dev);
541 struct net_device *dev; 544 int i;
542 int idx = 0;
543
544 rcu_read_lock();
545 for_each_netdev_rcu(net, dev) {
546 struct net_bridge *br = netdev_priv(dev);
547 int i;
548
549 if (!(dev->priv_flags & IFF_EBRIDGE))
550 continue;
551 545
552 for (i = 0; i < BR_HASH_SIZE; i++) { 546 if (!(dev->priv_flags & IFF_EBRIDGE))
553 struct hlist_node *h; 547 goto out;
554 struct net_bridge_fdb_entry *f;
555 548
556 hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { 549 for (i = 0; i < BR_HASH_SIZE; i++) {
557 if (idx < cb->args[0]) 550 struct hlist_node *h;
558 goto skip; 551 struct net_bridge_fdb_entry *f;
559 552
560 if (fdb_fill_info(skb, br, f, 553 hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
561 NETLINK_CB(cb->skb).pid, 554 if (idx < cb->args[0])
562 cb->nlh->nlmsg_seq, 555 goto skip;
563 RTM_NEWNEIGH, 556
564 NLM_F_MULTI) < 0) 557 if (fdb_fill_info(skb, br, f,
565 break; 558 NETLINK_CB(cb->skb).pid,
559 cb->nlh->nlmsg_seq,
560 RTM_NEWNEIGH,
561 NLM_F_MULTI) < 0)
562 break;
566skip: 563skip:
567 ++idx; 564 ++idx;
568 }
569 } 565 }
570 } 566 }
571 rcu_read_unlock();
572
573 cb->args[0] = idx;
574 567
575 return skb->len; 568out:
569 return idx;
576} 570}
577 571
578/* Update (create or replace) forwarding database entry */ 572/* Update (create or replace) forwarding database entry */
@@ -614,43 +608,11 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
614} 608}
615 609
616/* Add new permanent fdb entry with RTM_NEWNEIGH */ 610/* Add new permanent fdb entry with RTM_NEWNEIGH */
617int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 611int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
612 unsigned char *addr, u16 nlh_flags)
618{ 613{
619 struct net *net = sock_net(skb->sk);
620 struct ndmsg *ndm;
621 struct nlattr *tb[NDA_MAX+1];
622 struct net_device *dev;
623 struct net_bridge_port *p; 614 struct net_bridge_port *p;
624 const __u8 *addr; 615 int err = 0;
625 int err;
626
627 ASSERT_RTNL();
628 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
629 if (err < 0)
630 return err;
631
632 ndm = nlmsg_data(nlh);
633 if (ndm->ndm_ifindex == 0) {
634 pr_info("bridge: RTM_NEWNEIGH with invalid ifindex\n");
635 return -EINVAL;
636 }
637
638 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
639 if (dev == NULL) {
640 pr_info("bridge: RTM_NEWNEIGH with unknown ifindex\n");
641 return -ENODEV;
642 }
643
644 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
645 pr_info("bridge: RTM_NEWNEIGH with invalid address\n");
646 return -EINVAL;
647 }
648
649 addr = nla_data(tb[NDA_LLADDR]);
650 if (!is_valid_ether_addr(addr)) {
651 pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
652 return -EINVAL;
653 }
654 616
655 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { 617 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
656 pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); 618 pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
@@ -670,14 +632,14 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
670 rcu_read_unlock(); 632 rcu_read_unlock();
671 } else { 633 } else {
672 spin_lock_bh(&p->br->hash_lock); 634 spin_lock_bh(&p->br->hash_lock);
673 err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags); 635 err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags);
674 spin_unlock_bh(&p->br->hash_lock); 636 spin_unlock_bh(&p->br->hash_lock);
675 } 637 }
676 638
677 return err; 639 return err;
678} 640}
679 641
680static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) 642static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
681{ 643{
682 struct net_bridge *br = p->br; 644 struct net_bridge *br = p->br;
683 struct hlist_head *head = &br->hash[br_mac_hash(addr)]; 645 struct hlist_head *head = &br->hash[br_mac_hash(addr)];
@@ -692,40 +654,12 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
692} 654}
693 655
694/* Remove neighbor entry with RTM_DELNEIGH */ 656/* Remove neighbor entry with RTM_DELNEIGH */
695int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 657int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
658 unsigned char *addr)
696{ 659{
697 struct net *net = sock_net(skb->sk);
698 struct ndmsg *ndm;
699 struct net_bridge_port *p; 660 struct net_bridge_port *p;
700 struct nlattr *llattr;
701 const __u8 *addr;
702 struct net_device *dev;
703 int err; 661 int err;
704 662
705 ASSERT_RTNL();
706 if (nlmsg_len(nlh) < sizeof(*ndm))
707 return -EINVAL;
708
709 ndm = nlmsg_data(nlh);
710 if (ndm->ndm_ifindex == 0) {
711 pr_info("bridge: RTM_DELNEIGH with invalid ifindex\n");
712 return -EINVAL;
713 }
714
715 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
716 if (dev == NULL) {
717 pr_info("bridge: RTM_DELNEIGH with unknown ifindex\n");
718 return -ENODEV;
719 }
720
721 llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
722 if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
723 pr_info("bridge: RTM_DELNEIGH with invalid address\n");
724 return -EINVAL;
725 }
726
727 addr = nla_data(llattr);
728
729 p = br_port_get_rtnl(dev); 663 p = br_port_get_rtnl(dev);
730 if (p == NULL) { 664 if (p == NULL) {
731 pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", 665 pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index df38108f6973..2080485515f1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -232,18 +232,6 @@ int __init br_netlink_init(void)
232 br_rtm_setlink, NULL, NULL); 232 br_rtm_setlink, NULL, NULL);
233 if (err) 233 if (err)
234 goto err3; 234 goto err3;
235 err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH,
236 br_fdb_add, NULL, NULL);
237 if (err)
238 goto err3;
239 err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH,
240 br_fdb_delete, NULL, NULL);
241 if (err)
242 goto err3;
243 err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH,
244 NULL, br_fdb_dump, NULL);
245 if (err)
246 goto err3;
247 235
248 return 0; 236 return 0;
249 237
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f8ffd8c49054..1a8ad4fb9a6b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -360,9 +360,18 @@ extern int br_fdb_insert(struct net_bridge *br,
360extern void br_fdb_update(struct net_bridge *br, 360extern void br_fdb_update(struct net_bridge *br,
361 struct net_bridge_port *source, 361 struct net_bridge_port *source,
362 const unsigned char *addr); 362 const unsigned char *addr);
363extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb); 363
364extern int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); 364extern int br_fdb_delete(struct ndmsg *ndm,
365extern int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); 365 struct net_device *dev,
366 unsigned char *addr);
367extern int br_fdb_add(struct ndmsg *nlh,
368 struct net_device *dev,
369 unsigned char *addr,
370 u16 nlh_flags);
371extern int br_fdb_dump(struct sk_buff *skb,
372 struct netlink_callback *cb,
373 struct net_device *dev,
374 int idx);
366 375
367/* br_forward.c */ 376/* br_forward.c */
368extern void br_deliver(const struct net_bridge_port *to, 377extern void br_deliver(const struct net_bridge_port *to,