aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author John Fastabend <john.r.fastabend@intel.com> 2012-04-15 02:43:56 -0400
committer David S. Miller <davem@davemloft.net> 2012-04-15 13:06:04 -0400
commit 77162022ab26a1f99d3af30c03760a76f86e193d (patch)
tree dabae13dc00b91198c5bed91ebb790942d456081 /net
parent 136cd14e1ea2bfde66d212d8e18e81552c94e4e3 (diff)
net: add generic PF_BRIDGE:RTM_ FDB hooks
This adds two new flags, NTF_MASTER and NTF_SELF, that can now be used to specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends the commands to the 'dev->master' device for parsing. Typically this will be the linux net/bridge, or open-vswitch devices. Also, without any flags set, the command will be handled by the master device as well, so that current user space tools continue to work as expected.

The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the basic example below the commands are then parsed and programmed in the embedded bridge.

Note that if both the NTF_SELF and NTF_MASTER bits are set, then the command will be sent to both 'dev->master' and 'dev'; this allows user space to easily keep the embedded bridge and software bridge in sync.

There is a slight complication in the case with both flags set when an error occurs. To resolve this, the rtnl handler clears the NTF_ flag in the netlink ack to indicate which sets completed successfully. The add/del handlers will abort as soon as any error occurs.

To support this, new net device ops were added to call into the device, and the existing bridging code was refactored to use these. There should be no required changes in user space to support the current bridge behavior.

A basic setup with an SR-IOV enabled NIC looks like this:

          veth0  veth2
            |      |
          ------------
          |  bridge0 |   <---- software bridging
          ------------
               /
               /
  ethx.y      ethx
    VF         PF
     \         \          <---- propagate FDB entries to HW
     \         \
  --------------------
  |  Embedded Bridge |    <---- hardware offloaded switching
  --------------------

In this case the embedded bridge must be managed to allow 'veth0' to communicate with 'ethx.y' correctly. At present, drivers managing the embedded bridge either send frames onto the network, which then get dropped by the switch, OR the embedded bridge will flood these frames. With this patch we have a mechanism to manage the embedded bridge correctly from user space.
This example is specific to SR-IOV, but replacing the VF with another PF or dropping this into the DSA framework generates similar management issues.

Example session using the 'br'[1] tool to add, dump and then delete a mac address with a new "embedded" option and enabled ixgbe driver:

# br fdb add 22:35:19:ac:60:59 dev eth3
# br fdb
port    mac addr                flags
veth0   22:35:19:ac:60:58       static
veth0   9a:5f:81:f7:f6:ec       local
eth3    00:1b:21:55:23:59       local
eth3    22:35:19:ac:60:59       static
veth0   22:35:19:ac:60:57       static
#br fdb add 22:35:19:ac:60:59 embedded dev eth3
#br fdb
port    mac addr                flags
veth0   22:35:19:ac:60:58       static
veth0   9a:5f:81:f7:f6:ec       local
eth3    00:1b:21:55:23:59       local
eth3    22:35:19:ac:60:59       static
veth0   22:35:19:ac:60:57       static
eth3    22:35:19:ac:60:59       local embedded
#br fdb del 22:35:19:ac:60:59 embedded dev eth3

I only added a couple of lines to 'br' to set the flags correctly. It is my opinion that the merit of this patch is that embedded and SW bridges can now both be modeled correctly in user space using very nearly the same message passing.

[1] The 'br' tool was published as an RFC here and will be renamed 'bridge': http://patchwork.ozlabs.org/patch/117664/

Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for valuable feedback, suggestions, and review.

v2: fixed api descriptions and error case with both NTF_SELF and NTF_MASTER set, plus updated patch description.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/bridge/br_device.c 3
-rw-r--r-- net/bridge/br_fdb.c 128
-rw-r--r-- net/bridge/br_netlink.c 12
-rw-r--r-- net/bridge/br_private.h 15
-rw-r--r-- net/core/rtnetlink.c 152
5 files changed, 198 insertions, 112 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ba829de84423..d6e5929458b1 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -317,6 +317,9 @@ static const struct net_device_ops br_netdev_ops = {
317 .ndo_add_slave = br_add_slave, 317 .ndo_add_slave = br_add_slave,
318 .ndo_del_slave = br_del_slave, 318 .ndo_del_slave = br_del_slave,
319 .ndo_fix_features = br_fix_features, 319 .ndo_fix_features = br_fix_features,
320 .ndo_fdb_add = br_fdb_add,
321 .ndo_fdb_del = br_fdb_delete,
322 .ndo_fdb_dump = br_fdb_dump,
320}; 323};
321 324
322static void br_dev_free(struct net_device *dev) 325static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 80dbce4974ce..5945c54bc2de 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -535,44 +535,38 @@ errout:
535} 535}
536 536
537/* Dump information about entries, in response to GETNEIGH */ 537/* Dump information about entries, in response to GETNEIGH */
538int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) 538int br_fdb_dump(struct sk_buff *skb,
539 struct netlink_callback *cb,
540 struct net_device *dev,
541 int idx)
539{ 542{
540 struct net *net = sock_net(skb->sk); 543 struct net_bridge *br = netdev_priv(dev);
541 struct net_device *dev; 544 int i;
542 int idx = 0;
543
544 rcu_read_lock();
545 for_each_netdev_rcu(net, dev) {
546 struct net_bridge *br = netdev_priv(dev);
547 int i;
548
549 if (!(dev->priv_flags & IFF_EBRIDGE))
550 continue;
551 545
552 for (i = 0; i < BR_HASH_SIZE; i++) { 546 if (!(dev->priv_flags & IFF_EBRIDGE))
553 struct hlist_node *h; 547 goto out;
554 struct net_bridge_fdb_entry *f;
555 548
556 hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { 549 for (i = 0; i < BR_HASH_SIZE; i++) {
557 if (idx < cb->args[0]) 550 struct hlist_node *h;
558 goto skip; 551 struct net_bridge_fdb_entry *f;
559 552
560 if (fdb_fill_info(skb, br, f, 553 hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
561 NETLINK_CB(cb->skb).pid, 554 if (idx < cb->args[0])
562 cb->nlh->nlmsg_seq, 555 goto skip;
563 RTM_NEWNEIGH, 556
564 NLM_F_MULTI) < 0) 557 if (fdb_fill_info(skb, br, f,
565 break; 558 NETLINK_CB(cb->skb).pid,
559 cb->nlh->nlmsg_seq,
560 RTM_NEWNEIGH,
561 NLM_F_MULTI) < 0)
562 break;
566skip: 563skip:
567 ++idx; 564 ++idx;
568 }
569 } 565 }
570 } 566 }
571 rcu_read_unlock();
572
573 cb->args[0] = idx;
574 567
575 return skb->len; 568out:
569 return idx;
576} 570}
577 571
578/* Update (create or replace) forwarding database entry */ 572/* Update (create or replace) forwarding database entry */
@@ -614,43 +608,11 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
614} 608}
615 609
616/* Add new permanent fdb entry with RTM_NEWNEIGH */ 610/* Add new permanent fdb entry with RTM_NEWNEIGH */
617int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 611int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
612 unsigned char *addr, u16 nlh_flags)
618{ 613{
619 struct net *net = sock_net(skb->sk);
620 struct ndmsg *ndm;
621 struct nlattr *tb[NDA_MAX+1];
622 struct net_device *dev;
623 struct net_bridge_port *p; 614 struct net_bridge_port *p;
624 const __u8 *addr; 615 int err = 0;
625 int err;
626
627 ASSERT_RTNL();
628 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
629 if (err < 0)
630 return err;
631
632 ndm = nlmsg_data(nlh);
633 if (ndm->ndm_ifindex == 0) {
634 pr_info("bridge: RTM_NEWNEIGH with invalid ifindex\n");
635 return -EINVAL;
636 }
637
638 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
639 if (dev == NULL) {
640 pr_info("bridge: RTM_NEWNEIGH with unknown ifindex\n");
641 return -ENODEV;
642 }
643
644 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
645 pr_info("bridge: RTM_NEWNEIGH with invalid address\n");
646 return -EINVAL;
647 }
648
649 addr = nla_data(tb[NDA_LLADDR]);
650 if (!is_valid_ether_addr(addr)) {
651 pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
652 return -EINVAL;
653 }
654 616
655 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { 617 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
656 pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); 618 pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
@@ -670,14 +632,14 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
670 rcu_read_unlock(); 632 rcu_read_unlock();
671 } else { 633 } else {
672 spin_lock_bh(&p->br->hash_lock); 634 spin_lock_bh(&p->br->hash_lock);
673 err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags); 635 err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags);
674 spin_unlock_bh(&p->br->hash_lock); 636 spin_unlock_bh(&p->br->hash_lock);
675 } 637 }
676 638
677 return err; 639 return err;
678} 640}
679 641
680static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) 642static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
681{ 643{
682 struct net_bridge *br = p->br; 644 struct net_bridge *br = p->br;
683 struct hlist_head *head = &br->hash[br_mac_hash(addr)]; 645 struct hlist_head *head = &br->hash[br_mac_hash(addr)];
@@ -692,40 +654,12 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
692} 654}
693 655
694/* Remove neighbor entry with RTM_DELNEIGH */ 656/* Remove neighbor entry with RTM_DELNEIGH */
695int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 657int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
658 unsigned char *addr)
696{ 659{
697 struct net *net = sock_net(skb->sk);
698 struct ndmsg *ndm;
699 struct net_bridge_port *p; 660 struct net_bridge_port *p;
700 struct nlattr *llattr;
701 const __u8 *addr;
702 struct net_device *dev;
703 int err; 661 int err;
704 662
705 ASSERT_RTNL();
706 if (nlmsg_len(nlh) < sizeof(*ndm))
707 return -EINVAL;
708
709 ndm = nlmsg_data(nlh);
710 if (ndm->ndm_ifindex == 0) {
711 pr_info("bridge: RTM_DELNEIGH with invalid ifindex\n");
712 return -EINVAL;
713 }
714
715 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
716 if (dev == NULL) {
717 pr_info("bridge: RTM_DELNEIGH with unknown ifindex\n");
718 return -ENODEV;
719 }
720
721 llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
722 if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
723 pr_info("bridge: RTM_DELNEIGH with invalid address\n");
724 return -EINVAL;
725 }
726
727 addr = nla_data(llattr);
728
729 p = br_port_get_rtnl(dev); 663 p = br_port_get_rtnl(dev);
730 if (p == NULL) { 664 if (p == NULL) {
731 pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", 665 pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index df38108f6973..2080485515f1 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -232,18 +232,6 @@ int __init br_netlink_init(void)
232 br_rtm_setlink, NULL, NULL); 232 br_rtm_setlink, NULL, NULL);
233 if (err) 233 if (err)
234 goto err3; 234 goto err3;
235 err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH,
236 br_fdb_add, NULL, NULL);
237 if (err)
238 goto err3;
239 err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH,
240 br_fdb_delete, NULL, NULL);
241 if (err)
242 goto err3;
243 err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH,
244 NULL, br_fdb_dump, NULL);
245 if (err)
246 goto err3;
247 235
248 return 0; 236 return 0;
249 237
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f8ffd8c49054..1a8ad4fb9a6b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -360,9 +360,18 @@ extern int br_fdb_insert(struct net_bridge *br,
360extern void br_fdb_update(struct net_bridge *br, 360extern void br_fdb_update(struct net_bridge *br,
361 struct net_bridge_port *source, 361 struct net_bridge_port *source,
362 const unsigned char *addr); 362 const unsigned char *addr);
363extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb); 363
364extern int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); 364extern int br_fdb_delete(struct ndmsg *ndm,
365extern int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); 365 struct net_device *dev,
366 unsigned char *addr);
367extern int br_fdb_add(struct ndmsg *nlh,
368 struct net_device *dev,
369 unsigned char *addr,
370 u16 nlh_flags);
371extern int br_fdb_dump(struct sk_buff *skb,
372 struct netlink_callback *cb,
373 struct net_device *dev,
374 int idx);
366 375
367/* br_forward.c */ 376/* br_forward.c */
368extern void br_deliver(const struct net_bridge_port *to, 377extern void br_deliver(const struct net_bridge_port *to,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2ff6fe4bada4..b348b7fbf53a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,7 +35,9 @@
35#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/if_addr.h> 37#include <linux/if_addr.h>
38#include <linux/if_bridge.h>
38#include <linux/pci.h> 39#include <linux/pci.h>
40#include <linux/etherdevice.h>
39 41
40#include <asm/uaccess.h> 42#include <asm/uaccess.h>
41 43
@@ -1978,6 +1980,152 @@ errout:
1978 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 1980 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
1979} 1981}
1980 1982
1983static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1984{
1985 struct net *net = sock_net(skb->sk);
1986 struct net_device *master = NULL;
1987 struct ndmsg *ndm;
1988 struct nlattr *tb[NDA_MAX+1];
1989 struct net_device *dev;
1990 u8 *addr;
1991 int err;
1992
1993 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1994 if (err < 0)
1995 return err;
1996
1997 ndm = nlmsg_data(nlh);
1998 if (ndm->ndm_ifindex == 0) {
1999 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
2000 return -EINVAL;
2001 }
2002
2003 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2004 if (dev == NULL) {
2005 pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
2006 return -ENODEV;
2007 }
2008
2009 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
2010 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
2011 return -EINVAL;
2012 }
2013
2014 addr = nla_data(tb[NDA_LLADDR]);
2015 if (!is_valid_ether_addr(addr)) {
2016 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
2017 return -EINVAL;
2018 }
2019
2020 err = -EOPNOTSUPP;
2021
2022 /* Support fdb on master device the net/bridge default case */
2023 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2024 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2025 master = dev->master;
2026 err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr,
2027 nlh->nlmsg_flags);
2028 if (err)
2029 goto out;
2030 else
2031 ndm->ndm_flags &= ~NTF_MASTER;
2032 }
2033
2034 /* Embedded bridge, macvlan, and any other device support */
2035 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) {
2036 err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr,
2037 nlh->nlmsg_flags);
2038
2039 if (!err)
2040 ndm->ndm_flags &= ~NTF_SELF;
2041 }
2042out:
2043 return err;
2044}
2045
2046static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2047{
2048 struct net *net = sock_net(skb->sk);
2049 struct ndmsg *ndm;
2050 struct nlattr *llattr;
2051 struct net_device *dev;
2052 int err = -EINVAL;
2053 __u8 *addr;
2054
2055 if (nlmsg_len(nlh) < sizeof(*ndm))
2056 return -EINVAL;
2057
2058 ndm = nlmsg_data(nlh);
2059 if (ndm->ndm_ifindex == 0) {
2060 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
2061 return -EINVAL;
2062 }
2063
2064 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2065 if (dev == NULL) {
2066 pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
2067 return -ENODEV;
2068 }
2069
2070 llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
2071 if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
2072 pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n");
2073 return -EINVAL;
2074 }
2075
2076 addr = nla_data(llattr);
2077 err = -EOPNOTSUPP;
2078
2079 /* Support fdb on master device the net/bridge default case */
2080 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2081 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2082 struct net_device *master = dev->master;
2083
2084 if (master->netdev_ops->ndo_fdb_del)
2085 err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr);
2086
2087 if (err)
2088 goto out;
2089 else
2090 ndm->ndm_flags &= ~NTF_MASTER;
2091 }
2092
2093 /* Embedded bridge, macvlan, and any other device support */
2094 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
2095 err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr);
2096
2097 if (!err)
2098 ndm->ndm_flags &= ~NTF_SELF;
2099 }
2100out:
2101 return err;
2102}
2103
2104static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2105{
2106 int idx = 0;
2107 struct net *net = sock_net(skb->sk);
2108 struct net_device *dev;
2109
2110 rcu_read_lock();
2111 for_each_netdev_rcu(net, dev) {
2112 if (dev->priv_flags & IFF_BRIDGE_PORT) {
2113 struct net_device *master = dev->master;
2114 const struct net_device_ops *ops = master->netdev_ops;
2115
2116 if (ops->ndo_fdb_dump)
2117 idx = ops->ndo_fdb_dump(skb, cb, dev, idx);
2118 }
2119
2120 if (dev->netdev_ops->ndo_fdb_dump)
2121 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx);
2122 }
2123 rcu_read_unlock();
2124
2125 cb->args[0] = idx;
2126 return skb->len;
2127}
2128
1981/* Protected by RTNL sempahore. */ 2129/* Protected by RTNL sempahore. */
1982static struct rtattr **rta_buf; 2130static struct rtattr **rta_buf;
1983static int rtattr_max; 2131static int rtattr_max;
@@ -2150,5 +2298,9 @@ void __init rtnetlink_init(void)
2150 2298
2151 rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); 2299 rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
2152 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); 2300 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
2301
2302 rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
2303 rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
2304 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
2153} 2305}
2154 2306