aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/rtnetlink.c
diff options
context:
space:
mode:
authorJohn Fastabend <john.r.fastabend@intel.com>2012-04-15 02:43:56 -0400
committerDavid S. Miller <davem@davemloft.net>2012-04-15 13:06:04 -0400
commit77162022ab26a1f99d3af30c03760a76f86e193d (patch)
treedabae13dc00b91198c5bed91ebb790942d456081 /net/core/rtnetlink.c
parent136cd14e1ea2bfde66d212d8e18e81552c94e4e3 (diff)
net: add generic PF_BRIDGE:RTM_ FDB hooks
This adds two new flags NTF_MASTER and NTF_SELF that can now be used to
specify where PF_BRIDGE netlink commands should be sent. NTF_MASTER sends
the commands to the 'dev->master' device for parsing. Typically this will
be the linux net/bridge, or open-vswitch devices. Also without any flags
set the command will be handled by the master device as well so that
current user space tools continue to work as expected.

The NTF_SELF flag will push the PF_BRIDGE commands to the device. In the
basic example below the commands are then parsed and programmed in the
embedded bridge.

Note if both NTF_SELF and NTF_MASTER bits are set then the command will be
sent to both 'dev->master' and 'dev'; this allows user space to easily keep
the embedded bridge and software bridge in sync.

There is a slight complication in the case with both flags set when an
error occurs. To resolve this the rtnl handler clears the NTF_ flag in the
netlink ack to indicate which sets completed successfully. The add/del
handlers will abort as soon as any error occurs.

To support this, new net device ops were added to call into the device and
the existing bridging code was refactored to use these. There should be no
required changes in user space to support the current bridge behavior.

A basic setup with a SR-IOV enabled NIC looks like this,

          veth0  veth2
            |      |
          ------------
          |  bridge0 |   <---- software bridging
          ------------
               /
               /
  ethx.y      ethx
    VF         PF
     \         \          <---- propagate FDB entries to HW
     \         \
  --------------------
  |  Embedded Bridge |    <---- hardware offloaded switching
  --------------------

In this case the embedded bridge must be managed to allow 'veth0' to
communicate with 'ethx.y' correctly. At present drivers managing the
embedded bridge either send frames onto the network which then get dropped
by the switch OR the embedded bridge will flood these frames. With this
patch we have a mechanism to manage the embedded bridge correctly from
user space.
This example is specific to SR-IOV but replacing the VF with another PF or
dropping this into the DSA framework generates similar management issues.

Example session using the 'br'[1] tool to add, dump and then delete a mac
address with a new "embedded" option and enabled ixgbe driver:

# br fdb add 22:35:19:ac:60:59 dev eth3
# br fdb
port    mac addr                flags
veth0   22:35:19:ac:60:58       static
veth0   9a:5f:81:f7:f6:ec       local
eth3    00:1b:21:55:23:59       local
eth3    22:35:19:ac:60:59       static
veth0   22:35:19:ac:60:57       static
#br fdb add 22:35:19:ac:60:59 embedded dev eth3
#br fdb
port    mac addr                flags
veth0   22:35:19:ac:60:58       static
veth0   9a:5f:81:f7:f6:ec       local
eth3    00:1b:21:55:23:59       local
eth3    22:35:19:ac:60:59       static
veth0   22:35:19:ac:60:57       static
eth3    22:35:19:ac:60:59       local embedded
#br fdb del 22:35:19:ac:60:59 embedded dev eth3

I only added a couple of lines to 'br' to set the flags correctly.

It is my opinion that the merit of this patch is now embedded and SW
bridges can both be modeled correctly in user space using very nearly the
same message passing.

[1] 'br' tool was published as an RFC here and will be renamed 'bridge'
    http://patchwork.ozlabs.org/patch/117664/

Thanks to Jamal Hadi Salim, Stephen Hemminger and Ben Hutchings for
valuable feedback, suggestions, and review.

v2: fixed api descriptions and error case with both NTF_SELF and
    NTF_MASTER set plus updated patch description.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/rtnetlink.c')
-rw-r--r--net/core/rtnetlink.c152
1 files changed, 152 insertions, 0 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2ff6fe4bada4..b348b7fbf53a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,7 +35,9 @@
35#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/if_addr.h> 37#include <linux/if_addr.h>
38#include <linux/if_bridge.h>
38#include <linux/pci.h> 39#include <linux/pci.h>
40#include <linux/etherdevice.h>
39 41
40#include <asm/uaccess.h> 42#include <asm/uaccess.h>
41 43
@@ -1978,6 +1980,152 @@ errout:
1978 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 1980 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
1979} 1981}
1980 1982
1983static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1984{
1985 struct net *net = sock_net(skb->sk);
1986 struct net_device *master = NULL;
1987 struct ndmsg *ndm;
1988 struct nlattr *tb[NDA_MAX+1];
1989 struct net_device *dev;
1990 u8 *addr;
1991 int err;
1992
1993 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1994 if (err < 0)
1995 return err;
1996
1997 ndm = nlmsg_data(nlh);
1998 if (ndm->ndm_ifindex == 0) {
1999 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
2000 return -EINVAL;
2001 }
2002
2003 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2004 if (dev == NULL) {
2005 pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
2006 return -ENODEV;
2007 }
2008
2009 if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
2010 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
2011 return -EINVAL;
2012 }
2013
2014 addr = nla_data(tb[NDA_LLADDR]);
2015 if (!is_valid_ether_addr(addr)) {
2016 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
2017 return -EINVAL;
2018 }
2019
2020 err = -EOPNOTSUPP;
2021
2022 /* Support fdb on master device the net/bridge default case */
2023 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2024 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2025 master = dev->master;
2026 err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr,
2027 nlh->nlmsg_flags);
2028 if (err)
2029 goto out;
2030 else
2031 ndm->ndm_flags &= ~NTF_MASTER;
2032 }
2033
2034 /* Embedded bridge, macvlan, and any other device support */
2035 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) {
2036 err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr,
2037 nlh->nlmsg_flags);
2038
2039 if (!err)
2040 ndm->ndm_flags &= ~NTF_SELF;
2041 }
2042out:
2043 return err;
2044}
2045
2046static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2047{
2048 struct net *net = sock_net(skb->sk);
2049 struct ndmsg *ndm;
2050 struct nlattr *llattr;
2051 struct net_device *dev;
2052 int err = -EINVAL;
2053 __u8 *addr;
2054
2055 if (nlmsg_len(nlh) < sizeof(*ndm))
2056 return -EINVAL;
2057
2058 ndm = nlmsg_data(nlh);
2059 if (ndm->ndm_ifindex == 0) {
2060 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
2061 return -EINVAL;
2062 }
2063
2064 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2065 if (dev == NULL) {
2066 pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
2067 return -ENODEV;
2068 }
2069
2070 llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
2071 if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
2072 pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n");
2073 return -EINVAL;
2074 }
2075
2076 addr = nla_data(llattr);
2077 err = -EOPNOTSUPP;
2078
2079 /* Support fdb on master device the net/bridge default case */
2080 if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
2081 (dev->priv_flags & IFF_BRIDGE_PORT)) {
2082 struct net_device *master = dev->master;
2083
2084 if (master->netdev_ops->ndo_fdb_del)
2085 err = master->netdev_ops->ndo_fdb_del(ndm, dev, addr);
2086
2087 if (err)
2088 goto out;
2089 else
2090 ndm->ndm_flags &= ~NTF_MASTER;
2091 }
2092
2093 /* Embedded bridge, macvlan, and any other device support */
2094 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
2095 err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr);
2096
2097 if (!err)
2098 ndm->ndm_flags &= ~NTF_SELF;
2099 }
2100out:
2101 return err;
2102}
2103
2104static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2105{
2106 int idx = 0;
2107 struct net *net = sock_net(skb->sk);
2108 struct net_device *dev;
2109
2110 rcu_read_lock();
2111 for_each_netdev_rcu(net, dev) {
2112 if (dev->priv_flags & IFF_BRIDGE_PORT) {
2113 struct net_device *master = dev->master;
2114 const struct net_device_ops *ops = master->netdev_ops;
2115
2116 if (ops->ndo_fdb_dump)
2117 idx = ops->ndo_fdb_dump(skb, cb, dev, idx);
2118 }
2119
2120 if (dev->netdev_ops->ndo_fdb_dump)
2121 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx);
2122 }
2123 rcu_read_unlock();
2124
2125 cb->args[0] = idx;
2126 return skb->len;
2127}
2128
1981/* Protected by RTNL semaphore. */ 2129/* Protected by RTNL semaphore. */
1982static struct rtattr **rta_buf; 2130static struct rtattr **rta_buf;
1983static int rtattr_max; 2131static int rtattr_max;
@@ -2150,5 +2298,9 @@ void __init rtnetlink_init(void)
2150 2298
2151 rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); 2299 rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
2152 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); 2300 rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
2301
2302 rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
2303 rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
2304 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
2153} 2305}
2154 2306