aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorGreg Rose <gregory.v.rose@intel.com>2012-02-21 16:54:48 -0500
committerDavid S. Miller <davem@davemloft.net>2012-02-21 16:56:45 -0500
commit115c9b81928360d769a76c632bae62d15206a94a (patch)
treebbbe349bdf3803439cf3a757b9f2a839726348f5 /net
parent84338a6c9dbb6ff3de4749864020f8f25d86fc81 (diff)
rtnetlink: Fix problem with buffer allocation
Implement a new netlink attribute type IFLA_EXT_MASK. The mask is a 32 bit value that can be used to indicate to the kernel that certain extended ifinfo values are requested by the user application. At this time the only mask value defined is RTEXT_FILTER_VF to indicate that the user wants the ifinfo dump to send information about the VFs belonging to the interface. This patch fixes a bug in which certain applications do not have large enough buffers to accommodate the extra information returned by the kernel with large numbers of SR-IOV virtual functions. Those applications will not send the new netlink attribute with the interface info dump request netlink messages so they will not get unexpectedly large request buffers returned by the kernel. Modifies the rtnl_calcit function to traverse the list of net devices and compute the minimum buffer size that can hold the info dumps of all matching devices based upon the filter passed in via the new netlink attribute filter mask. If no filter mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE. With this change it is possible to add yet to be defined netlink attributes to the dump request which should make it fairly extensible in the future. Signed-off-by: Greg Rose <gregory.v.rose@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/rtnetlink.c78
1 files changed, 57 insertions, 21 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 65aebd450027..606a6e8f3671 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link {
60}; 60};
61 61
62static DEFINE_MUTEX(rtnl_mutex); 62static DEFINE_MUTEX(rtnl_mutex);
63static u16 min_ifinfo_dump_size;
64 63
65void rtnl_lock(void) 64void rtnl_lock(void)
66{ 65{
@@ -724,10 +723,11 @@ static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
724} 723}
725 724
726/* All VF info */ 725/* All VF info */
727static inline int rtnl_vfinfo_size(const struct net_device *dev) 726static inline int rtnl_vfinfo_size(const struct net_device *dev,
727 u32 ext_filter_mask)
728{ 728{
729 if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { 729 if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
730 730 (ext_filter_mask & RTEXT_FILTER_VF)) {
731 int num_vfs = dev_num_vf(dev->dev.parent); 731 int num_vfs = dev_num_vf(dev->dev.parent);
732 size_t size = nla_total_size(sizeof(struct nlattr)); 732 size_t size = nla_total_size(sizeof(struct nlattr));
733 size += nla_total_size(num_vfs * sizeof(struct nlattr)); 733 size += nla_total_size(num_vfs * sizeof(struct nlattr));
@@ -766,7 +766,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
766 return port_self_size; 766 return port_self_size;
767} 767}
768 768
769static noinline size_t if_nlmsg_size(const struct net_device *dev) 769static noinline size_t if_nlmsg_size(const struct net_device *dev,
770 u32 ext_filter_mask)
770{ 771{
771 return NLMSG_ALIGN(sizeof(struct ifinfomsg)) 772 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
772 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ 773 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -784,8 +785,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
784 + nla_total_size(4) /* IFLA_MASTER */ 785 + nla_total_size(4) /* IFLA_MASTER */
785 + nla_total_size(1) /* IFLA_OPERSTATE */ 786 + nla_total_size(1) /* IFLA_OPERSTATE */
786 + nla_total_size(1) /* IFLA_LINKMODE */ 787 + nla_total_size(1) /* IFLA_LINKMODE */
787 + nla_total_size(4) /* IFLA_NUM_VF */ 788 + nla_total_size(ext_filter_mask
788 + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ 789 & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
790 + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
789 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ 791 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
790 + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ 792 + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
791 + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ 793 + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
@@ -868,7 +870,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
868 870
869static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 871static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
870 int type, u32 pid, u32 seq, u32 change, 872 int type, u32 pid, u32 seq, u32 change,
871 unsigned int flags) 873 unsigned int flags, u32 ext_filter_mask)
872{ 874{
873 struct ifinfomsg *ifm; 875 struct ifinfomsg *ifm;
874 struct nlmsghdr *nlh; 876 struct nlmsghdr *nlh;
@@ -941,10 +943,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
941 goto nla_put_failure; 943 goto nla_put_failure;
942 copy_rtnl_link_stats64(nla_data(attr), stats); 944 copy_rtnl_link_stats64(nla_data(attr), stats);
943 945
944 if (dev->dev.parent) 946 if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
945 NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); 947 NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
946 948
947 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { 949 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
950 && (ext_filter_mask & RTEXT_FILTER_VF)) {
948 int i; 951 int i;
949 952
950 struct nlattr *vfinfo, *vf; 953 struct nlattr *vfinfo, *vf;
@@ -1048,6 +1051,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1048 struct net_device *dev; 1051 struct net_device *dev;
1049 struct hlist_head *head; 1052 struct hlist_head *head;
1050 struct hlist_node *node; 1053 struct hlist_node *node;
1054 struct nlattr *tb[IFLA_MAX+1];
1055 u32 ext_filter_mask = 0;
1051 1056
1052 s_h = cb->args[0]; 1057 s_h = cb->args[0];
1053 s_idx = cb->args[1]; 1058 s_idx = cb->args[1];
@@ -1055,6 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1055 rcu_read_lock(); 1060 rcu_read_lock();
1056 cb->seq = net->dev_base_seq; 1061 cb->seq = net->dev_base_seq;
1057 1062
1063 nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
1064 ifla_policy);
1065
1066 if (tb[IFLA_EXT_MASK])
1067 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
1068
1058 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1069 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1059 idx = 0; 1070 idx = 0;
1060 head = &net->dev_index_head[h]; 1071 head = &net->dev_index_head[h];
@@ -1064,7 +1075,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1064 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1075 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
1065 NETLINK_CB(cb->skb).pid, 1076 NETLINK_CB(cb->skb).pid,
1066 cb->nlh->nlmsg_seq, 0, 1077 cb->nlh->nlmsg_seq, 0,
1067 NLM_F_MULTI) <= 0) 1078 NLM_F_MULTI,
1079 ext_filter_mask) <= 0)
1068 goto out; 1080 goto out;
1069 1081
1070 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1082 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1100,6 +1112,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
1100 [IFLA_VF_PORTS] = { .type = NLA_NESTED }, 1112 [IFLA_VF_PORTS] = { .type = NLA_NESTED },
1101 [IFLA_PORT_SELF] = { .type = NLA_NESTED }, 1113 [IFLA_PORT_SELF] = { .type = NLA_NESTED },
1102 [IFLA_AF_SPEC] = { .type = NLA_NESTED }, 1114 [IFLA_AF_SPEC] = { .type = NLA_NESTED },
1115 [IFLA_EXT_MASK] = { .type = NLA_U32 },
1103}; 1116};
1104EXPORT_SYMBOL(ifla_policy); 1117EXPORT_SYMBOL(ifla_policy);
1105 1118
@@ -1509,8 +1522,6 @@ errout:
1509 1522
1510 if (send_addr_notify) 1523 if (send_addr_notify)
1511 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); 1524 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1512 min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev),
1513 min_ifinfo_dump_size);
1514 1525
1515 return err; 1526 return err;
1516} 1527}
@@ -1842,6 +1853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1842 struct net_device *dev = NULL; 1853 struct net_device *dev = NULL;
1843 struct sk_buff *nskb; 1854 struct sk_buff *nskb;
1844 int err; 1855 int err;
1856 u32 ext_filter_mask = 0;
1845 1857
1846 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); 1858 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
1847 if (err < 0) 1859 if (err < 0)
@@ -1850,6 +1862,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1850 if (tb[IFLA_IFNAME]) 1862 if (tb[IFLA_IFNAME])
1851 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 1863 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
1852 1864
1865 if (tb[IFLA_EXT_MASK])
1866 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
1867
1853 ifm = nlmsg_data(nlh); 1868 ifm = nlmsg_data(nlh);
1854 if (ifm->ifi_index > 0) 1869 if (ifm->ifi_index > 0)
1855 dev = __dev_get_by_index(net, ifm->ifi_index); 1870 dev = __dev_get_by_index(net, ifm->ifi_index);
@@ -1861,12 +1876,12 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1861 if (dev == NULL) 1876 if (dev == NULL)
1862 return -ENODEV; 1877 return -ENODEV;
1863 1878
1864 nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); 1879 nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
1865 if (nskb == NULL) 1880 if (nskb == NULL)
1866 return -ENOBUFS; 1881 return -ENOBUFS;
1867 1882
1868 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, 1883 err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
1869 nlh->nlmsg_seq, 0, 0); 1884 nlh->nlmsg_seq, 0, 0, ext_filter_mask);
1870 if (err < 0) { 1885 if (err < 0) {
1871 /* -EMSGSIZE implies BUG in if_nlmsg_size */ 1886 /* -EMSGSIZE implies BUG in if_nlmsg_size */
1872 WARN_ON(err == -EMSGSIZE); 1887 WARN_ON(err == -EMSGSIZE);
@@ -1877,8 +1892,31 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1877 return err; 1892 return err;
1878} 1893}
1879 1894
1880static u16 rtnl_calcit(struct sk_buff *skb) 1895static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
1881{ 1896{
1897 struct net *net = sock_net(skb->sk);
1898 struct net_device *dev;
1899 struct nlattr *tb[IFLA_MAX+1];
1900 u32 ext_filter_mask = 0;
1901 u16 min_ifinfo_dump_size = 0;
1902
1903 nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy);
1904
1905 if (tb[IFLA_EXT_MASK])
1906 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
1907
1908 if (!ext_filter_mask)
1909 return NLMSG_GOODSIZE;
1910 /*
1911 * traverse the list of net devices and compute the minimum
1912 * buffer size based upon the filter mask.
1913 */
1914 list_for_each_entry(dev, &net->dev_base_head, dev_list) {
1915 min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
1916 if_nlmsg_size(dev,
1917 ext_filter_mask));
1918 }
1919
1882 return min_ifinfo_dump_size; 1920 return min_ifinfo_dump_size;
1883} 1921}
1884 1922
@@ -1913,13 +1951,11 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
1913 int err = -ENOBUFS; 1951 int err = -ENOBUFS;
1914 size_t if_info_size; 1952 size_t if_info_size;
1915 1953
1916 skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); 1954 skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
1917 if (skb == NULL) 1955 if (skb == NULL)
1918 goto errout; 1956 goto errout;
1919 1957
1920 min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); 1958 err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
1921
1922 err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
1923 if (err < 0) { 1959 if (err < 0) {
1924 /* -EMSGSIZE implies BUG in if_nlmsg_size() */ 1960 /* -EMSGSIZE implies BUG in if_nlmsg_size() */
1925 WARN_ON(err == -EMSGSIZE); 1961 WARN_ON(err == -EMSGSIZE);
@@ -1977,7 +2013,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1977 return -EOPNOTSUPP; 2013 return -EOPNOTSUPP;
1978 calcit = rtnl_get_calcit(family, type); 2014 calcit = rtnl_get_calcit(family, type);
1979 if (calcit) 2015 if (calcit)
1980 min_dump_alloc = calcit(skb); 2016 min_dump_alloc = calcit(skb, nlh);
1981 2017
1982 __rtnl_unlock(); 2018 __rtnl_unlock();
1983 rtnl = net->rtnl; 2019 rtnl = net->rtnl;