aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-04-27 18:56:07 -0400
committerDavid S. Miller <davem@davemloft.net>2011-05-02 18:26:28 -0400
commite67f88dd12f610da98ca838822f2c9b4e7c6100e (patch)
tree6cf01b794984aaad97b6e6ff6e5103bc48d68191 /net
parentdcfd9cdc1222f14d6180514e533289493a0716fb (diff)
net: dont hold rtnl mutex during netlink dump callbacks
Four years ago, Patrick made a change to hold rtnl mutex during netlink dump callbacks. I believe it was a wrong move. This slows down concurrent dumps, making good old /proc/net/ files faster than rtnetlink in some situations. This occurred to me because one "ip link show dev ..." was _very_ slow on a workload adding/removing network devices in background. All dump callbacks are able to use RCU locking now, so this patch does roughly a revert of commits : 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks This let writers fight for rtnl mutex and readers going full speed. It also takes care of phonet : phonet_route_get() is now called from rcu read section. I renamed it to phonet_route_get_rcu() Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Patrick McHardy <kaber@trash.net> Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com> Acked-by: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_netlink.c7
-rw-r--r--net/core/fib_rules.c3
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/decnet/dn_dev.c10
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/phonet/pn_dev.c6
-rw-r--r--net/phonet/pn_netlink.c4
7 files changed, 24 insertions, 22 deletions
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 134a2ff6b98b..ffb0dc4cc0e8 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -120,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
120 int idx; 120 int idx;
121 121
122 idx = 0; 122 idx = 0;
123 for_each_netdev(net, dev) { 123 rcu_read_lock();
124 struct net_bridge_port *port = br_port_get_rtnl(dev); 124 for_each_netdev_rcu(net, dev) {
125 struct net_bridge_port *port = br_port_get_rcu(dev);
125 126
126 /* not a bridge port */ 127 /* not a bridge port */
127 if (!port || idx < cb->args[0]) 128 if (!port || idx < cb->args[0])
@@ -135,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
135skip: 136skip:
136 ++idx; 137 ++idx;
137 } 138 }
138 139 rcu_read_unlock();
139 cb->args[0] = idx; 140 cb->args[0] = idx;
140 141
141 return skb->len; 142 return skb->len;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 8248ebb5891d..3911586e12e4 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
590 int idx = 0; 590 int idx = 0;
591 struct fib_rule *rule; 591 struct fib_rule *rule;
592 592
593 list_for_each_entry(rule, &ops->rules_list, list) { 593 rcu_read_lock();
594 list_for_each_entry_rcu(rule, &ops->rules_list, list) {
594 if (idx < cb->args[1]) 595 if (idx < cb->args[1])
595 goto skip; 596 goto skip;
596 597
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4b1820..296331257195 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1007 s_h = cb->args[0]; 1007 s_h = cb->args[0];
1008 s_idx = cb->args[1]; 1008 s_idx = cb->args[1];
1009 1009
1010 rcu_read_lock();
1010 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1011 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1011 idx = 0; 1012 idx = 0;
1012 head = &net->dev_index_head[h]; 1013 head = &net->dev_index_head[h];
1013 hlist_for_each_entry(dev, node, head, index_hlist) { 1014 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1014 if (idx < s_idx) 1015 if (idx < s_idx)
1015 goto cont; 1016 goto cont;
1016 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1017 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -1023,6 +1024,7 @@ cont:
1023 } 1024 }
1024 } 1025 }
1025out: 1026out:
1027 rcu_read_unlock();
1026 cb->args[1] = idx; 1028 cb->args[1] = idx;
1027 cb->args[0] = h; 1029 cb->args[0] = h;
1028 1030
@@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1879 int min_len; 1881 int min_len;
1880 int family; 1882 int family;
1881 int type; 1883 int type;
1882 int err;
1883 1884
1884 type = nlh->nlmsg_type; 1885 type = nlh->nlmsg_type;
1885 if (type > RTM_MAX) 1886 if (type > RTM_MAX)
@@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
1906 if (dumpit == NULL) 1907 if (dumpit == NULL)
1907 return -EOPNOTSUPP; 1908 return -EOPNOTSUPP;
1908 1909
1909 __rtnl_unlock();
1910 rtnl = net->rtnl; 1910 rtnl = net->rtnl;
1911 err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); 1911 return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
1912 rtnl_lock();
1913 return err;
1914 } 1912 }
1915 1913
1916 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); 1914 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
@@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
1980{ 1978{
1981 struct sock *sk; 1979 struct sock *sk;
1982 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, 1980 sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
1983 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); 1981 rtnetlink_rcv, NULL, THIS_MODULE);
1984 if (!sk) 1982 if (!sk)
1985 return -ENOMEM; 1983 return -ENOMEM;
1986 net->rtnl = sk; 1984 net->rtnl = sk;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 0dcaa903e00e..404fa1591027 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -752,7 +752,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
752 skip_naddr = cb->args[1]; 752 skip_naddr = cb->args[1];
753 753
754 idx = 0; 754 idx = 0;
755 for_each_netdev(&init_net, dev) { 755 rcu_read_lock();
756 for_each_netdev_rcu(&init_net, dev) {
756 if (idx < skip_ndevs) 757 if (idx < skip_ndevs)
757 goto cont; 758 goto cont;
758 else if (idx > skip_ndevs) { 759 else if (idx > skip_ndevs) {
@@ -761,11 +762,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
761 skip_naddr = 0; 762 skip_naddr = 0;
762 } 763 }
763 764
764 if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) 765 if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
765 goto cont; 766 goto cont;
766 767
767 for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; 768 for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
768 ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { 769 ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
769 if (dn_idx < skip_naddr) 770 if (dn_idx < skip_naddr)
770 continue; 771 continue;
771 772
@@ -778,6 +779,7 @@ cont:
778 idx++; 779 idx++;
779 } 780 }
780done: 781done:
782 rcu_read_unlock();
781 cb->args[0] = idx; 783 cb->args[0] = idx;
782 cb->args[1] = dn_idx; 784 cb->args[1] = dn_idx;
783 785
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index dd88df0a5d7f..4076a0b14b20 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -394,10 +394,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
394 arg.net = net; 394 arg.net = net;
395 w->args = &arg; 395 w->args = &arg;
396 396
397 rcu_read_lock();
397 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { 398 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
398 e = 0; 399 e = 0;
399 head = &net->ipv6.fib_table_hash[h]; 400 head = &net->ipv6.fib_table_hash[h];
400 hlist_for_each_entry(tb, node, head, tb6_hlist) { 401 hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
401 if (e < s_e) 402 if (e < s_e)
402 goto next; 403 goto next;
403 res = fib6_dump_table(tb, skb, cb); 404 res = fib6_dump_table(tb, skb, cb);
@@ -408,6 +409,7 @@ next:
408 } 409 }
409 } 410 }
410out: 411out:
412 rcu_read_unlock();
411 cb->args[1] = e; 413 cb->args[1] = e;
412 cb->args[0] = h; 414 cb->args[0] = h;
413 415
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 947038ddd04c..47b3452675b6 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -426,18 +426,14 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
426 return 0; 426 return 0;
427} 427}
428 428
429struct net_device *phonet_route_get(struct net *net, u8 daddr) 429struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr)
430{ 430{
431 struct phonet_net *pnn = phonet_pernet(net); 431 struct phonet_net *pnn = phonet_pernet(net);
432 struct phonet_routes *routes = &pnn->routes; 432 struct phonet_routes *routes = &pnn->routes;
433 struct net_device *dev; 433 struct net_device *dev;
434 434
435 ASSERT_RTNL(); /* no need to hold the device */
436
437 daddr >>= 2; 435 daddr >>= 2;
438 rcu_read_lock();
439 dev = rcu_dereference(routes->table[daddr]); 436 dev = rcu_dereference(routes->table[daddr]);
440 rcu_read_unlock();
441 return dev; 437 return dev;
442} 438}
443 439
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 58b3b1f991ed..438accb7a5a8 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -264,10 +264,11 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
264 struct net *net = sock_net(skb->sk); 264 struct net *net = sock_net(skb->sk);
265 u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; 265 u8 addr, addr_idx = 0, addr_start_idx = cb->args[0];
266 266
267 rcu_read_lock();
267 for (addr = 0; addr < 64; addr++) { 268 for (addr = 0; addr < 64; addr++) {
268 struct net_device *dev; 269 struct net_device *dev;
269 270
270 dev = phonet_route_get(net, addr << 2); 271 dev = phonet_route_get_rcu(net, addr << 2);
271 if (!dev) 272 if (!dev)
272 continue; 273 continue;
273 274
@@ -279,6 +280,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
279 } 280 }
280 281
281out: 282out:
283 rcu_read_unlock();
282 cb->args[0] = addr_idx; 284 cb->args[0] = addr_idx;
283 cb->args[1] = 0; 285 cb->args[1] = 0;
284 286