diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-04-27 18:56:07 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-05-02 18:26:28 -0400 |
commit | e67f88dd12f610da98ca838822f2c9b4e7c6100e (patch) | |
tree | 6cf01b794984aaad97b6e6ff6e5103bc48d68191 /net | |
parent | dcfd9cdc1222f14d6180514e533289493a0716fb (diff) |
net: don't hold rtnl mutex during netlink dump callbacks
Four years ago, Patrick made a change to hold rtnl mutex during netlink
dump callbacks.
I believe it was a wrong move. This slows down concurrent dumps, making
good old /proc/net/ files faster than rtnetlink in some situations.
This occurred to me because one "ip link show dev ..." was _very_ slow
on a workload adding/removing network devices in the background.
All dump callbacks are able to use RCU locking now, so this patch is
roughly a revert of the following commits:
1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks
6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks
This lets writers fight for the rtnl mutex while readers go at full speed.
It also takes care of phonet: phonet_route_get() is now called from an RCU
read-side section, so I renamed it to phonet_route_get_rcu().
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Remi Denis-Courmont <remi.denis-courmont@nokia.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/br_netlink.c | 7 | ||||
-rw-r--r-- | net/core/fib_rules.c | 3 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 12 | ||||
-rw-r--r-- | net/decnet/dn_dev.c | 10 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 4 | ||||
-rw-r--r-- | net/phonet/pn_dev.c | 6 | ||||
-rw-r--r-- | net/phonet/pn_netlink.c | 4 |
7 files changed, 24 insertions, 22 deletions
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 134a2ff6b98b..ffb0dc4cc0e8 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c | |||
@@ -120,8 +120,9 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
120 | int idx; | 120 | int idx; |
121 | 121 | ||
122 | idx = 0; | 122 | idx = 0; |
123 | for_each_netdev(net, dev) { | 123 | rcu_read_lock(); |
124 | struct net_bridge_port *port = br_port_get_rtnl(dev); | 124 | for_each_netdev_rcu(net, dev) { |
125 | struct net_bridge_port *port = br_port_get_rcu(dev); | ||
125 | 126 | ||
126 | /* not a bridge port */ | 127 | /* not a bridge port */ |
127 | if (!port || idx < cb->args[0]) | 128 | if (!port || idx < cb->args[0]) |
@@ -135,7 +136,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
135 | skip: | 136 | skip: |
136 | ++idx; | 137 | ++idx; |
137 | } | 138 | } |
138 | 139 | rcu_read_unlock(); | |
139 | cb->args[0] = idx; | 140 | cb->args[0] = idx; |
140 | 141 | ||
141 | return skb->len; | 142 | return skb->len; |
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 8248ebb5891d..3911586e12e4 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, | |||
590 | int idx = 0; | 590 | int idx = 0; |
591 | struct fib_rule *rule; | 591 | struct fib_rule *rule; |
592 | 592 | ||
593 | list_for_each_entry(rule, &ops->rules_list, list) { | 593 | rcu_read_lock(); |
594 | list_for_each_entry_rcu(rule, &ops->rules_list, list) { | ||
594 | if (idx < cb->args[1]) | 595 | if (idx < cb->args[1]) |
595 | goto skip; | 596 | goto skip; |
596 | 597 | ||
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d7c4bb4b1820..296331257195 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
1007 | s_h = cb->args[0]; | 1007 | s_h = cb->args[0]; |
1008 | s_idx = cb->args[1]; | 1008 | s_idx = cb->args[1]; |
1009 | 1009 | ||
1010 | rcu_read_lock(); | ||
1010 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { | 1011 | for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { |
1011 | idx = 0; | 1012 | idx = 0; |
1012 | head = &net->dev_index_head[h]; | 1013 | head = &net->dev_index_head[h]; |
1013 | hlist_for_each_entry(dev, node, head, index_hlist) { | 1014 | hlist_for_each_entry_rcu(dev, node, head, index_hlist) { |
1014 | if (idx < s_idx) | 1015 | if (idx < s_idx) |
1015 | goto cont; | 1016 | goto cont; |
1016 | if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, | 1017 | if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, |
@@ -1023,6 +1024,7 @@ cont: | |||
1023 | } | 1024 | } |
1024 | } | 1025 | } |
1025 | out: | 1026 | out: |
1027 | rcu_read_unlock(); | ||
1026 | cb->args[1] = idx; | 1028 | cb->args[1] = idx; |
1027 | cb->args[0] = h; | 1029 | cb->args[0] = h; |
1028 | 1030 | ||
@@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
1879 | int min_len; | 1881 | int min_len; |
1880 | int family; | 1882 | int family; |
1881 | int type; | 1883 | int type; |
1882 | int err; | ||
1883 | 1884 | ||
1884 | type = nlh->nlmsg_type; | 1885 | type = nlh->nlmsg_type; |
1885 | if (type > RTM_MAX) | 1886 | if (type > RTM_MAX) |
@@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
1906 | if (dumpit == NULL) | 1907 | if (dumpit == NULL) |
1907 | return -EOPNOTSUPP; | 1908 | return -EOPNOTSUPP; |
1908 | 1909 | ||
1909 | __rtnl_unlock(); | ||
1910 | rtnl = net->rtnl; | 1910 | rtnl = net->rtnl; |
1911 | err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); | 1911 | return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); |
1912 | rtnl_lock(); | ||
1913 | return err; | ||
1914 | } | 1912 | } |
1915 | 1913 | ||
1916 | memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); | 1914 | memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); |
@@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net) | |||
1980 | { | 1978 | { |
1981 | struct sock *sk; | 1979 | struct sock *sk; |
1982 | sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, | 1980 | sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, |
1983 | rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); | 1981 | rtnetlink_rcv, NULL, THIS_MODULE); |
1984 | if (!sk) | 1982 | if (!sk) |
1985 | return -ENOMEM; | 1983 | return -ENOMEM; |
1986 | net->rtnl = sk; | 1984 | net->rtnl = sk; |
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0dcaa903e00e..404fa1591027 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c | |||
@@ -752,7 +752,8 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | |||
752 | skip_naddr = cb->args[1]; | 752 | skip_naddr = cb->args[1]; |
753 | 753 | ||
754 | idx = 0; | 754 | idx = 0; |
755 | for_each_netdev(&init_net, dev) { | 755 | rcu_read_lock(); |
756 | for_each_netdev_rcu(&init_net, dev) { | ||
756 | if (idx < skip_ndevs) | 757 | if (idx < skip_ndevs) |
757 | goto cont; | 758 | goto cont; |
758 | else if (idx > skip_ndevs) { | 759 | else if (idx > skip_ndevs) { |
@@ -761,11 +762,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | |||
761 | skip_naddr = 0; | 762 | skip_naddr = 0; |
762 | } | 763 | } |
763 | 764 | ||
764 | if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) | 765 | if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL) |
765 | goto cont; | 766 | goto cont; |
766 | 767 | ||
767 | for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa; | 768 | for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa; |
768 | ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) { | 769 | ifa = rcu_dereference(ifa->ifa_next), dn_idx++) { |
769 | if (dn_idx < skip_naddr) | 770 | if (dn_idx < skip_naddr) |
770 | continue; | 771 | continue; |
771 | 772 | ||
@@ -778,6 +779,7 @@ cont: | |||
778 | idx++; | 779 | idx++; |
779 | } | 780 | } |
780 | done: | 781 | done: |
782 | rcu_read_unlock(); | ||
781 | cb->args[0] = idx; | 783 | cb->args[0] = idx; |
782 | cb->args[1] = dn_idx; | 784 | cb->args[1] = dn_idx; |
783 | 785 | ||
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index dd88df0a5d7f..4076a0b14b20 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c | |||
@@ -394,10 +394,11 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | |||
394 | arg.net = net; | 394 | arg.net = net; |
395 | w->args = &arg; | 395 | w->args = &arg; |
396 | 396 | ||
397 | rcu_read_lock(); | ||
397 | for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { | 398 | for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { |
398 | e = 0; | 399 | e = 0; |
399 | head = &net->ipv6.fib_table_hash[h]; | 400 | head = &net->ipv6.fib_table_hash[h]; |
400 | hlist_for_each_entry(tb, node, head, tb6_hlist) { | 401 | hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { |
401 | if (e < s_e) | 402 | if (e < s_e) |
402 | goto next; | 403 | goto next; |
403 | res = fib6_dump_table(tb, skb, cb); | 404 | res = fib6_dump_table(tb, skb, cb); |
@@ -408,6 +409,7 @@ next: | |||
408 | } | 409 | } |
409 | } | 410 | } |
410 | out: | 411 | out: |
412 | rcu_read_unlock(); | ||
411 | cb->args[1] = e; | 413 | cb->args[1] = e; |
412 | cb->args[0] = h; | 414 | cb->args[0] = h; |
413 | 415 | ||
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 947038ddd04c..47b3452675b6 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c | |||
@@ -426,18 +426,14 @@ int phonet_route_del(struct net_device *dev, u8 daddr) | |||
426 | return 0; | 426 | return 0; |
427 | } | 427 | } |
428 | 428 | ||
429 | struct net_device *phonet_route_get(struct net *net, u8 daddr) | 429 | struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr) |
430 | { | 430 | { |
431 | struct phonet_net *pnn = phonet_pernet(net); | 431 | struct phonet_net *pnn = phonet_pernet(net); |
432 | struct phonet_routes *routes = &pnn->routes; | 432 | struct phonet_routes *routes = &pnn->routes; |
433 | struct net_device *dev; | 433 | struct net_device *dev; |
434 | 434 | ||
435 | ASSERT_RTNL(); /* no need to hold the device */ | ||
436 | |||
437 | daddr >>= 2; | 435 | daddr >>= 2; |
438 | rcu_read_lock(); | ||
439 | dev = rcu_dereference(routes->table[daddr]); | 436 | dev = rcu_dereference(routes->table[daddr]); |
440 | rcu_read_unlock(); | ||
441 | return dev; | 437 | return dev; |
442 | } | 438 | } |
443 | 439 | ||
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 58b3b1f991ed..438accb7a5a8 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c | |||
@@ -264,10 +264,11 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) | |||
264 | struct net *net = sock_net(skb->sk); | 264 | struct net *net = sock_net(skb->sk); |
265 | u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; | 265 | u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; |
266 | 266 | ||
267 | rcu_read_lock(); | ||
267 | for (addr = 0; addr < 64; addr++) { | 268 | for (addr = 0; addr < 64; addr++) { |
268 | struct net_device *dev; | 269 | struct net_device *dev; |
269 | 270 | ||
270 | dev = phonet_route_get(net, addr << 2); | 271 | dev = phonet_route_get_rcu(net, addr << 2); |
271 | if (!dev) | 272 | if (!dev) |
272 | continue; | 273 | continue; |
273 | 274 | ||
@@ -279,6 +280,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) | |||
279 | } | 280 | } |
280 | 281 | ||
281 | out: | 282 | out: |
283 | rcu_read_unlock(); | ||
282 | cb->args[0] = addr_idx; | 284 | cb->args[0] = addr_idx; |
283 | cb->args[1] = 0; | 285 | cb->args[1] = 0; |
284 | 286 | ||