aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKirill Tkhai <ktkhai@virtuozzo.com>2018-03-29 12:20:32 -0400
committerDavid S. Miller <davem@davemloft.net>2018-03-29 13:47:53 -0400
commitf0b07bb151b098d291fd1fd71ef7a2df56fb124a (patch)
tree24f28ec5ec61e4b0950fef35da79853357a34afb
parent906edee91e79af5a348f1ad1b3f9b4b948db3db7 (diff)
net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high. When someone wants to iterate over alive net namespaces, he/she has no possibility to do that without an exclusive lock. But the exclusive rtnl_lock() in such places is overkill, and it just increases the contention. Yes, there is already for_each_net_rcu() in the kernel, but it requires rcu_read_lock(), and this can't be sleepable. Also, sometimes it may really be needed to prevent net_namespace_list growth, so for_each_net_rcu() does not fit there. This patch introduces a new rw_semaphore, which will be used instead of rtnl_mutex to protect net_namespace_list. It is sleepable and allows non-exclusive iterations over the net namespaces list. It allows us to stop using rtnl_lock() in several places (which is done in the next patches) and reduces the time we hold rtnl_mutex. Here we just add the new lock, while the explanation of why we can remove rtnl_lock() there is in the next patches. Fine-grained locks are generally better than one big lock, so let's do that with net_namespace_list, while the situation allows it. Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c2
-rw-r--r--include/linux/rtnetlink.h1
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/fib_notifier.c2
-rw-r--r--net/core/net_namespace.c18
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/wireless/wext-core.c2
-rw-r--r--security/selinux/include/xfrm.h2
11 files changed, 37 insertions, 5 deletions
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 5a52ec77940a..cc2966380c0c 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -403,10 +403,12 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
403 * our feet 403 * our feet
404 */ 404 */
405 rtnl_lock(); 405 rtnl_lock();
406 down_read(&net_rwsem);
406 for_each_net(net) 407 for_each_net(net)
407 for_each_netdev(net, ndev) 408 for_each_netdev(net, ndev)
408 if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) 409 if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
409 add_netdev_ips(ib_dev, port, rdma_ndev, ndev); 410 add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
411 up_read(&net_rwsem);
410 rtnl_unlock(); 412 rtnl_unlock();
411} 413}
412 414
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index c7d1e4689325..5225832bd6ff 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -37,6 +37,7 @@ extern int rtnl_lock_killable(void);
37 37
38extern wait_queue_head_t netdev_unregistering_wq; 38extern wait_queue_head_t netdev_unregistering_wq;
39extern struct rw_semaphore pernet_ops_rwsem; 39extern struct rw_semaphore pernet_ops_rwsem;
40extern struct rw_semaphore net_rwsem;
40 41
41#ifdef CONFIG_PROVE_LOCKING 42#ifdef CONFIG_PROVE_LOCKING
42extern bool lockdep_rtnl_is_held(void); 43extern bool lockdep_rtnl_is_held(void);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1ab4f920f109..47e35cce3b64 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -291,6 +291,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
291#endif 291#endif
292} 292}
293 293
294/* Protected by net_rwsem */
294#define for_each_net(VAR) \ 295#define for_each_net(VAR) \
295 list_for_each_entry(VAR, &net_namespace_list, list) 296 list_for_each_entry(VAR, &net_namespace_list, list)
296 297
diff --git a/net/core/dev.c b/net/core/dev.c
index e13807b5c84d..eca5458b2753 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1629,6 +1629,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
1629 goto unlock; 1629 goto unlock;
1630 if (dev_boot_phase) 1630 if (dev_boot_phase)
1631 goto unlock; 1631 goto unlock;
1632 down_read(&net_rwsem);
1632 for_each_net(net) { 1633 for_each_net(net) {
1633 for_each_netdev(net, dev) { 1634 for_each_netdev(net, dev) {
1634 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); 1635 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
@@ -1642,6 +1643,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
1642 call_netdevice_notifier(nb, NETDEV_UP, dev); 1643 call_netdevice_notifier(nb, NETDEV_UP, dev);
1643 } 1644 }
1644 } 1645 }
1646 up_read(&net_rwsem);
1645 1647
1646unlock: 1648unlock:
1647 rtnl_unlock(); 1649 rtnl_unlock();
@@ -1664,6 +1666,7 @@ rollback:
1664 } 1666 }
1665 1667
1666outroll: 1668outroll:
1669 up_read(&net_rwsem);
1667 raw_notifier_chain_unregister(&netdev_chain, nb); 1670 raw_notifier_chain_unregister(&netdev_chain, nb);
1668 goto unlock; 1671 goto unlock;
1669} 1672}
@@ -1694,6 +1697,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
1694 if (err) 1697 if (err)
1695 goto unlock; 1698 goto unlock;
1696 1699
1700 down_read(&net_rwsem);
1697 for_each_net(net) { 1701 for_each_net(net) {
1698 for_each_netdev(net, dev) { 1702 for_each_netdev(net, dev) {
1699 if (dev->flags & IFF_UP) { 1703 if (dev->flags & IFF_UP) {
@@ -1704,6 +1708,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
1704 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); 1708 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1705 } 1709 }
1706 } 1710 }
1711 up_read(&net_rwsem);
1707unlock: 1712unlock:
1708 rtnl_unlock(); 1713 rtnl_unlock();
1709 return err; 1714 return err;
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..614b985c92a4 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -33,6 +33,7 @@ static unsigned int fib_seq_sum(void)
33 struct net *net; 33 struct net *net;
34 34
35 rtnl_lock(); 35 rtnl_lock();
36 down_read(&net_rwsem);
36 for_each_net(net) { 37 for_each_net(net) {
37 rcu_read_lock(); 38 rcu_read_lock();
38 list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { 39 list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
@@ -43,6 +44,7 @@ static unsigned int fib_seq_sum(void)
43 } 44 }
44 rcu_read_unlock(); 45 rcu_read_unlock();
45 } 46 }
47 up_read(&net_rwsem);
46 rtnl_unlock(); 48 rtnl_unlock();
47 49
48 return fib_seq; 50 return fib_seq;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b5796d17a302..7fdf321d4997 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list;
33LIST_HEAD(net_namespace_list); 33LIST_HEAD(net_namespace_list);
34EXPORT_SYMBOL_GPL(net_namespace_list); 34EXPORT_SYMBOL_GPL(net_namespace_list);
35 35
36/* Protects net_namespace_list. Nests inside rtnl_lock() */
37DECLARE_RWSEM(net_rwsem);
38EXPORT_SYMBOL_GPL(net_rwsem);
39
36struct net init_net = { 40struct net init_net = {
37 .count = REFCOUNT_INIT(1), 41 .count = REFCOUNT_INIT(1),
38 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), 42 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
@@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
309 if (error < 0) 313 if (error < 0)
310 goto out_undo; 314 goto out_undo;
311 } 315 }
312 rtnl_lock(); 316 down_write(&net_rwsem);
313 list_add_tail_rcu(&net->list, &net_namespace_list); 317 list_add_tail_rcu(&net->list, &net_namespace_list);
314 rtnl_unlock(); 318 up_write(&net_rwsem);
315out: 319out:
316 return error; 320 return error;
317 321
@@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last)
450 * and this work is the only process, that may delete 454 * and this work is the only process, that may delete
451 * a net from net_namespace_list. So, when the below 455 * a net from net_namespace_list. So, when the below
452 * is executing, the list may only grow. Thus, we do not 456 * is executing, the list may only grow. Thus, we do not
453 * use for_each_net_rcu() or rtnl_lock(). 457 * use for_each_net_rcu() or net_rwsem.
454 */ 458 */
455 for_each_net(tmp) { 459 for_each_net(tmp) {
456 int id; 460 int id;
@@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work)
485 down_read(&pernet_ops_rwsem); 489 down_read(&pernet_ops_rwsem);
486 490
487 /* Don't let anyone else find us. */ 491 /* Don't let anyone else find us. */
488 rtnl_lock(); 492 down_write(&net_rwsem);
489 llist_for_each_entry(net, net_kill_list, cleanup_list) 493 llist_for_each_entry(net, net_kill_list, cleanup_list)
490 list_del_rcu(&net->list); 494 list_del_rcu(&net->list);
491 /* Cache last net. After we unlock rtnl, no one new net 495 /* Cache last net. After we unlock rtnl, no one new net
@@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work)
499 * useless anyway, as netns_ids are destroyed there. 503 * useless anyway, as netns_ids are destroyed there.
500 */ 504 */
501 last = list_last_entry(&net_namespace_list, struct net, list); 505 last = list_last_entry(&net_namespace_list, struct net, list);
502 rtnl_unlock(); 506 up_write(&net_rwsem);
503 507
504 llist_for_each_entry(net, net_kill_list, cleanup_list) { 508 llist_for_each_entry(net, net_kill_list, cleanup_list) {
505 unhash_nsid(net, last); 509 unhash_nsid(net, last);
@@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list,
900 904
901 list_add_tail(&ops->list, list); 905 list_add_tail(&ops->list, list);
902 if (ops->init || (ops->id && ops->size)) { 906 if (ops->init || (ops->id && ops->size)) {
907 /* We held write locked pernet_ops_rwsem, and parallel
908 * setup_net() and cleanup_net() are not possible.
909 */
903 for_each_net(net) { 910 for_each_net(net) {
904 error = ops_init(ops, net); 911 error = ops_init(ops, net);
905 if (error) 912 if (error)
@@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
923 LIST_HEAD(net_exit_list); 930 LIST_HEAD(net_exit_list);
924 931
925 list_del(&ops->list); 932 list_del(&ops->list);
933 /* See comment in __register_pernet_operations() */
926 for_each_net(net) 934 for_each_net(net)
927 list_add_tail(&net->exit_list, &net_exit_list); 935 list_add_tail(&net->exit_list, &net_exit_list);
928 ops_exit_list(ops, &net_exit_list); 936 ops_exit_list(ops, &net_exit_list);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d3949789cef..e86b28482ca7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -418,9 +418,11 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
418{ 418{
419 struct net *net; 419 struct net *net;
420 420
421 down_read(&net_rwsem);
421 for_each_net(net) { 422 for_each_net(net) {
422 __rtnl_kill_links(net, ops); 423 __rtnl_kill_links(net, ops);
423 } 424 }
425 up_read(&net_rwsem);
424 list_del(&ops->list); 426 list_del(&ops->list);
425} 427}
426EXPORT_SYMBOL_GPL(__rtnl_link_unregister); 428EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
@@ -438,6 +440,9 @@ static void rtnl_lock_unregistering_all(void)
438 for (;;) { 440 for (;;) {
439 unregistering = false; 441 unregistering = false;
440 rtnl_lock(); 442 rtnl_lock();
443 /* We held write locked pernet_ops_rwsem, and parallel
444 * setup_net() and cleanup_net() are not possible.
445 */
441 for_each_net(net) { 446 for_each_net(net) {
442 if (net->dev_unreg_count > 0) { 447 if (net->dev_unreg_count > 0) {
443 unregistering = true; 448 unregistering = true;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 705198de671d..370f9b7f051b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1764,12 +1764,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
1764 struct net *net; 1764 struct net *net;
1765 1765
1766 rtnl_lock(); 1766 rtnl_lock();
1767 down_read(&net_rwsem);
1767 for_each_net(net) { 1768 for_each_net(net) {
1768 if (atomic_read(&net->ct.count) == 0) 1769 if (atomic_read(&net->ct.count) == 0)
1769 continue; 1770 continue;
1770 __nf_ct_unconfirmed_destroy(net); 1771 __nf_ct_unconfirmed_destroy(net);
1771 nf_queue_nf_hook_drop(net); 1772 nf_queue_nf_hook_drop(net);
1772 } 1773 }
1774 up_read(&net_rwsem);
1773 rtnl_unlock(); 1775 rtnl_unlock();
1774 1776
1775 /* Need to wait for netns cleanup worker to finish, if its 1777 /* Need to wait for netns cleanup worker to finish, if its
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ef38e5aecd28..9746ee30a99b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2364,8 +2364,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
2364 __dp_destroy(dp); 2364 __dp_destroy(dp);
2365 2365
2366 rtnl_lock(); 2366 rtnl_lock();
2367 down_read(&net_rwsem);
2367 for_each_net(net) 2368 for_each_net(net)
2368 list_vports_from_net(net, dnet, &head); 2369 list_vports_from_net(net, dnet, &head);
2370 up_read(&net_rwsem);
2369 rtnl_unlock(); 2371 rtnl_unlock();
2370 2372
2371 /* Detach all vports from given namespace. */ 2373 /* Detach all vports from given namespace. */
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 9efbfc753347..544d7b62d7ca 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -349,11 +349,13 @@ void wireless_nlevent_flush(void)
349 349
350 ASSERT_RTNL(); 350 ASSERT_RTNL();
351 351
352 down_read(&net_rwsem);
352 for_each_net(net) { 353 for_each_net(net) {
353 while ((skb = skb_dequeue(&net->wext_nlevents))) 354 while ((skb = skb_dequeue(&net->wext_nlevents)))
354 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, 355 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
355 GFP_KERNEL); 356 GFP_KERNEL);
356 } 357 }
358 up_read(&net_rwsem);
357} 359}
358EXPORT_SYMBOL_GPL(wireless_nlevent_flush); 360EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
359 361
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 1f173a7a4daa..31d66431be1e 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -48,8 +48,10 @@ static inline void selinux_xfrm_notify_policyload(void)
48 struct net *net; 48 struct net *net;
49 49
50 rtnl_lock(); 50 rtnl_lock();
51 down_read(&net_rwsem);
51 for_each_net(net) 52 for_each_net(net)
52 rt_genid_bump_all(net); 53 rt_genid_bump_all(net);
54 up_read(&net_rwsem);
53 rtnl_unlock(); 55 rtnl_unlock();
54} 56}
55#else 57#else