diff options
| author | Kirill Tkhai <ktkhai@virtuozzo.com> | 2018-03-29 12:20:32 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2018-03-29 13:47:53 -0400 |
| commit | f0b07bb151b098d291fd1fd71ef7a2df56fb124a (patch) | |
| tree | 24f28ec5ec61e4b0950fef35da79853357a34afb /net | |
| parent | 906edee91e79af5a348f1ad1b3f9b4b948db3db7 (diff) | |
net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high.
When someone wants to iterate over alive net namespaces,
there is no way to do that without taking the exclusive lock.
But the exclusive rtnl_lock() in such places is overkill,
and it just increases the contention. Yes, there is already
for_each_net_rcu() in the kernel, but it requires rcu_read_lock(),
so the iteration can't sleep. Also, sometimes it is necessary
to really prevent net_namespace_list growth, so for_each_net_rcu()
does not fit there.
This patch introduces a new rw_semaphore, which will be used
instead of rtnl_mutex to protect net_namespace_list. It is
sleepable and allows non-exclusive iteration over the net
namespaces list. It allows us to stop using rtnl_lock()
in several places (which is done in the next patches) and reduces
the time we hold rtnl_mutex. Here we just add the new lock,
while the explanation of why we can remove rtnl_lock() there is
in the next patches.
Fine-grained locks are generally better than one big lock,
so let's do that with net_namespace_list, while the situation
allows that.
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
| -rw-r--r-- | net/core/dev.c | 5 | ||||
| -rw-r--r-- | net/core/fib_notifier.c | 2 | ||||
| -rw-r--r-- | net/core/net_namespace.c | 18 | ||||
| -rw-r--r-- | net/core/rtnetlink.c | 5 | ||||
| -rw-r--r-- | net/netfilter/nf_conntrack_core.c | 2 | ||||
| -rw-r--r-- | net/openvswitch/datapath.c | 2 | ||||
| -rw-r--r-- | net/wireless/wext-core.c | 2 |
7 files changed, 31 insertions, 5 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index e13807b5c84d..eca5458b2753 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -1629,6 +1629,7 @@ int register_netdevice_notifier(struct notifier_block *nb) | |||
| 1629 | goto unlock; | 1629 | goto unlock; |
| 1630 | if (dev_boot_phase) | 1630 | if (dev_boot_phase) |
| 1631 | goto unlock; | 1631 | goto unlock; |
| 1632 | down_read(&net_rwsem); | ||
| 1632 | for_each_net(net) { | 1633 | for_each_net(net) { |
| 1633 | for_each_netdev(net, dev) { | 1634 | for_each_netdev(net, dev) { |
| 1634 | err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); | 1635 | err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); |
| @@ -1642,6 +1643,7 @@ int register_netdevice_notifier(struct notifier_block *nb) | |||
| 1642 | call_netdevice_notifier(nb, NETDEV_UP, dev); | 1643 | call_netdevice_notifier(nb, NETDEV_UP, dev); |
| 1643 | } | 1644 | } |
| 1644 | } | 1645 | } |
| 1646 | up_read(&net_rwsem); | ||
| 1645 | 1647 | ||
| 1646 | unlock: | 1648 | unlock: |
| 1647 | rtnl_unlock(); | 1649 | rtnl_unlock(); |
| @@ -1664,6 +1666,7 @@ rollback: | |||
| 1664 | } | 1666 | } |
| 1665 | 1667 | ||
| 1666 | outroll: | 1668 | outroll: |
| 1669 | up_read(&net_rwsem); | ||
| 1667 | raw_notifier_chain_unregister(&netdev_chain, nb); | 1670 | raw_notifier_chain_unregister(&netdev_chain, nb); |
| 1668 | goto unlock; | 1671 | goto unlock; |
| 1669 | } | 1672 | } |
| @@ -1694,6 +1697,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) | |||
| 1694 | if (err) | 1697 | if (err) |
| 1695 | goto unlock; | 1698 | goto unlock; |
| 1696 | 1699 | ||
| 1700 | down_read(&net_rwsem); | ||
| 1697 | for_each_net(net) { | 1701 | for_each_net(net) { |
| 1698 | for_each_netdev(net, dev) { | 1702 | for_each_netdev(net, dev) { |
| 1699 | if (dev->flags & IFF_UP) { | 1703 | if (dev->flags & IFF_UP) { |
| @@ -1704,6 +1708,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) | |||
| 1704 | call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); | 1708 | call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); |
| 1705 | } | 1709 | } |
| 1706 | } | 1710 | } |
| 1711 | up_read(&net_rwsem); | ||
| 1707 | unlock: | 1712 | unlock: |
| 1708 | rtnl_unlock(); | 1713 | rtnl_unlock(); |
| 1709 | return err; | 1714 | return err; |
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 0c048bdeb016..614b985c92a4 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c | |||
| @@ -33,6 +33,7 @@ static unsigned int fib_seq_sum(void) | |||
| 33 | struct net *net; | 33 | struct net *net; |
| 34 | 34 | ||
| 35 | rtnl_lock(); | 35 | rtnl_lock(); |
| 36 | down_read(&net_rwsem); | ||
| 36 | for_each_net(net) { | 37 | for_each_net(net) { |
| 37 | rcu_read_lock(); | 38 | rcu_read_lock(); |
| 38 | list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { | 39 | list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { |
| @@ -43,6 +44,7 @@ static unsigned int fib_seq_sum(void) | |||
| 43 | } | 44 | } |
| 44 | rcu_read_unlock(); | 45 | rcu_read_unlock(); |
| 45 | } | 46 | } |
| 47 | up_read(&net_rwsem); | ||
| 46 | rtnl_unlock(); | 48 | rtnl_unlock(); |
| 47 | 49 | ||
| 48 | return fib_seq; | 50 | return fib_seq; |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b5796d17a302..7fdf321d4997 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
| @@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list; | |||
| 33 | LIST_HEAD(net_namespace_list); | 33 | LIST_HEAD(net_namespace_list); |
| 34 | EXPORT_SYMBOL_GPL(net_namespace_list); | 34 | EXPORT_SYMBOL_GPL(net_namespace_list); |
| 35 | 35 | ||
| 36 | /* Protects net_namespace_list. Nests inside rtnl_lock() */ | ||
| 37 | DECLARE_RWSEM(net_rwsem); | ||
| 38 | EXPORT_SYMBOL_GPL(net_rwsem); | ||
| 39 | |||
| 36 | struct net init_net = { | 40 | struct net init_net = { |
| 37 | .count = REFCOUNT_INIT(1), | 41 | .count = REFCOUNT_INIT(1), |
| 38 | .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), | 42 | .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), |
| @@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) | |||
| 309 | if (error < 0) | 313 | if (error < 0) |
| 310 | goto out_undo; | 314 | goto out_undo; |
| 311 | } | 315 | } |
| 312 | rtnl_lock(); | 316 | down_write(&net_rwsem); |
| 313 | list_add_tail_rcu(&net->list, &net_namespace_list); | 317 | list_add_tail_rcu(&net->list, &net_namespace_list); |
| 314 | rtnl_unlock(); | 318 | up_write(&net_rwsem); |
| 315 | out: | 319 | out: |
| 316 | return error; | 320 | return error; |
| 317 | 321 | ||
| @@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last) | |||
| 450 | * and this work is the only process, that may delete | 454 | * and this work is the only process, that may delete |
| 451 | * a net from net_namespace_list. So, when the below | 455 | * a net from net_namespace_list. So, when the below |
| 452 | * is executing, the list may only grow. Thus, we do not | 456 | * is executing, the list may only grow. Thus, we do not |
| 453 | * use for_each_net_rcu() or rtnl_lock(). | 457 | * use for_each_net_rcu() or net_rwsem. |
| 454 | */ | 458 | */ |
| 455 | for_each_net(tmp) { | 459 | for_each_net(tmp) { |
| 456 | int id; | 460 | int id; |
| @@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work) | |||
| 485 | down_read(&pernet_ops_rwsem); | 489 | down_read(&pernet_ops_rwsem); |
| 486 | 490 | ||
| 487 | /* Don't let anyone else find us. */ | 491 | /* Don't let anyone else find us. */ |
| 488 | rtnl_lock(); | 492 | down_write(&net_rwsem); |
| 489 | llist_for_each_entry(net, net_kill_list, cleanup_list) | 493 | llist_for_each_entry(net, net_kill_list, cleanup_list) |
| 490 | list_del_rcu(&net->list); | 494 | list_del_rcu(&net->list); |
| 491 | /* Cache last net. After we unlock rtnl, no one new net | 495 | /* Cache last net. After we unlock rtnl, no one new net |
| @@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work) | |||
| 499 | * useless anyway, as netns_ids are destroyed there. | 503 | * useless anyway, as netns_ids are destroyed there. |
| 500 | */ | 504 | */ |
| 501 | last = list_last_entry(&net_namespace_list, struct net, list); | 505 | last = list_last_entry(&net_namespace_list, struct net, list); |
| 502 | rtnl_unlock(); | 506 | up_write(&net_rwsem); |
| 503 | 507 | ||
| 504 | llist_for_each_entry(net, net_kill_list, cleanup_list) { | 508 | llist_for_each_entry(net, net_kill_list, cleanup_list) { |
| 505 | unhash_nsid(net, last); | 509 | unhash_nsid(net, last); |
| @@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list, | |||
| 900 | 904 | ||
| 901 | list_add_tail(&ops->list, list); | 905 | list_add_tail(&ops->list, list); |
| 902 | if (ops->init || (ops->id && ops->size)) { | 906 | if (ops->init || (ops->id && ops->size)) { |
| 907 | /* We hold pernet_ops_rwsem write-locked, so parallel | ||
| 908 | * setup_net() and cleanup_net() are not possible. | ||
| 909 | */ | ||
| 903 | for_each_net(net) { | 910 | for_each_net(net) { |
| 904 | error = ops_init(ops, net); | 911 | error = ops_init(ops, net); |
| 905 | if (error) | 912 | if (error) |
| @@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) | |||
| 923 | LIST_HEAD(net_exit_list); | 930 | LIST_HEAD(net_exit_list); |
| 924 | 931 | ||
| 925 | list_del(&ops->list); | 932 | list_del(&ops->list); |
| 933 | /* See comment in __register_pernet_operations() */ | ||
| 926 | for_each_net(net) | 934 | for_each_net(net) |
| 927 | list_add_tail(&net->exit_list, &net_exit_list); | 935 | list_add_tail(&net->exit_list, &net_exit_list); |
| 928 | ops_exit_list(ops, &net_exit_list); | 936 | ops_exit_list(ops, &net_exit_list); |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d3949789cef..e86b28482ca7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
| @@ -418,9 +418,11 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) | |||
| 418 | { | 418 | { |
| 419 | struct net *net; | 419 | struct net *net; |
| 420 | 420 | ||
| 421 | down_read(&net_rwsem); | ||
| 421 | for_each_net(net) { | 422 | for_each_net(net) { |
| 422 | __rtnl_kill_links(net, ops); | 423 | __rtnl_kill_links(net, ops); |
| 423 | } | 424 | } |
| 425 | up_read(&net_rwsem); | ||
| 424 | list_del(&ops->list); | 426 | list_del(&ops->list); |
| 425 | } | 427 | } |
| 426 | EXPORT_SYMBOL_GPL(__rtnl_link_unregister); | 428 | EXPORT_SYMBOL_GPL(__rtnl_link_unregister); |
| @@ -438,6 +440,9 @@ static void rtnl_lock_unregistering_all(void) | |||
| 438 | for (;;) { | 440 | for (;;) { |
| 439 | unregistering = false; | 441 | unregistering = false; |
| 440 | rtnl_lock(); | 442 | rtnl_lock(); |
| 443 | /* We hold pernet_ops_rwsem write-locked, so parallel | ||
| 444 | * setup_net() and cleanup_net() are not possible. | ||
| 445 | */ | ||
| 441 | for_each_net(net) { | 446 | for_each_net(net) { |
| 442 | if (net->dev_unreg_count > 0) { | 447 | if (net->dev_unreg_count > 0) { |
| 443 | unregistering = true; | 448 | unregistering = true; |
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 705198de671d..370f9b7f051b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
| @@ -1764,12 +1764,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) | |||
| 1764 | struct net *net; | 1764 | struct net *net; |
| 1765 | 1765 | ||
| 1766 | rtnl_lock(); | 1766 | rtnl_lock(); |
| 1767 | down_read(&net_rwsem); | ||
| 1767 | for_each_net(net) { | 1768 | for_each_net(net) { |
| 1768 | if (atomic_read(&net->ct.count) == 0) | 1769 | if (atomic_read(&net->ct.count) == 0) |
| 1769 | continue; | 1770 | continue; |
| 1770 | __nf_ct_unconfirmed_destroy(net); | 1771 | __nf_ct_unconfirmed_destroy(net); |
| 1771 | nf_queue_nf_hook_drop(net); | 1772 | nf_queue_nf_hook_drop(net); |
| 1772 | } | 1773 | } |
| 1774 | up_read(&net_rwsem); | ||
| 1773 | rtnl_unlock(); | 1775 | rtnl_unlock(); |
| 1774 | 1776 | ||
| 1775 | /* Need to wait for netns cleanup worker to finish, if its | 1777 | /* Need to wait for netns cleanup worker to finish, if its |
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ef38e5aecd28..9746ee30a99b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c | |||
| @@ -2364,8 +2364,10 @@ static void __net_exit ovs_exit_net(struct net *dnet) | |||
| 2364 | __dp_destroy(dp); | 2364 | __dp_destroy(dp); |
| 2365 | 2365 | ||
| 2366 | rtnl_lock(); | 2366 | rtnl_lock(); |
| 2367 | down_read(&net_rwsem); | ||
| 2367 | for_each_net(net) | 2368 | for_each_net(net) |
| 2368 | list_vports_from_net(net, dnet, &head); | 2369 | list_vports_from_net(net, dnet, &head); |
| 2370 | up_read(&net_rwsem); | ||
| 2369 | rtnl_unlock(); | 2371 | rtnl_unlock(); |
| 2370 | 2372 | ||
| 2371 | /* Detach all vports from given namespace. */ | 2373 | /* Detach all vports from given namespace. */ |
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 9efbfc753347..544d7b62d7ca 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c | |||
| @@ -349,11 +349,13 @@ void wireless_nlevent_flush(void) | |||
| 349 | 349 | ||
| 350 | ASSERT_RTNL(); | 350 | ASSERT_RTNL(); |
| 351 | 351 | ||
| 352 | down_read(&net_rwsem); | ||
| 352 | for_each_net(net) { | 353 | for_each_net(net) { |
| 353 | while ((skb = skb_dequeue(&net->wext_nlevents))) | 354 | while ((skb = skb_dequeue(&net->wext_nlevents))) |
| 354 | rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, | 355 | rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, |
| 355 | GFP_KERNEL); | 356 | GFP_KERNEL); |
| 356 | } | 357 | } |
| 358 | up_read(&net_rwsem); | ||
| 357 | } | 359 | } |
| 358 | EXPORT_SYMBOL_GPL(wireless_nlevent_flush); | 360 | EXPORT_SYMBOL_GPL(wireless_nlevent_flush); |
| 359 | 361 | ||
