aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/net_namespace.c
diff options
context:
space:
mode:
author: Kirill Tkhai <ktkhai@virtuozzo.com> 2018-02-13 04:26:23 -0500
committer: David S. Miller <davem@davemloft.net> 2018-02-13 10:36:04 -0500
commit: 1a57feb847c56d6193f67d0e892c24e71f9e3ab1 (patch)
tree: 3e09866e7e939a6a350eb3385478260cfed4c101 /net/core/net_namespace.c
parent: 5ba049a5cc8e24a1643df75bbf65b4efa070fa74 (diff)
net: Introduce net_sem for protection of pernet_list
Currently, net_mutex is mostly used to protect the pernet operations list. It orders setup_net() and cleanup_net() against parallel {un,}register_pernet_operations() calls, so that the ->exit{,batch} methods executed for a dying net belong to the same pernet operations whose ->init methods were called for it, even after the net namespace is unlinked from net_namespace_list in cleanup_net(). But there are several scalability problems. The first one is that more than one net can't be created or destroyed at the same moment on the node. For big machines with many CPUs running many containers this is very noticeable. The second one is that synchronize_rcu() has to be called after the net is removed from net_namespace_list: Destroy net_ns: cleanup_net() mutex_lock(&net_mutex) list_del_rcu(&net->list) synchronize_rcu() <--- Sleep there for ages list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list) list_for_each_entry_reverse(ops, &pernet_list, list) ops_free_list(ops, &net_exit_list) mutex_unlock(&net_mutex) This primitive is not fast, especially on systems with many processors and/or when preemptible RCU is enabled in the config. So, for the whole time cleanup_net() is waiting for the RCU grace period, creation of new net namespaces is not possible; the tasks that attempt it sleep on the same mutex: Create net_ns: copy_net_ns() mutex_lock_killable(&net_mutex) <--- Sleep there for ages I observed 20-30 second hangs of "unshare -n" on an ordinary 8-CPU laptop with preemptible RCU enabled after a round of CRIU tests had finished. The solution is to convert net_mutex to an rw_semaphore and add fine-grained locks to the really small number of pernet_operations that actually need them. Then pernet_operations::init/::exit methods, which modify net-related data, will require down_read() locking only, while down_write() will be used for changing pernet_list (i.e., when modules are being loaded and unloaded).
This gives a significant performance increase once the whole patch set is applied, as you can see here: % for i in {1..10000}; do unshare -n bash -c exit; done *before* real 1m40,377s user 0m9,672s sys 0m19,928s *after* real 0m17,007s user 0m5,311s sys 0m11,779s (5.8 times faster) This patch starts replacing net_mutex with net_sem. It adds the rw_semaphore, describes the variables it protects, and uses it where appropriate. net_mutex is still present; subsequent patches will remove it step by step. Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> Acked-by: Andrei Vagin <avagin@virtuozzo.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/net_namespace.c')
-rw-r--r--net/core/net_namespace.c39
1 files changed, 26 insertions, 13 deletions
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 81384386f91b..e89b2b7abd36 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -41,6 +41,11 @@ struct net init_net = {
41EXPORT_SYMBOL(init_net); 41EXPORT_SYMBOL(init_net);
42 42
43static bool init_net_initialized; 43static bool init_net_initialized;
44/*
45 * net_sem: protects: pernet_list, net_generic_ids,
46 * init_net_initialized and first_device pointer.
47 */
48DECLARE_RWSEM(net_sem);
44 49
45#define MIN_PERNET_OPS_ID \ 50#define MIN_PERNET_OPS_ID \
46 ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) 51 ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -286,7 +291,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
286 */ 291 */
287static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) 292static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
288{ 293{
289 /* Must be called with net_mutex held */ 294 /* Must be called with net_sem held */
290 const struct pernet_operations *ops, *saved_ops; 295 const struct pernet_operations *ops, *saved_ops;
291 int error = 0; 296 int error = 0;
292 LIST_HEAD(net_exit_list); 297 LIST_HEAD(net_exit_list);
@@ -418,12 +423,16 @@ struct net *copy_net_ns(unsigned long flags,
418 net->ucounts = ucounts; 423 net->ucounts = ucounts;
419 get_user_ns(user_ns); 424 get_user_ns(user_ns);
420 425
421 rv = mutex_lock_killable(&net_mutex); 426 rv = down_read_killable(&net_sem);
422 if (rv < 0) 427 if (rv < 0)
423 goto put_userns; 428 goto put_userns;
424 429 rv = mutex_lock_killable(&net_mutex);
430 if (rv < 0)
431 goto up_read;
425 rv = setup_net(net, user_ns); 432 rv = setup_net(net, user_ns);
426 mutex_unlock(&net_mutex); 433 mutex_unlock(&net_mutex);
434up_read:
435 up_read(&net_sem);
427 if (rv < 0) { 436 if (rv < 0) {
428put_userns: 437put_userns:
429 put_user_ns(user_ns); 438 put_user_ns(user_ns);
@@ -477,6 +486,7 @@ static void cleanup_net(struct work_struct *work)
477 list_replace_init(&cleanup_list, &net_kill_list); 486 list_replace_init(&cleanup_list, &net_kill_list);
478 spin_unlock_irq(&cleanup_list_lock); 487 spin_unlock_irq(&cleanup_list_lock);
479 488
489 down_read(&net_sem);
480 mutex_lock(&net_mutex); 490 mutex_lock(&net_mutex);
481 491
482 /* Don't let anyone else find us. */ 492 /* Don't let anyone else find us. */
@@ -517,6 +527,7 @@ static void cleanup_net(struct work_struct *work)
517 ops_free_list(ops, &net_exit_list); 527 ops_free_list(ops, &net_exit_list);
518 528
519 mutex_unlock(&net_mutex); 529 mutex_unlock(&net_mutex);
530 up_read(&net_sem);
520 531
521 /* Ensure there are no outstanding rcu callbacks using this 532 /* Ensure there are no outstanding rcu callbacks using this
522 * network namespace. 533 * network namespace.
@@ -543,8 +554,10 @@ static void cleanup_net(struct work_struct *work)
543 */ 554 */
544void net_ns_barrier(void) 555void net_ns_barrier(void)
545{ 556{
557 down_write(&net_sem);
546 mutex_lock(&net_mutex); 558 mutex_lock(&net_mutex);
547 mutex_unlock(&net_mutex); 559 mutex_unlock(&net_mutex);
560 up_write(&net_sem);
548} 561}
549EXPORT_SYMBOL(net_ns_barrier); 562EXPORT_SYMBOL(net_ns_barrier);
550 563
@@ -871,12 +884,12 @@ static int __init net_ns_init(void)
871 884
872 rcu_assign_pointer(init_net.gen, ng); 885 rcu_assign_pointer(init_net.gen, ng);
873 886
874 mutex_lock(&net_mutex); 887 down_write(&net_sem);
875 if (setup_net(&init_net, &init_user_ns)) 888 if (setup_net(&init_net, &init_user_ns))
876 panic("Could not setup the initial network namespace"); 889 panic("Could not setup the initial network namespace");
877 890
878 init_net_initialized = true; 891 init_net_initialized = true;
879 mutex_unlock(&net_mutex); 892 up_write(&net_sem);
880 893
881 register_pernet_subsys(&net_ns_ops); 894 register_pernet_subsys(&net_ns_ops);
882 895
@@ -1016,9 +1029,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
1016int register_pernet_subsys(struct pernet_operations *ops) 1029int register_pernet_subsys(struct pernet_operations *ops)
1017{ 1030{
1018 int error; 1031 int error;
1019 mutex_lock(&net_mutex); 1032 down_write(&net_sem);
1020 error = register_pernet_operations(first_device, ops); 1033 error = register_pernet_operations(first_device, ops);
1021 mutex_unlock(&net_mutex); 1034 up_write(&net_sem);
1022 return error; 1035 return error;
1023} 1036}
1024EXPORT_SYMBOL_GPL(register_pernet_subsys); 1037EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1034,9 +1047,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
1034 */ 1047 */
1035void unregister_pernet_subsys(struct pernet_operations *ops) 1048void unregister_pernet_subsys(struct pernet_operations *ops)
1036{ 1049{
1037 mutex_lock(&net_mutex); 1050 down_write(&net_sem);
1038 unregister_pernet_operations(ops); 1051 unregister_pernet_operations(ops);
1039 mutex_unlock(&net_mutex); 1052 up_write(&net_sem);
1040} 1053}
1041EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 1054EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1042 1055
@@ -1062,11 +1075,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1062int register_pernet_device(struct pernet_operations *ops) 1075int register_pernet_device(struct pernet_operations *ops)
1063{ 1076{
1064 int error; 1077 int error;
1065 mutex_lock(&net_mutex); 1078 down_write(&net_sem);
1066 error = register_pernet_operations(&pernet_list, ops); 1079 error = register_pernet_operations(&pernet_list, ops);
1067 if (!error && (first_device == &pernet_list)) 1080 if (!error && (first_device == &pernet_list))
1068 first_device = &ops->list; 1081 first_device = &ops->list;
1069 mutex_unlock(&net_mutex); 1082 up_write(&net_sem);
1070 return error; 1083 return error;
1071} 1084}
1072EXPORT_SYMBOL_GPL(register_pernet_device); 1085EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1082,11 +1095,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
1082 */ 1095 */
1083void unregister_pernet_device(struct pernet_operations *ops) 1096void unregister_pernet_device(struct pernet_operations *ops)
1084{ 1097{
1085 mutex_lock(&net_mutex); 1098 down_write(&net_sem);
1086 if (&ops->list == first_device) 1099 if (&ops->list == first_device)
1087 first_device = first_device->next; 1100 first_device = first_device->next;
1088 unregister_pernet_operations(ops); 1101 unregister_pernet_operations(ops);
1089 mutex_unlock(&net_mutex); 1102 up_write(&net_sem);
1090} 1103}
1091EXPORT_SYMBOL_GPL(unregister_pernet_device); 1104EXPORT_SYMBOL_GPL(unregister_pernet_device);
1092 1105