aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Pavel Emelyanov <xemul@openvz.org> 2007-10-19 02:39:54 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-19 14:53:37 -0400
commit: cf7b708c8d1d7a27736771bcf4c457b332b0f818 (patch)
tree: 10f80257b052313b283f18ddfe35145882e0b47f /kernel
parent: a6f5e06378970a2687332c2d54046245fcff1e7e (diff)
Make access to task's nsproxy lighter
When someone wants to deal with some other task's namespaces, it has to lock the task and then get the desired namespace if it exists. This is slow on read-only paths and may be impossible in some cases.

E.g. Oleg recently noticed a race between unshare() and the (sent for review in cgroups) pid namespaces - when the task notifies the parent it has to know the parent's namespace, but taking the task_lock() is impossible there - the code is under the write-locked tasklist lock.

On the other hand, switching the namespace on a task (daemonize) and releasing the namespace (after the last task exits) are rather rare operations, and we can sacrifice their speed to solve the issues above.

The access to other task namespaces is proposed to be performed like this:

	rcu_read_lock();
	nsproxy = task_nsproxy(tsk);
	if (nsproxy != NULL) {
		/*
		 * work with the namespaces here
		 * e.g. get the reference on one of them
		 */
	}
	/*
	 * NULL task_nsproxy() means that this task is
	 * almost dead (zombie)
	 */
	rcu_read_unlock();

This patch has passed the review by Eric and Oleg :) and, of course, has been tested.

[clg@fr.ibm.com: fix unshare()]
[ebiederm@xmission.com: Update get_net_ns_by_pid]
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/exit.c 7
-rw-r--r-- kernel/fork.c 11
-rw-r--r-- kernel/nsproxy.c 40
3 files changed, 36 insertions, 22 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index d22aefabb129..db9764186d5a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -400,9 +400,10 @@ void daemonize(const char *name, ...)
400 current->fs = fs; 400 current->fs = fs;
401 atomic_inc(&fs->count); 401 atomic_inc(&fs->count);
402 402
403 exit_task_namespaces(current); 403 if (current->nsproxy != init_task.nsproxy) {
404 current->nsproxy = init_task.nsproxy; 404 get_nsproxy(init_task.nsproxy);
405 get_task_namespaces(current); 405 switch_task_namespaces(current, init_task.nsproxy);
406 }
406 407
407 exit_files(current); 408 exit_files(current);
408 current->files = init_task.files; 409 current->files = init_task.files;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2deaf481efab..d2f4a420a5b9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1632,7 +1632,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1632 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; 1632 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1633 struct files_struct *fd, *new_fd = NULL; 1633 struct files_struct *fd, *new_fd = NULL;
1634 struct sem_undo_list *new_ulist = NULL; 1634 struct sem_undo_list *new_ulist = NULL;
1635 struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL; 1635 struct nsproxy *new_nsproxy = NULL;
1636 1636
1637 check_unshare_flags(&unshare_flags); 1637 check_unshare_flags(&unshare_flags);
1638 1638
@@ -1662,14 +1662,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1662 1662
1663 if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { 1663 if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) {
1664 1664
1665 task_lock(current);
1666
1667 if (new_nsproxy) { 1665 if (new_nsproxy) {
1668 old_nsproxy = current->nsproxy; 1666 switch_task_namespaces(current, new_nsproxy);
1669 current->nsproxy = new_nsproxy; 1667 new_nsproxy = NULL;
1670 new_nsproxy = old_nsproxy;
1671 } 1668 }
1672 1669
1670 task_lock(current);
1671
1673 if (new_fs) { 1672 if (new_fs) {
1674 fs = current->fs; 1673 fs = current->fs;
1675 current->fs = new_fs; 1674 current->fs = new_fs;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index e981c61304f1..c8ef7c2992ed 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,19 +26,6 @@ static struct kmem_cache *nsproxy_cachep;
26 26
27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); 27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
28 28
29static inline void get_nsproxy(struct nsproxy *ns)
30{
31 atomic_inc(&ns->count);
32}
33
34void get_task_namespaces(struct task_struct *tsk)
35{
36 struct nsproxy *ns = tsk->nsproxy;
37 if (ns) {
38 get_nsproxy(ns);
39 }
40}
41
42/* 29/*
43 * creates a copy of "orig" with refcount 1. 30 * creates a copy of "orig" with refcount 1.
44 */ 31 */
@@ -216,6 +203,33 @@ out:
216 return err; 203 return err;
217} 204}
218 205
206void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
207{
208 struct nsproxy *ns;
209
210 might_sleep();
211
212 ns = p->nsproxy;
213
214 rcu_assign_pointer(p->nsproxy, new);
215
216 if (ns && atomic_dec_and_test(&ns->count)) {
217 /*
218 * wait for others to get what they want from this nsproxy.
219 *
220 * cannot release this nsproxy via the call_rcu() since
221 * put_mnt_ns() will want to sleep
222 */
223 synchronize_rcu();
224 free_nsproxy(ns);
225 }
226}
227
228void exit_task_namespaces(struct task_struct *p)
229{
230 switch_task_namespaces(p, NULL);
231}
232
219static int __init nsproxy_cache_init(void) 233static int __init nsproxy_cache_init(void)
220{ 234{
221 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 235 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);