diff options
author | Pavel Emelyanov <xemul@openvz.org> | 2007-10-19 02:39:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 14:53:37 -0400 |
commit | cf7b708c8d1d7a27736771bcf4c457b332b0f818 (patch) | |
tree | 10f80257b052313b283f18ddfe35145882e0b47f /kernel | |
parent | a6f5e06378970a2687332c2d54046245fcff1e7e (diff) |
Make access to task's nsproxy lighter
When someone wants to deal with some other task's namespaces it has to lock
the task and then get the desired namespace if one exists. This is
slow on read-only paths and may be impossible in some cases.
E.g. Oleg recently noticed a race between unshare() and the (sent for
review in cgroups) pid namespaces - when the task notifies the parent it
has to know the parent's namespace, but taking the task_lock() is
impossible there - the code is under write locked tasklist lock.
On the other hand, switching the namespace on a task (daemonize) and releasing
the namespace (after the last task exits) are rather rare operations and we
can sacrifice their speed to solve the issues above.
The access to other task namespaces is proposed to be performed
like this:
rcu_read_lock();
nsproxy = task_nsproxy(tsk);
if (nsproxy != NULL) {
/*
* work with the namespaces here
* e.g. get the reference on one of them
*/
} /*
* NULL task_nsproxy() means that this task is
* almost dead (zombie)
*/
rcu_read_unlock();
This patch has passed the review by Eric and Oleg :) and,
of course, has been tested.
[clg@fr.ibm.com: fix unshare()]
[ebiederm@xmission.com: Update get_net_ns_by_pid]
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 11 | ||||
-rw-r--r-- | kernel/nsproxy.c | 40 |
3 files changed, 36 insertions, 22 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index d22aefabb129..db9764186d5a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -400,9 +400,10 @@ void daemonize(const char *name, ...) | |||
400 | current->fs = fs; | 400 | current->fs = fs; |
401 | atomic_inc(&fs->count); | 401 | atomic_inc(&fs->count); |
402 | 402 | ||
403 | exit_task_namespaces(current); | 403 | if (current->nsproxy != init_task.nsproxy) { |
404 | current->nsproxy = init_task.nsproxy; | 404 | get_nsproxy(init_task.nsproxy); |
405 | get_task_namespaces(current); | 405 | switch_task_namespaces(current, init_task.nsproxy); |
406 | } | ||
406 | 407 | ||
407 | exit_files(current); | 408 | exit_files(current); |
408 | current->files = init_task.files; | 409 | current->files = init_task.files; |
diff --git a/kernel/fork.c b/kernel/fork.c index 2deaf481efab..d2f4a420a5b9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1632,7 +1632,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1632 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | 1632 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; |
1633 | struct files_struct *fd, *new_fd = NULL; | 1633 | struct files_struct *fd, *new_fd = NULL; |
1634 | struct sem_undo_list *new_ulist = NULL; | 1634 | struct sem_undo_list *new_ulist = NULL; |
1635 | struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL; | 1635 | struct nsproxy *new_nsproxy = NULL; |
1636 | 1636 | ||
1637 | check_unshare_flags(&unshare_flags); | 1637 | check_unshare_flags(&unshare_flags); |
1638 | 1638 | ||
@@ -1662,14 +1662,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1662 | 1662 | ||
1663 | if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { | 1663 | if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { |
1664 | 1664 | ||
1665 | task_lock(current); | ||
1666 | |||
1667 | if (new_nsproxy) { | 1665 | if (new_nsproxy) { |
1668 | old_nsproxy = current->nsproxy; | 1666 | switch_task_namespaces(current, new_nsproxy); |
1669 | current->nsproxy = new_nsproxy; | 1667 | new_nsproxy = NULL; |
1670 | new_nsproxy = old_nsproxy; | ||
1671 | } | 1668 | } |
1672 | 1669 | ||
1670 | task_lock(current); | ||
1671 | |||
1673 | if (new_fs) { | 1672 | if (new_fs) { |
1674 | fs = current->fs; | 1673 | fs = current->fs; |
1675 | current->fs = new_fs; | 1674 | current->fs = new_fs; |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index e981c61304f1..c8ef7c2992ed 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -26,19 +26,6 @@ static struct kmem_cache *nsproxy_cachep; | |||
26 | 26 | ||
27 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); | 27 | struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); |
28 | 28 | ||
29 | static inline void get_nsproxy(struct nsproxy *ns) | ||
30 | { | ||
31 | atomic_inc(&ns->count); | ||
32 | } | ||
33 | |||
34 | void get_task_namespaces(struct task_struct *tsk) | ||
35 | { | ||
36 | struct nsproxy *ns = tsk->nsproxy; | ||
37 | if (ns) { | ||
38 | get_nsproxy(ns); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | /* | 29 | /* |
43 | * creates a copy of "orig" with refcount 1. | 30 | * creates a copy of "orig" with refcount 1. |
44 | */ | 31 | */ |
@@ -216,6 +203,33 @@ out: | |||
216 | return err; | 203 | return err; |
217 | } | 204 | } |
218 | 205 | ||
206 | void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) | ||
207 | { | ||
208 | struct nsproxy *ns; | ||
209 | |||
210 | might_sleep(); | ||
211 | |||
212 | ns = p->nsproxy; | ||
213 | |||
214 | rcu_assign_pointer(p->nsproxy, new); | ||
215 | |||
216 | if (ns && atomic_dec_and_test(&ns->count)) { | ||
217 | /* | ||
218 | * wait for others to get what they want from this nsproxy. | ||
219 | * | ||
220 | * cannot release this nsproxy via the call_rcu() since | ||
221 | * put_mnt_ns() will want to sleep | ||
222 | */ | ||
223 | synchronize_rcu(); | ||
224 | free_nsproxy(ns); | ||
225 | } | ||
226 | } | ||
227 | |||
228 | void exit_task_namespaces(struct task_struct *p) | ||
229 | { | ||
230 | switch_task_namespaces(p, NULL); | ||
231 | } | ||
232 | |||
219 | static int __init nsproxy_cache_init(void) | 233 | static int __init nsproxy_cache_init(void) |
220 | { | 234 | { |
221 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); | 235 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); |