aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Sukadev Bhattiprolu <sukadev@us.ibm.com> 2006-12-08 05:38:01 -0500
committer Linus Torvalds <torvalds@woody.osdl.org> 2006-12-08 11:28:52 -0500
commit 84d737866e2babdeab0c6b18ea155c6a649663b8 (patch)
tree e504da826174c2804d8c680828800aa680090686
parent 6cc1b22a4acef3816eaa5f8c227d93d749b23195 (diff)
[PATCH] add child reaper to pid_namespace
Add a per-pid_namespace child reaper. This is needed so that processes are reaped within the same pid space and do not spill over to the parent pid space. It's also needed so that containers preserve the existing semantic that pid == 1 reaps orphaned children.

This is based on Eric Biederman's patch: http://lkml.org/lkml/2006/2/6/285

Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/exec.c 5
-rw-r--r-- include/linux/pid.h 5
-rw-r--r-- include/linux/pid_namespace.h 6
-rw-r--r-- include/linux/sched.h 1
-rw-r--r-- init/main.c 5
-rw-r--r-- kernel/exit.c 23
-rw-r--r-- kernel/pid.c 3
-rw-r--r-- kernel/signal.c 11
8 files changed, 40 insertions, 19 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 60433e2254a4..12d8cd461b41 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -38,6 +38,7 @@
38#include <linux/binfmts.h> 38#include <linux/binfmts.h>
39#include <linux/swap.h> 39#include <linux/swap.h>
40#include <linux/utsname.h> 40#include <linux/utsname.h>
41#include <linux/pid_namespace.h>
41#include <linux/module.h> 42#include <linux/module.h>
42#include <linux/namei.h> 43#include <linux/namei.h>
43#include <linux/proc_fs.h> 44#include <linux/proc_fs.h>
@@ -620,8 +621,8 @@ static int de_thread(struct task_struct *tsk)
620 * Reparenting needs write_lock on tasklist_lock, 621 * Reparenting needs write_lock on tasklist_lock,
621 * so it is safe to do it under read_lock. 622 * so it is safe to do it under read_lock.
622 */ 623 */
623 if (unlikely(tsk->group_leader == child_reaper)) 624 if (unlikely(tsk->group_leader == child_reaper(tsk)))
624 child_reaper = tsk; 625 tsk->nsproxy->pid_ns->child_reaper = tsk;
625 626
626 zap_other_threads(tsk); 627 zap_other_threads(tsk);
627 read_unlock(&tasklist_lock); 628 read_unlock(&tasklist_lock);
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 2c0007d17218..4dec047b1837 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -35,8 +35,9 @@ enum pid_type
35 * 35 *
36 * Holding a reference to struct pid solves both of these problems. 36 * Holding a reference to struct pid solves both of these problems.
37 * It is small so holding a reference does not consume a lot of 37 * It is small so holding a reference does not consume a lot of
38 * resources, and since a new struct pid is allocated when the numeric 38 * resources, and since a new struct pid is allocated when the numeric pid
39 * pid value is reused we don't mistakenly refer to new processes. 39 * value is reused (when pids wrap around) we don't mistakenly refer to new
40 * processes.
40 */ 41 */
41 42
42struct pid 43struct pid
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 76e7c6b2cf33..d2a9d419f01f 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -19,6 +19,7 @@ struct pid_namespace {
19 struct kref kref; 19 struct kref kref;
20 struct pidmap pidmap[PIDMAP_ENTRIES]; 20 struct pidmap pidmap[PIDMAP_ENTRIES];
21 int last_pid; 21 int last_pid;
22 struct task_struct *child_reaper;
22}; 23};
23 24
24extern struct pid_namespace init_pid_ns; 25extern struct pid_namespace init_pid_ns;
@@ -36,4 +37,9 @@ static inline void put_pid_ns(struct pid_namespace *ns)
36 kref_put(&ns->kref, free_pid_ns); 37 kref_put(&ns->kref, free_pid_ns);
37} 38}
38 39
40static inline struct task_struct *child_reaper(struct task_struct *tsk)
41{
42 return tsk->nsproxy->pid_ns->child_reaper;
43}
44
39#endif /* _LINUX_PID_NS_H */ 45#endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fec1d419714..f0317edea141 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,7 +1400,6 @@ extern NORET_TYPE void do_group_exit(int);
1400extern void daemonize(const char *, ...); 1400extern void daemonize(const char *, ...);
1401extern int allow_signal(int); 1401extern int allow_signal(int);
1402extern int disallow_signal(int); 1402extern int disallow_signal(int);
1403extern struct task_struct *child_reaper;
1404 1403
1405extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); 1404extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
1406extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); 1405extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
diff --git a/init/main.c b/init/main.c
index 4cdcd06e6d78..036f97c0c34c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -51,6 +51,7 @@
51#include <linux/debug_locks.h> 51#include <linux/debug_locks.h>
52#include <linux/lockdep.h> 52#include <linux/lockdep.h>
53#include <linux/utsrelease.h> 53#include <linux/utsrelease.h>
54#include <linux/pid_namespace.h>
54#include <linux/compile.h> 55#include <linux/compile.h>
55 56
56#include <asm/io.h> 57#include <asm/io.h>
@@ -626,8 +627,6 @@ static int __init initcall_debug_setup(char *str)
626} 627}
627__setup("initcall_debug", initcall_debug_setup); 628__setup("initcall_debug", initcall_debug_setup);
628 629
629struct task_struct *child_reaper = &init_task;
630
631extern initcall_t __initcall_start[], __initcall_end[]; 630extern initcall_t __initcall_start[], __initcall_end[];
632 631
633static void __init do_initcalls(void) 632static void __init do_initcalls(void)
@@ -727,7 +726,7 @@ static int init(void * unused)
727 * assumptions about where in the task array this 726 * assumptions about where in the task array this
728 * can be found. 727 * can be found.
729 */ 728 */
730 child_reaper = current; 729 init_pid_ns.child_reaper = current;
731 730
732 cad_pid = task_pid(current); 731 cad_pid = task_pid(current);
733 732
diff --git a/kernel/exit.c b/kernel/exit.c
index 28d9feedfd27..fd0e067952ab 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -22,6 +22,7 @@
22#include <linux/file.h> 22#include <linux/file.h>
23#include <linux/binfmts.h> 23#include <linux/binfmts.h>
24#include <linux/nsproxy.h> 24#include <linux/nsproxy.h>
25#include <linux/pid_namespace.h>
25#include <linux/ptrace.h> 26#include <linux/ptrace.h>
26#include <linux/profile.h> 27#include <linux/profile.h>
27#include <linux/mount.h> 28#include <linux/mount.h>
@@ -48,7 +49,6 @@
48#include <asm/mmu_context.h> 49#include <asm/mmu_context.h>
49 50
50extern void sem_exit (void); 51extern void sem_exit (void);
51extern struct task_struct *child_reaper;
52 52
53static void exit_mm(struct task_struct * tsk); 53static void exit_mm(struct task_struct * tsk);
54 54
@@ -260,7 +260,8 @@ static int has_stopped_jobs(int pgrp)
260} 260}
261 261
262/** 262/**
263 * reparent_to_init - Reparent the calling kernel thread to the init task. 263 * reparent_to_init - Reparent the calling kernel thread to the init task
264 * of the pid space that the thread belongs to.
264 * 265 *
265 * If a kernel thread is launched as a result of a system call, or if 266 * If a kernel thread is launched as a result of a system call, or if
266 * it ever exits, it should generally reparent itself to init so that 267 * it ever exits, it should generally reparent itself to init so that
@@ -278,8 +279,8 @@ static void reparent_to_init(void)
278 ptrace_unlink(current); 279 ptrace_unlink(current);
279 /* Reparent to init */ 280 /* Reparent to init */
280 remove_parent(current); 281 remove_parent(current);
281 current->parent = child_reaper; 282 current->parent = child_reaper(current);
282 current->real_parent = child_reaper; 283 current->real_parent = child_reaper(current);
283 add_parent(current); 284 add_parent(current);
284 285
285 /* Set the exit signal to SIGCHLD so we signal init on exit */ 286 /* Set the exit signal to SIGCHLD so we signal init on exit */
@@ -662,7 +663,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
662 * When we die, we re-parent all our children. 663 * When we die, we re-parent all our children.
663 * Try to give them to another thread in our thread 664 * Try to give them to another thread in our thread
664 * group, and if no such member exists, give it to 665 * group, and if no such member exists, give it to
665 * the global child reaper process (ie "init") 666 * the child reaper process (ie "init") in our pid
667 * space.
666 */ 668 */
667static void 669static void
668forget_original_parent(struct task_struct *father, struct list_head *to_release) 670forget_original_parent(struct task_struct *father, struct list_head *to_release)
@@ -673,7 +675,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
673 do { 675 do {
674 reaper = next_thread(reaper); 676 reaper = next_thread(reaper);
675 if (reaper == father) { 677 if (reaper == father) {
676 reaper = child_reaper; 678 reaper = child_reaper(father);
677 break; 679 break;
678 } 680 }
679 } while (reaper->exit_state); 681 } while (reaper->exit_state);
@@ -859,8 +861,13 @@ fastcall NORET_TYPE void do_exit(long code)
859 panic("Aiee, killing interrupt handler!"); 861 panic("Aiee, killing interrupt handler!");
860 if (unlikely(!tsk->pid)) 862 if (unlikely(!tsk->pid))
861 panic("Attempted to kill the idle task!"); 863 panic("Attempted to kill the idle task!");
862 if (unlikely(tsk == child_reaper)) 864 if (unlikely(tsk == child_reaper(tsk))) {
863 panic("Attempted to kill init!"); 865 if (tsk->nsproxy->pid_ns != &init_pid_ns)
866 tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper;
867 else
868 panic("Attempted to kill init!");
869 }
870
864 871
865 if (unlikely(current->ptrace & PT_TRACE_EXIT)) { 872 if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
866 current->ptrace_message = code; 873 current->ptrace_message = code;
diff --git a/kernel/pid.c b/kernel/pid.c
index 1d9cc268b499..2efe9d8d367b 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -65,7 +65,8 @@ struct pid_namespace init_pid_ns = {
65 .pidmap = { 65 .pidmap = {
66 [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } 66 [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
67 }, 67 },
68 .last_pid = 0 68 .last_pid = 0,
69 .child_reaper = &init_task
69}; 70};
70 71
71/* 72/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 9eac4db60eda..1921ffdc5e77 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -24,6 +24,9 @@
24#include <linux/signal.h> 24#include <linux/signal.h>
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/freezer.h> 26#include <linux/freezer.h>
27#include <linux/pid_namespace.h>
28#include <linux/nsproxy.h>
29
27#include <asm/param.h> 30#include <asm/param.h>
28#include <asm/uaccess.h> 31#include <asm/uaccess.h>
29#include <asm/unistd.h> 32#include <asm/unistd.h>
@@ -1877,8 +1880,12 @@ relock:
1877 if (sig_kernel_ignore(signr)) /* Default is nothing. */ 1880 if (sig_kernel_ignore(signr)) /* Default is nothing. */
1878 continue; 1881 continue;
1879 1882
1880 /* Init gets no signals it doesn't want. */ 1883 /*
1881 if (current == child_reaper) 1884 * Init of a pid space gets no signals it doesn't want from
1885 * within that pid space. It can of course get signals from
1886 * its parent pid space.
1887 */
1888 if (current == child_reaper(current))
1882 continue; 1889 continue;
1883 1890
1884 if (sig_kernel_stop(signr)) { 1891 if (sig_kernel_stop(signr)) {