diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2012-12-21 23:27:12 -0500 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2012-12-25 19:10:05 -0500 |
commit | c876ad7682155958d0c9c27afe9017925c230d64 (patch) | |
tree | 926064bd7909f60daed3b6b963555e57cab7b520 /kernel | |
parent | 8382fcac1b813ad0a4e68a838fc7ae93fa39eda0 (diff) |
pidns: Stop pid allocation when init dies
Oleg pointed out that in a pid namespace the sequence:
- pid 1 becomes a zombie
- setns(thepidns), fork,...
- reaping pid 1.
- The injected processes exiting.
Can lead to processes attempting to access their child reaper and
instead following a stale pointer.
That waitpid for init can return before all of the processes in
the pid namespace have exited is also unfortunate.
Avoid these problems by disabling the allocation of new pids in a pid
namespace when init dies, instead of when the last process in a pid
namespace is reaped.
Pointed-out-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/pid.c | 15 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 4 |
2 files changed, 16 insertions, 3 deletions
diff --git a/kernel/pid.c b/kernel/pid.c index 36aa02ff17d6..de9af600006f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid) | |||
270 | wake_up_process(ns->child_reaper); | 270 | wake_up_process(ns->child_reaper); |
271 | break; | 271 | break; |
272 | case 0: | 272 | case 0: |
273 | ns->nr_hashed = -1; | ||
274 | schedule_work(&ns->proc_work); | 273 | schedule_work(&ns->proc_work); |
275 | break; | 274 | break; |
276 | } | 275 | } |
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) | |||
319 | 318 | ||
320 | upid = pid->numbers + ns->level; | 319 | upid = pid->numbers + ns->level; |
321 | spin_lock_irq(&pidmap_lock); | 320 | spin_lock_irq(&pidmap_lock); |
322 | if (ns->nr_hashed < 0) | 321 | if (!(ns->nr_hashed & PIDNS_HASH_ADDING)) |
323 | goto out_unlock; | 322 | goto out_unlock; |
324 | for ( ; upid >= pid->numbers; --upid) { | 323 | for ( ; upid >= pid->numbers; --upid) { |
325 | hlist_add_head_rcu(&upid->pid_chain, | 324 | hlist_add_head_rcu(&upid->pid_chain, |
@@ -342,6 +341,13 @@ out_free: | |||
342 | goto out; | 341 | goto out; |
343 | } | 342 | } |
344 | 343 | ||
344 | void disable_pid_allocation(struct pid_namespace *ns) | ||
345 | { | ||
346 | spin_lock_irq(&pidmap_lock); | ||
347 | ns->nr_hashed &= ~PIDNS_HASH_ADDING; | ||
348 | spin_unlock_irq(&pidmap_lock); | ||
349 | } | ||
350 | |||
345 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) | 351 | struct pid *find_pid_ns(int nr, struct pid_namespace *ns) |
346 | { | 352 | { |
347 | struct hlist_node *elem; | 353 | struct hlist_node *elem; |
@@ -573,6 +579,9 @@ void __init pidhash_init(void) | |||
573 | 579 | ||
574 | void __init pidmap_init(void) | 580 | void __init pidmap_init(void) |
575 | { | 581 | { |
582 | /* Veryify no one has done anything silly */ | ||
583 | BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING); | ||
584 | |||
576 | /* bump default and minimum pid_max based on number of cpus */ | 585 | /* bump default and minimum pid_max based on number of cpus */ |
577 | pid_max = min(pid_max_max, max_t(int, pid_max, | 586 | pid_max = min(pid_max_max, max_t(int, pid_max, |
578 | PIDS_PER_CPU_DEFAULT * num_possible_cpus())); | 587 | PIDS_PER_CPU_DEFAULT * num_possible_cpus())); |
@@ -584,7 +593,7 @@ void __init pidmap_init(void) | |||
584 | /* Reserve PID 0. We never call free_pidmap(0) */ | 593 | /* Reserve PID 0. We never call free_pidmap(0) */ |
585 | set_bit(0, init_pid_ns.pidmap[0].page); | 594 | set_bit(0, init_pid_ns.pidmap[0].page); |
586 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); | 595 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
587 | init_pid_ns.nr_hashed = 1; | 596 | init_pid_ns.nr_hashed = PIDNS_HASH_ADDING; |
588 | 597 | ||
589 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, | 598 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, |
590 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); | 599 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index fdbd0cdf271a..c1c3dc1c6023 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns | |||
115 | ns->level = level; | 115 | ns->level = level; |
116 | ns->parent = get_pid_ns(parent_pid_ns); | 116 | ns->parent = get_pid_ns(parent_pid_ns); |
117 | ns->user_ns = get_user_ns(user_ns); | 117 | ns->user_ns = get_user_ns(user_ns); |
118 | ns->nr_hashed = PIDNS_HASH_ADDING; | ||
118 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | 119 | INIT_WORK(&ns->proc_work, proc_cleanup_work); |
119 | 120 | ||
120 | set_bit(0, ns->pidmap[0].page); | 121 | set_bit(0, ns->pidmap[0].page); |
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
181 | int rc; | 182 | int rc; |
182 | struct task_struct *task, *me = current; | 183 | struct task_struct *task, *me = current; |
183 | 184 | ||
185 | /* Don't allow any more processes into the pid namespace */ | ||
186 | disable_pid_allocation(pid_ns); | ||
187 | |||
184 | /* Ignore SIGCHLD causing any terminated children to autoreap */ | 188 | /* Ignore SIGCHLD causing any terminated children to autoreap */ |
185 | spin_lock_irq(&me->sighand->siglock); | 189 | spin_lock_irq(&me->sighand->siglock); |
186 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; | 190 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; |