aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2012-12-21 23:27:12 -0500
committerEric W. Biederman <ebiederm@xmission.com>2012-12-25 19:10:05 -0500
commitc876ad7682155958d0c9c27afe9017925c230d64 (patch)
tree926064bd7909f60daed3b6b963555e57cab7b520
parent8382fcac1b813ad0a4e68a838fc7ae93fa39eda0 (diff)
pidns: Stop pid allocation when init dies
Oleg pointed out that in a pid namespace the sequence. - pid 1 becomes a zombie - setns(thepidns), fork,... - reaping pid 1. - The injected processes exiting. Can lead to processes attempting access their child reaper and instead following a stale pointer. That waitpid for init can return before all of the processes in the pid namespace have exited is also unfortunate. Avoid these problems by disabling the allocation of new pids in a pid namespace when init dies, instead of when the last process in a pid namespace is reaped. Pointed-out-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r--include/linux/pid.h1
-rw-r--r--include/linux/pid_namespace.h4
-rw-r--r--kernel/pid.c15
-rw-r--r--kernel/pid_namespace.c4
4 files changed, 20 insertions, 4 deletions
diff --git a/include/linux/pid.h b/include/linux/pid.h
index b152d44fb181..2381c973d897 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
121 121
122extern struct pid *alloc_pid(struct pid_namespace *ns); 122extern struct pid *alloc_pid(struct pid_namespace *ns);
123extern void free_pid(struct pid *pid); 123extern void free_pid(struct pid *pid);
124extern void disable_pid_allocation(struct pid_namespace *ns);
124 125
125/* 126/*
126 * ns_of_pid() returns the pid namespace in which the specified pid was 127 * ns_of_pid() returns the pid namespace in which the specified pid was
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index bf285999273a..215e5e3dda10 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,7 +21,7 @@ struct pid_namespace {
21 struct kref kref; 21 struct kref kref;
22 struct pidmap pidmap[PIDMAP_ENTRIES]; 22 struct pidmap pidmap[PIDMAP_ENTRIES];
23 int last_pid; 23 int last_pid;
24 int nr_hashed; 24 unsigned int nr_hashed;
25 struct task_struct *child_reaper; 25 struct task_struct *child_reaper;
26 struct kmem_cache *pid_cachep; 26 struct kmem_cache *pid_cachep;
27 unsigned int level; 27 unsigned int level;
@@ -42,6 +42,8 @@ struct pid_namespace {
42 42
43extern struct pid_namespace init_pid_ns; 43extern struct pid_namespace init_pid_ns;
44 44
45#define PIDNS_HASH_ADDING (1U << 31)
46
45#ifdef CONFIG_PID_NS 47#ifdef CONFIG_PID_NS
46static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) 48static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
47{ 49{
diff --git a/kernel/pid.c b/kernel/pid.c
index 36aa02ff17d6..de9af600006f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
270 wake_up_process(ns->child_reaper); 270 wake_up_process(ns->child_reaper);
271 break; 271 break;
272 case 0: 272 case 0:
273 ns->nr_hashed = -1;
274 schedule_work(&ns->proc_work); 273 schedule_work(&ns->proc_work);
275 break; 274 break;
276 } 275 }
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
319 318
320 upid = pid->numbers + ns->level; 319 upid = pid->numbers + ns->level;
321 spin_lock_irq(&pidmap_lock); 320 spin_lock_irq(&pidmap_lock);
322 if (ns->nr_hashed < 0) 321 if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
323 goto out_unlock; 322 goto out_unlock;
324 for ( ; upid >= pid->numbers; --upid) { 323 for ( ; upid >= pid->numbers; --upid) {
325 hlist_add_head_rcu(&upid->pid_chain, 324 hlist_add_head_rcu(&upid->pid_chain,
@@ -342,6 +341,13 @@ out_free:
342 goto out; 341 goto out;
343} 342}
344 343
344void disable_pid_allocation(struct pid_namespace *ns)
345{
346 spin_lock_irq(&pidmap_lock);
347 ns->nr_hashed &= ~PIDNS_HASH_ADDING;
348 spin_unlock_irq(&pidmap_lock);
349}
350
345struct pid *find_pid_ns(int nr, struct pid_namespace *ns) 351struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
346{ 352{
347 struct hlist_node *elem; 353 struct hlist_node *elem;
@@ -573,6 +579,9 @@ void __init pidhash_init(void)
573 579
574void __init pidmap_init(void) 580void __init pidmap_init(void)
575{ 581{
582 /* Veryify no one has done anything silly */
583 BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
584
576 /* bump default and minimum pid_max based on number of cpus */ 585 /* bump default and minimum pid_max based on number of cpus */
577 pid_max = min(pid_max_max, max_t(int, pid_max, 586 pid_max = min(pid_max_max, max_t(int, pid_max,
578 PIDS_PER_CPU_DEFAULT * num_possible_cpus())); 587 PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
@@ -584,7 +593,7 @@ void __init pidmap_init(void)
584 /* Reserve PID 0. We never call free_pidmap(0) */ 593 /* Reserve PID 0. We never call free_pidmap(0) */
585 set_bit(0, init_pid_ns.pidmap[0].page); 594 set_bit(0, init_pid_ns.pidmap[0].page);
586 atomic_dec(&init_pid_ns.pidmap[0].nr_free); 595 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
587 init_pid_ns.nr_hashed = 1; 596 init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
588 597
589 init_pid_ns.pid_cachep = KMEM_CACHE(pid, 598 init_pid_ns.pid_cachep = KMEM_CACHE(pid,
590 SLAB_HWCACHE_ALIGN | SLAB_PANIC); 599 SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index fdbd0cdf271a..c1c3dc1c6023 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
115 ns->level = level; 115 ns->level = level;
116 ns->parent = get_pid_ns(parent_pid_ns); 116 ns->parent = get_pid_ns(parent_pid_ns);
117 ns->user_ns = get_user_ns(user_ns); 117 ns->user_ns = get_user_ns(user_ns);
118 ns->nr_hashed = PIDNS_HASH_ADDING;
118 INIT_WORK(&ns->proc_work, proc_cleanup_work); 119 INIT_WORK(&ns->proc_work, proc_cleanup_work);
119 120
120 set_bit(0, ns->pidmap[0].page); 121 set_bit(0, ns->pidmap[0].page);
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
181 int rc; 182 int rc;
182 struct task_struct *task, *me = current; 183 struct task_struct *task, *me = current;
183 184
185 /* Don't allow any more processes into the pid namespace */
186 disable_pid_allocation(pid_ns);
187
184 /* Ignore SIGCHLD causing any terminated children to autoreap */ 188 /* Ignore SIGCHLD causing any terminated children to autoreap */
185 spin_lock_irq(&me->sighand->siglock); 189 spin_lock_irq(&me->sighand->siglock);
186 me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; 190 me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;