aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.cz>2015-04-16 15:47:38 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-17 09:04:06 -0400
commit35f71bc0a09a45924bed268d8ccd0d3407bc476f (patch)
treea117a8c06e82bcf75ccfeb69d25f3f11dd2bb9cd
parent69828dce7af2cb6d08ef5a03de687d422fb7ec1f (diff)
fork: report pid reservation failure properly
copy_process will report any failure in alloc_pid as ENOMEM currently which is misleading because the pid allocation might fail not only when the memory is short but also when the pid space is consumed already. The current man page even mentions this case: : EAGAIN : : A system-imposed limit on the number of threads was encountered. : There are a number of limits that may trigger this error: the : RLIMIT_NPROC soft resource limit (set via setrlimit(2)), which : limits the number of processes and threads for a real user ID, was : reached; the kernel's system-wide limit on the number of processes : and threads, /proc/sys/kernel/threads-max, was reached (see : proc(5)); or the maximum number of PIDs, /proc/sys/kernel/pid_max, : was reached (see proc(5)). so the current behavior is also incorrect wrt. documentation. POSIX man page also suggest returing EAGAIN when the process count limit is reached. This patch simply propagates error code from alloc_pid and makes sure we return -EAGAIN due to reservation failure. This will make behavior of fork closer to both our documentation and POSIX. alloc_pid might alsoo fail when the reaper in the pid namespace is dead (the namespace basically disallows all new processes) and there is no good error code which would match documented ones. We have traditionally returned ENOMEM for this case which is misleading as well but as per Eric W. Biederman this behavior is documented in man pid_namespaces(7) : If the "init" process of a PID namespace terminates, the kernel : terminates all of the processes in the namespace via a SIGKILL signal. : This behavior reflects the fact that the "init" process is essential for : the correct operation of a PID namespace. In this case, a subsequent : fork(2) into this PID namespace will fail with the error ENOMEM; it is : not possible to create a new processes in a PID namespace whose "init" : process has terminated. and introducing a new error code would be too risky so let's stick to ENOMEM for this case. Signed-off-by: Michal Hocko <mhocko@suse.cz> Cc: Oleg Nesterov <oleg@redhat.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/pid.c15
2 files changed, 11 insertions, 9 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index f2c1e7352298..d778016ac1e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1403,10 +1403,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1403 goto bad_fork_cleanup_io; 1403 goto bad_fork_cleanup_io;
1404 1404
1405 if (pid != &init_struct_pid) { 1405 if (pid != &init_struct_pid) {
1406 retval = -ENOMEM;
1407 pid = alloc_pid(p->nsproxy->pid_ns_for_children); 1406 pid = alloc_pid(p->nsproxy->pid_ns_for_children);
1408 if (!pid) 1407 if (IS_ERR(pid)) {
1408 retval = PTR_ERR(pid);
1409 goto bad_fork_cleanup_io; 1409 goto bad_fork_cleanup_io;
1410 }
1410 } 1411 }
1411 1412
1412 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1413 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
diff --git a/kernel/pid.c b/kernel/pid.c
index cd36a5e0d173..4fd07d5b7baf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -182,7 +182,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
182 spin_unlock_irq(&pidmap_lock); 182 spin_unlock_irq(&pidmap_lock);
183 kfree(page); 183 kfree(page);
184 if (unlikely(!map->page)) 184 if (unlikely(!map->page))
185 break; 185 return -ENOMEM;
186 } 186 }
187 if (likely(atomic_read(&map->nr_free))) { 187 if (likely(atomic_read(&map->nr_free))) {
188 for ( ; ; ) { 188 for ( ; ; ) {
@@ -210,7 +210,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
210 } 210 }
211 pid = mk_pid(pid_ns, map, offset); 211 pid = mk_pid(pid_ns, map, offset);
212 } 212 }
213 return -1; 213 return -EAGAIN;
214} 214}
215 215
216int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) 216int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
@@ -301,17 +301,20 @@ struct pid *alloc_pid(struct pid_namespace *ns)
301 int i, nr; 301 int i, nr;
302 struct pid_namespace *tmp; 302 struct pid_namespace *tmp;
303 struct upid *upid; 303 struct upid *upid;
304 int retval = -ENOMEM;
304 305
305 pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); 306 pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
306 if (!pid) 307 if (!pid)
307 goto out; 308 return ERR_PTR(retval);
308 309
309 tmp = ns; 310 tmp = ns;
310 pid->level = ns->level; 311 pid->level = ns->level;
311 for (i = ns->level; i >= 0; i--) { 312 for (i = ns->level; i >= 0; i--) {
312 nr = alloc_pidmap(tmp); 313 nr = alloc_pidmap(tmp);
313 if (nr < 0) 314 if (IS_ERR_VALUE(nr)) {
315 retval = nr;
314 goto out_free; 316 goto out_free;
317 }
315 318
316 pid->numbers[i].nr = nr; 319 pid->numbers[i].nr = nr;
317 pid->numbers[i].ns = tmp; 320 pid->numbers[i].ns = tmp;
@@ -339,7 +342,6 @@ struct pid *alloc_pid(struct pid_namespace *ns)
339 } 342 }
340 spin_unlock_irq(&pidmap_lock); 343 spin_unlock_irq(&pidmap_lock);
341 344
342out:
343 return pid; 345 return pid;
344 346
345out_unlock: 347out_unlock:
@@ -351,8 +353,7 @@ out_free:
351 free_pidmap(pid->numbers + i); 353 free_pidmap(pid->numbers + i);
352 354
353 kmem_cache_free(ns->pid_cachep, pid); 355 kmem_cache_free(ns->pid_cachep, pid);
354 pid = NULL; 356 return ERR_PTR(retval);
355 goto out;
356} 357}
357 358
358void disable_pid_allocation(struct pid_namespace *ns) 359void disable_pid_allocation(struct pid_namespace *ns)