summaryrefslogtreecommitdiffstats
path: root/kernel/pid.c
diff options
context:
space:
mode:
authorChristian Brauner <christian@brauner.io>2019-05-24 06:43:51 -0400
committerChristian Brauner <christian@brauner.io>2019-06-28 06:17:55 -0400
commit32fcb426ec001cb6d5a4a195091a8486ea77e2df (patch)
tree69a4a06ba72d2d35b43806118e8474698f823b42 /kernel/pid.c
parent740378dc7834bc511ac1ecb2157696681d2a1d32 (diff)
pid: add pidfd_open()
This adds the pidfd_open() syscall. It allows a caller to retrieve pollable pidfds for a process which did not get created via CLONE_PIDFD, i.e. for a process that is created via traditional fork()/clone() calls and is only referenced by a PID: int pidfd = pidfd_open(1234, 0); ret = pidfd_send_signal(pidfd, SIGSTOP, NULL, 0); With the introduction of pidfds through CLONE_PIDFD it is possible to create pidfds at process creation time. However, a lot of processes get created with traditional PID-based calls such as fork() or clone() (without CLONE_PIDFD). For these processes a caller currently cannot create a pollable pidfd. This is a problem for Android's low memory killer (LMK) and service managers such as systemd. Both are examples of tools that want to make use of pidfds to get reliable notification of process exit for non-parents (pidfd polling) and race-free signal sending (pidfd_send_signal()). They intend to switch to this API for process supervision/management as soon as possible. Having no way to get pollable pidfds from PID-only processes is one of the biggest blockers for them in adopting this API. With pidfd_open() making it possible to retrieve pidfds for PID-based processes we enable them to adopt this API. In line with Arnd's recent changes to consolidate syscall numbers across architectures, I have added the pidfd_open() syscall to all architectures at the same time. Signed-off-by: Christian Brauner <christian@brauner.io> Reviewed-by: David Howells <dhowells@redhat.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Cc: "Eric W. 
Biederman" <ebiederm@xmission.com> Cc: Kees Cook <keescook@chromium.org> Cc: Joel Fernandes (Google) <joel@joelfernandes.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Jann Horn <jannh@google.com> Cc: Andy Lutomirsky <luto@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Aleksa Sarai <cyphar@cyphar.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-api@vger.kernel.org
Diffstat (limited to 'kernel/pid.c')
-rw-r--r--kernel/pid.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/kernel/pid.c b/kernel/pid.c
index 6ce3a95968f7..8e6f50053364 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -37,6 +37,8 @@
37#include <linux/syscalls.h> 37#include <linux/syscalls.h>
38#include <linux/proc_ns.h> 38#include <linux/proc_ns.h>
39#include <linux/proc_fs.h> 39#include <linux/proc_fs.h>
40#include <linux/anon_inodes.h>
41#include <linux/sched/signal.h>
40#include <linux/sched/task.h> 42#include <linux/sched/task.h>
41#include <linux/idr.h> 43#include <linux/idr.h>
42 44
@@ -452,6 +454,73 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
452 return idr_get_next(&ns->idr, &nr); 454 return idr_get_next(&ns->idr, &nr);
453} 455}
454 456
457/**
458 * pidfd_create() - Create a new pid file descriptor.
459 *
460 * @pid: struct pid that the pidfd will reference
461 *
462 * This creates a new pid file descriptor with the O_CLOEXEC flag set.
463 *
464 * Note, that this function can only be called after the fd table has
465 * been unshared to avoid leaking the pidfd to the new process.
466 *
467 * Return: On success, a cloexec pidfd is returned.
468 * On error, a negative errno number will be returned.
469 */
470static int pidfd_create(struct pid *pid)
471{
472 int fd;
473
474 fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
475 O_RDWR | O_CLOEXEC);
476 if (fd < 0)
477 put_pid(pid);
478
479 return fd;
480}
481
482/**
483 * pidfd_open() - Open new pid file descriptor.
484 *
485 * @pid: pid for which to retrieve a pidfd
486 * @flags: flags to pass
487 *
488 * This creates a new pid file descriptor with the O_CLOEXEC flag set for
489 * the process identified by @pid. Currently, the process identified by
490 * @pid must be a thread-group leader. This restriction currently exists
491 * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
492 * be used with CLONE_THREAD) and pidfd polling (only supports thread group
493 * leaders).
494 *
495 * Return: On success, a cloexec pidfd is returned.
496 * On error, a negative errno number will be returned.
497 */
498SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
499{
500 int fd, ret;
501 struct pid *p;
502
503 if (flags)
504 return -EINVAL;
505
506 if (pid <= 0)
507 return -EINVAL;
508
509 p = find_get_pid(pid);
510 if (!p)
511 return -ESRCH;
512
513 ret = 0;
514 rcu_read_lock();
515 if (!pid_task(p, PIDTYPE_TGID))
516 ret = -EINVAL;
517 rcu_read_unlock();
518
519 fd = ret ?: pidfd_create(p);
520 put_pid(p);
521 return fd;
522}
523
455void __init pid_idr_init(void) 524void __init pid_idr_init(void)
456{ 525{
457 /* Verify no one has done anything silly: */ 526 /* Verify no one has done anything silly: */