diff options
author | Christian Brauner <christian@brauner.io> | 2019-04-07 15:18:11 -0400 |
---|---|---|
committer | Christian Brauner <christian@brauner.io> | 2019-05-07 08:31:04 -0400 |
commit | 43c6afee48d4d866d5eb984d3a5dbbc7d9b4e7bf (patch) | |
tree | 19fa00ede54949757cea719765db5ec86fb89f2a /samples | |
parent | 2151ad1b067275730de1b38c7257478cae47d29e (diff) |
samples: show race-free pidfd metadata access
This is a sample program showing userspace how to get race-free access
to process metadata from a pidfd. It is rather easy to do and userspace
can actually simply reuse code that currently parses a process's status
file in procfs.
The program can easily be extended into a generic helper suitable for
inclusion in a libc to make it even easier for userspace to gain metadata
access.
Since this came up in a discussion because this API is going to be used
in various service managers: A lot of programs will have a whitelist
seccomp filter that returns <some-errno> for all new syscalls. This
means that programs might get confused if CLONE_PIDFD works but the
later pidfd_send_signal() syscall doesn't. Hence, here's an ahead-of-time
check that pidfd_send_signal() is supported:
/*
 * Probe whether pidfd_send_signal() is usable by sending the null signal
 * (signal 0) to the calling process through a /proc/self descriptor.
 *
 * Returns true when the probe succeeds, false when /proc/self cannot be
 * opened or the probe call fails.
 */
bool pidfd_send_signal_supported(void)
{
	int procfd, ret;

	procfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (procfd < 0)
		return false;

	/*
	 * A process is always allowed to signal itself so
	 * pidfd_send_signal() should never fail this test. If it does
	 * it must mean it is not available, blocked by an LSM, seccomp,
	 * or other.
	 */
	ret = pidfd_send_signal(procfd, 0, NULL, 0);

	/* The descriptor was only needed for the probe; do not leak it. */
	close(procfd);

	return ret == 0;
}
Signed-off-by: Christian Brauner <christian@brauner.io>
Co-developed-by: Jann Horn <jannh@google.com>
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/Makefile | 2 | ||||
-rw-r--r-- | samples/pidfd/Makefile | 6 | ||||
-rw-r--r-- | samples/pidfd/pidfd-metadata.c | 112 |
3 files changed, 119 insertions, 1 deletions
diff --git a/samples/Makefile b/samples/Makefile index b1142a958811..fadadb1c3b05 100644 --- a/samples/Makefile +++ b/samples/Makefile | |||
@@ -3,4 +3,4 @@ | |||
3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ | 3 | obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ |
4 | hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ | 4 | hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ |
5 | configfs/ connector/ v4l/ trace_printk/ \ | 5 | configfs/ connector/ v4l/ trace_printk/ \ |
6 | vfio-mdev/ statx/ qmi/ binderfs/ | 6 | vfio-mdev/ statx/ qmi/ binderfs/ pidfd/ |
diff --git a/samples/pidfd/Makefile b/samples/pidfd/Makefile new file mode 100644 index 000000000000..0ff97784177a --- /dev/null +++ b/samples/pidfd/Makefile | |||
@@ -0,0 +1,6 @@ | |||
1 | # SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | hostprogs-y := pidfd-metadata | ||
4 | always := $(hostprogs-y) | ||
5 | HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include | ||
6 | all: pidfd-metadata | ||
diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c new file mode 100644 index 000000000000..640f5f757c57 --- /dev/null +++ b/samples/pidfd/pidfd-metadata.c | |||
@@ -0,0 +1,112 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | |||
3 | #define _GNU_SOURCE | ||
4 | #include <err.h> | ||
5 | #include <errno.h> | ||
6 | #include <fcntl.h> | ||
7 | #include <inttypes.h> | ||
8 | #include <limits.h> | ||
9 | #include <sched.h> | ||
10 | #include <signal.h> | ||
11 | #include <stdio.h> | ||
12 | #include <stdlib.h> | ||
13 | #include <string.h> | ||
14 | #include <sys/stat.h> | ||
15 | #include <sys/syscall.h> | ||
16 | #include <sys/types.h> | ||
17 | #include <sys/wait.h> | ||
18 | #include <unistd.h> | ||
19 | |||
20 | #ifndef CLONE_PIDFD | ||
21 | #define CLONE_PIDFD 0x00001000 | ||
22 | #endif | ||
23 | |||
/*
 * Entry point of the cloned child: print the child's own PID on stdout and
 * terminate with EXIT_SUCCESS.
 *
 * @args: unused (pidfd_clone() passes NULL).
 *
 * Does not return; the int return type only satisfies the clone() callback
 * signature.
 */
static int do_child(void *args)
{
	(void)args;

	printf("%d\n", getpid());

	/*
	 * _exit() does not flush stdio buffers. Without an explicit flush the
	 * line above is lost whenever stdout is not line-buffered, e.g. when
	 * it is redirected to a pipe or a file.
	 */
	fflush(stdout);

	_exit(EXIT_SUCCESS);
}
29 | |||
30 | static pid_t pidfd_clone(int flags, int *pidfd) | ||
31 | { | ||
32 | size_t stack_size = 1024; | ||
33 | char *stack[1024] = { 0 }; | ||
34 | |||
35 | #ifdef __ia64__ | ||
36 | return __clone2(do_child, stack, stack_size, flags | SIGCHLD, NULL, pidfd); | ||
37 | #else | ||
38 | return clone(do_child, stack + stack_size, flags | SIGCHLD, NULL, pidfd); | ||
39 | #endif | ||
40 | } | ||
41 | |||
/*
 * Raw wrapper around the pidfd_send_signal system call.
 *
 * @pidfd: file descriptor identifying the target process.
 * @sig:   signal number to send.
 * @info:  optional siginfo to deliver; callers in this file pass NULL.
 * @flags: flags argument forwarded unchanged to the system call.
 *
 * Returns the value produced by syscall(), narrowed to int. The caller in
 * this file treats a negative result as failure and inspects errno.
 */
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
{
	long rc;

	rc = syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);

	return (int)rc;
}
47 | |||
/*
 * Obtain a /proc/<pid> directory descriptor that is known to belong to the
 * process referred to by @pidfd.
 *
 * @pid:   numeric PID used to build the /proc/<pid> path.
 * @pidfd: pidfd for the same process, used to validate the procfs handle.
 *
 * The directory is opened first and the process is probed through @pidfd
 * afterwards: if the probe shows the process still exists, the PID cannot
 * have been recycled before the open.
 *
 * Returns the open directory descriptor (caller must close it), or -1 on
 * failure after printing a diagnostic.
 */
static int pidfd_metadata_fd(pid_t pid, int pidfd)
{
	int procfd, ret;
	char path[100];

	snprintf(path, sizeof(path), "/proc/%d", pid);
	procfd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (procfd < 0) {
		/* warn() appends ": <strerror>\n" itself; no "\n" here. */
		warn("Failed to open %s", path);
		return -1;
	}

	/*
	 * Verify that the pid has not been recycled and our /proc/<pid> handle
	 * is still valid.
	 */
	ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
	if (ret < 0) {
		switch (errno) {
		case EPERM:
			/* Process exists, just not allowed to signal it. */
			break;
		default:
			/* Report first: close() may overwrite errno. */
			warn("Failed to signal process");
			close(procfd);
			procfd = -1;
			break;
		}
	}

	return procfd;
}
79 | |||
80 | int main(int argc, char *argv[]) | ||
81 | { | ||
82 | int pidfd = 0, ret = EXIT_FAILURE; | ||
83 | char buf[4096] = { 0 }; | ||
84 | pid_t pid; | ||
85 | int procfd, statusfd; | ||
86 | ssize_t bytes; | ||
87 | |||
88 | pid = pidfd_clone(CLONE_PIDFD, &pidfd); | ||
89 | if (pid < 0) | ||
90 | exit(ret); | ||
91 | |||
92 | procfd = pidfd_metadata_fd(pid, pidfd); | ||
93 | close(pidfd); | ||
94 | if (procfd < 0) | ||
95 | goto out; | ||
96 | |||
97 | statusfd = openat(procfd, "status", O_RDONLY | O_CLOEXEC); | ||
98 | close(procfd); | ||
99 | if (statusfd < 0) | ||
100 | goto out; | ||
101 | |||
102 | bytes = read(statusfd, buf, sizeof(buf)); | ||
103 | if (bytes > 0) | ||
104 | bytes = write(STDOUT_FILENO, buf, bytes); | ||
105 | close(statusfd); | ||
106 | ret = EXIT_SUCCESS; | ||
107 | |||
108 | out: | ||
109 | (void)wait(NULL); | ||
110 | |||
111 | exit(ret); | ||
112 | } | ||