From 259e5e6c75a910f3b5e656151dc602f53f9d7548 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 12 Apr 2012 16:47:50 -0500 Subject: Add PR_{GET,SET}_NO_NEW_PRIVS to prevent execve from granting privs With this change, calling prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) disables privilege granting operations at execve-time. For example, a process will not be able to execute a setuid binary to change their uid or gid if this bit is set. The same is true for file capabilities. Additionally, LSM_UNSAFE_NO_NEW_PRIVS is defined to ensure that LSMs respect the requested behavior. To determine if the NO_NEW_PRIVS bit is set, a task may call prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); It returns 1 if set and 0 if it is not set. If any of the arguments are non-zero, it will return -1 and set errno to -EINVAL. (PR_SET_NO_NEW_PRIVS behaves similarly.) This functionality is desired for the proposed seccomp filter patch series. By using PR_SET_NO_NEW_PRIVS, it allows a task to modify the system call behavior for itself and its child tasks without being able to impact the behavior of a more privileged task. Another potential use is making certain privileged operations unprivileged. For example, chroot may be considered "safe" if it cannot affect privileged tasks. Note, this patch causes execve to fail when PR_SET_NO_NEW_PRIVS is set and AppArmor is in use. It is fixed in a subsequent patch. Signed-off-by: Andy Lutomirski Signed-off-by: Will Drewry Acked-by: Eric Paris Acked-by: Kees Cook v18: updated change desc v17: using new define values as per 3.4 Signed-off-by: James Morris --- include/linux/prctl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux/prctl.h') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index e0cfec2490aa..78b76e24cc7e 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -124,4 +124,19 @@ #define PR_SET_CHILD_SUBREAPER 36 #define PR_GET_CHILD_SUBREAPER 37 +/* + * If no_new_privs is set, then operations that grant new privileges (i.e. + * execve) will either fail or not grant them. This affects suid/sgid, + * file capabilities, and LSMs. + * + * Operations that merely manipulate or drop existing privileges (setresuid, + * capset, etc.) will still work. Drop those privileges if you want them gone. + * + * Changing LSM security domain is considered a new privilege. So, for example, + * asking selinux for a specific new context (e.g. with runcon) will result + * in execve returning -EPERM. + */ +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.2 From fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:45 -0700 Subject: c/r: prctl: extend PR_SET_MM to set up more mm_struct entries During checkpoint we dump whole process memory to a file and the dump includes process stack memory. But among stack data itself, the stack carries additional parameters such as command line arguments, environment data and auxiliary vector. So when we do restore procedure and once we've restored stack data itself we need to setup mm_struct::arg_start/end, env_start/end, so restored process would be able to find command line arguments and environment data it had at checkpoint time. The same applies to auxiliary vector. For this reason additional PR_SET_MM_(ARG_START | ARG_END | ENV_START | ENV_END | AUXV) codes are introduced. Signed-off-by: Cyrill Gorcunov Acked-by: Kees Cook Cc: Tejun Heo Cc: Andrew Vagin Cc: Serge Hallyn Cc: Pavel Emelyanov Cc: Vasiliy Kulikov Cc: KAMEZAWA Hiroyuki Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/prctl.h') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 78b76e24cc7e..18d84c4b42d8 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -113,6 +113,11 @@ # define PR_SET_MM_START_STACK 5 # define PR_SET_MM_START_BRK 6 # define PR_SET_MM_BRK 7 +# define PR_SET_MM_ARG_START 8 +# define PR_SET_MM_ARG_END 9 +# define PR_SET_MM_ENV_START 10 +# define PR_SET_MM_ENV_END 11 +# define PR_SET_MM_AUXV 12 /* * Set specific pid that is allowed to ptrace the current task. -- cgit v1.2.2 From b32dfe377102ce668775f8b6b1461f7ad428f8b6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:46 -0700 Subject: c/r: prctl: add ability to set new mm_struct::exe_file When we do restore we would like to have a way to setup a former mm_struct::exe_file so that /proc/pid/exe would point to the original executable file a process had at checkpoint time. For this the PR_SET_MM_EXE_FILE code is introduced. This option takes a file descriptor which will be set as a source for new /proc/$pid/exe symlink. Note it allows to change /proc/$pid/exe if there are no VM_EXECUTABLE vmas present for current process, simply because this feature is a special to C/R and mm::num_exe_file_vmas become meaningless after that. To minimize the amount of transition the /proc/pid/exe symlink might have, this feature is implemented in one-shot manner. Thus once changed the symlink can't be changed again. This should help sysadmins to monitor the symlinks over all process running in a system. In particular one could make a snapshot of processes and ring alarm if there unexpected changes of /proc/pid/exe's in a system. Note -- this feature is available iif CONFIG_CHECKPOINT_RESTORE is set and the caller must have CAP_SYS_RESOURCE capability granted, otherwise the request to change symlink will be rejected. Signed-off-by: Cyrill Gorcunov Reviewed-by: Oleg Nesterov Cc: KOSAKI Motohiro Cc: Pavel Emelyanov Cc: Kees Cook Cc: Tejun Heo Cc: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/prctl.h') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 18d84c4b42d8..711e0a30aacc 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -118,6 +118,7 @@ # define PR_SET_MM_ENV_START 10 # define PR_SET_MM_ENV_END 11 # define PR_SET_MM_AUXV 12 +# define PR_SET_MM_EXE_FILE 13 /* * Set specific pid that is allowed to ptrace the current task. -- cgit v1.2.2