aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorDaniel Lezcano <daniel.lezcano@free.fr>2012-03-28 17:42:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-28 20:14:36 -0400
commitcf3f89214ef6a33fad60856bc5ffd7bb2fc4709b (patch)
treed6f5d7eb93bad10cd146a737a3a72e3459ec3e61 /include/linux
parent5a04cca6c39cdd0b8c75b0628da634248f381b62 (diff)
pidns: add reboot_pid_ns() to handle the reboot syscall
In the case of a child pid namespace, rebooting the system does not really make sense. When the pid namespace is used in conjunction with the other namespaces in order to create a linux container, the reboot syscall leads to some problems. A container can reboot the host. That can be fixed by dropping the sys_reboot capability, but then we are unable to correctly poweroff/halt/reboot a container, and the container stays stuck at shutdown time with the container's init process waiting indefinitely. After several attempts, no solution from userspace was found to reliably handle the shutdown from a container. This patch proposes to make the init process of the child pid namespace exit with a signal status set to: SIGINT if the child pid namespace called "halt/poweroff" and SIGHUP if the child pid namespace called "reboot". When the reboot syscall is called and we are not in the initial pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART", and "RESTART2". Otherwise we return EINVAL. Returning EINVAL is also an easy way to check if this feature is supported by the kernel when invoking another 'reboot' option like CAD. This way the parent process of the child pid namespace knows if it rebooted or not and can take the right decision. 
Test case: ========== #include <alloca.h> #include <stdio.h> #include <sched.h> #include <unistd.h> #include <signal.h> #include <sys/reboot.h> #include <sys/types.h> #include <sys/wait.h> #include <linux/reboot.h> static int do_reboot(void *arg) { int *cmd = arg; if (reboot(*cmd)) printf("failed to reboot(%d): %m\n", *cmd); } int test_reboot(int cmd, int sig) { long stack_size = 4096; void *stack = alloca(stack_size) + stack_size; int status; pid_t ret; ret = clone(do_reboot, stack, CLONE_NEWPID | SIGCHLD, &cmd); if (ret < 0) { printf("failed to clone: %m\n"); return -1; } if (wait(&status) < 0) { printf("unexpected wait error: %m\n"); return -1; } if (!WIFSIGNALED(status)) { printf("child process exited but was not signaled\n"); return -1; } if (WTERMSIG(status) != sig) { printf("signal termination is not the one expected\n"); return -1; } return 0; } int main(int argc, char *argv[]) { int status; status = test_reboot(LINUX_REBOOT_CMD_RESTART, SIGHUP); if (status < 0) return 1; printf("reboot(LINUX_REBOOT_CMD_RESTART) succeed\n"); status = test_reboot(LINUX_REBOOT_CMD_RESTART2, SIGHUP); if (status < 0) return 1; printf("reboot(LINUX_REBOOT_CMD_RESTART2) succeed\n"); status = test_reboot(LINUX_REBOOT_CMD_HALT, SIGINT); if (status < 0) return 1; printf("reboot(LINUX_REBOOT_CMD_HALT) succeed\n"); status = test_reboot(LINUX_REBOOT_CMD_POWER_OFF, SIGINT); if (status < 0) return 1; printf("reboot(LINUX_REBOOT_CMD_POWERR_OFF) succeed\n"); status = test_reboot(LINUX_REBOOT_CMD_CAD_ON, -1); if (status >= 0) { printf("reboot(LINUX_REBOOT_CMD_CAD_ON) should have failed\n"); return 1; } printf("reboot(LINUX_REBOOT_CMD_CAD_ON) has failed as expected\n"); return 0; } [akpm@linux-foundation.org: tweak and add comments] [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr> Acked-by: Serge Hallyn <serge.hallyn@canonical.com> Tested-by: Serge Hallyn <serge.hallyn@canonical.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: Michael 
Kerrisk <mtk.manpages@gmail.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/pid_namespace.h8
1 files changed, 7 insertions, 1 deletions
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f5bd679be46b..b067bd8c49d0 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -33,6 +33,7 @@ struct pid_namespace {
33#endif 33#endif
34 gid_t pid_gid; 34 gid_t pid_gid;
35 int hide_pid; 35 int hide_pid;
36 int reboot; /* group exit code if this pidns was rebooted */
36}; 37};
37 38
38extern struct pid_namespace init_pid_ns; 39extern struct pid_namespace init_pid_ns;
@@ -48,6 +49,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
48extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); 49extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
49extern void free_pid_ns(struct kref *kref); 50extern void free_pid_ns(struct kref *kref);
50extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); 51extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
52extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
51 53
52static inline void put_pid_ns(struct pid_namespace *ns) 54static inline void put_pid_ns(struct pid_namespace *ns)
53{ 55{
@@ -75,11 +77,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
75{ 77{
76} 78}
77 79
78
79static inline void zap_pid_ns_processes(struct pid_namespace *ns) 80static inline void zap_pid_ns_processes(struct pid_namespace *ns)
80{ 81{
81 BUG(); 82 BUG();
82} 83}
84
85static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
86{
87 return 0;
88}
83#endif /* CONFIG_PID_NS */ 89#endif /* CONFIG_PID_NS */
84 90
85extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); 91extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);