about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Oleg Nesterov <oleg@redhat.com> 2012-03-05 17:59:13 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-03-05 18:49:42 -0500
commit: d68b46fe16ad59b3a5f51ec73daaa5dc06753798 (patch)
tree: 4bae9ddc5f5bed853d1f0936d05c30e4f7ca924d
parent: c415c3b47ea2754659d915cca387a20999044163 (diff)
vfork: make it killable
Make vfork() killable.

Change do_fork(CLONE_VFORK) to do wait_for_completion_killable(). If it fails we do not return to the user-mode and never touch the memory shared with our child.

However, in this case we should clear child->vfork_done before return, we use task_lock() in do_fork()->wait_for_vfork_done() and complete_vfork_done() to serialize with each other.

Note: now that we use task_lock() we don't really need completion, we could turn task->vfork_done into "task_struct *wake_up_me" but this needs some complications.

NOTE: this and the next patches do not affect in-kernel users of CLONE_VFORK, kernel threads run with all signals ignored including SIGKILL/SIGSTOP.

However this is obviously the user-visible change. Not only a fatal signal can kill the vforking parent, a sub-thread can do execve or exit_group() and kill the thread sleeping in vfork().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/sched.h  2
-rw-r--r--  kernel/fork.c  40
2 files changed, 33 insertions, 9 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1b25a37f2aee..b6467711f12e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2372,7 +2372,7 @@ static inline int thread_group_empty(struct task_struct *p)
2372 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring 2372 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
2373 * subscriptions and synchronises with wait4(). Also used in procfs. Also 2373 * subscriptions and synchronises with wait4(). Also used in procfs. Also
2374 * pins the final release of task.io_context. Also protects ->cpuset and 2374 * pins the final release of task.io_context. Also protects ->cpuset and
2375 * ->cgroup.subsys[]. 2375 * ->cgroup.subsys[]. And ->vfork_done.
2376 * 2376 *
2377 * Nests both inside and outside of read_lock(&tasklist_lock). 2377 * Nests both inside and outside of read_lock(&tasklist_lock).
2378 * It must not be nested with write_lock_irq(&tasklist_lock), 2378 * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/kernel/fork.c b/kernel/fork.c
index cf3d96379608..892c534ce6e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -670,10 +670,34 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
670 670
671void complete_vfork_done(struct task_struct *tsk) 671void complete_vfork_done(struct task_struct *tsk)
672{ 672{
673 struct completion *vfork_done = tsk->vfork_done; 673 struct completion *vfork;
674 674
675 tsk->vfork_done = NULL; 675 task_lock(tsk);
676 complete(vfork_done); 676 vfork = tsk->vfork_done;
677 if (likely(vfork)) {
678 tsk->vfork_done = NULL;
679 complete(vfork);
680 }
681 task_unlock(tsk);
682}
683
684static int wait_for_vfork_done(struct task_struct *child,
685 struct completion *vfork)
686{
687 int killed;
688
689 freezer_do_not_count();
690 killed = wait_for_completion_killable(vfork);
691 freezer_count();
692
693 if (killed) {
694 task_lock(child);
695 child->vfork_done = NULL;
696 task_unlock(child);
697 }
698
699 put_task_struct(child);
700 return killed;
677} 701}
678 702
679/* Please note the differences between mmput and mm_release. 703/* Please note the differences between mmput and mm_release.
@@ -717,7 +741,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
717 * If we're exiting normally, clear a user-space tid field if 741 * If we're exiting normally, clear a user-space tid field if
718 * requested. We leave this alone when dying by signal, to leave 742 * requested. We leave this alone when dying by signal, to leave
719 * the value intact in a core dump, and to save the unnecessary 743 * the value intact in a core dump, and to save the unnecessary
720 * trouble otherwise. Userland only wants this done for a sys_exit. 744 * trouble, say, a killed vfork parent shouldn't touch this mm.
745 * Userland only wants this done for a sys_exit.
721 */ 746 */
722 if (tsk->clear_child_tid) { 747 if (tsk->clear_child_tid) {
723 if (!(tsk->flags & PF_SIGNALED) && 748 if (!(tsk->flags & PF_SIGNALED) &&
@@ -1551,6 +1576,7 @@ long do_fork(unsigned long clone_flags,
1551 if (clone_flags & CLONE_VFORK) { 1576 if (clone_flags & CLONE_VFORK) {
1552 p->vfork_done = &vfork; 1577 p->vfork_done = &vfork;
1553 init_completion(&vfork); 1578 init_completion(&vfork);
1579 get_task_struct(p);
1554 } 1580 }
1555 1581
1556 /* 1582 /*
@@ -1568,10 +1594,8 @@ long do_fork(unsigned long clone_flags,
1568 ptrace_event(trace, nr); 1594 ptrace_event(trace, nr);
1569 1595
1570 if (clone_flags & CLONE_VFORK) { 1596 if (clone_flags & CLONE_VFORK) {
1571 freezer_do_not_count(); 1597 if (!wait_for_vfork_done(p, &vfork))
1572 wait_for_completion(&vfork); 1598 ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
1573 freezer_count();
1574 ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
1575 } 1599 }
1576 } else { 1600 } else {
1577 nr = PTR_ERR(p); 1601 nr = PTR_ERR(p);