aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2018-07-23 16:20:37 -0400
committerEric W. Biederman <ebiederm@xmission.com>2018-08-09 14:07:01 -0400
commitc3ad2c3b02e953ead2b8d52a0c9e70312930c3d0 (patch)
tree482a2618879775b852824e975b5c8e2ad7163745
parent924de3b8c9410c404c6eda7abffd282b97b3ff7f (diff)
signal: Don't restart fork when signals come in.
Wen Yang <wen.yang99@zte.com.cn> and majiang <ma.jiang@zte.com.cn> report that a periodic signal received during fork can cause fork to continually restart preventing an application from making progress. The code was being overly pessimistic. Fork needs to guarantee that a signal sent to multiple processes is logically delivered before the fork and just to the forking process or logically delivered after the fork to both the forking process and it's newly spawned child. For signals like periodic timers that are always delivered to a single process fork can safely complete and let them appear to logically delivered after the fork(). While examining this issue I also discovered that fork today will miss signals delivered to multiple processes during the fork and handled by another thread. Similarly the current code will also miss blocked signals that are delivered to multiple process, as those signals will not appear pending during fork. Add a list of each thread that is currently forking, and keep on that list a signal set that records all of the signals sent to multiple processes. When fork completes initialize the new processes shared_pending signal set with it. The calculate_sigpending function will see those signals and set TIF_SIGPENDING causing the new task to take the slow path to userspace to handle those signals. Making it appear as if those signals were received immediately after the fork. It is not possible to send real time signals to multiple processes and exceptions don't go to multiple processes, which means that that are no signals sent to multiple processes that require siginfo. This means it is safe to not bother collecting siginfo on signals sent during fork. The sigaction of a child of fork is initially the same as the sigaction of the parent process. So a signal the parent ignores the child will also initially ignore. Therefore it is safe to ignore signals sent to multiple processes and ignored by the forking process. Signals sent to only a single process or only a single thread and delivered during fork are treated as if they are received after the fork, and generally not dealt with. They won't cause any problems. V2: Added removal from the multiprocess list on failure. V3: Use -ERESTARTNOINTR directly V4: - Don't queue both SIGCONT and SIGSTOP - Initialize signal_struct.multiprocess in init_task - Move setting of shared_pending to before the new task is visible to signals. This prevents signals from comming in before shared_pending.signal is set to delayed.signal and being lost. V5: - rework list add and delete to account for idle threads v6: - Use sigdelsetmask when removing stop signals Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200447 Reported-by: Wen Yang <wen.yang99@zte.com.cn> and Reported-by: majiang <ma.jiang@zte.com.cn> Fixes: 4a2c7a7837da ("[PATCH] make fork() atomic wrt pgrp/session signals") Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r--include/linux/sched/signal.h8
-rw-r--r--init/init_task.c1
-rw-r--r--kernel/fork.c43
-rw-r--r--kernel/signal.c15
4 files changed, 49 insertions, 18 deletions
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ae2b0b81be25..4e9b77fb702d 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -69,6 +69,11 @@ struct thread_group_cputimer {
69 bool checking_timer; 69 bool checking_timer;
70}; 70};
71 71
72struct multiprocess_signals {
73 sigset_t signal;
74 struct hlist_node node;
75};
76
72/* 77/*
73 * NOTE! "signal_struct" does not have its own 78 * NOTE! "signal_struct" does not have its own
74 * locking, because a shared signal_struct always 79 * locking, because a shared signal_struct always
@@ -90,6 +95,9 @@ struct signal_struct {
90 /* shared signal handling: */ 95 /* shared signal handling: */
91 struct sigpending shared_pending; 96 struct sigpending shared_pending;
92 97
98 /* For collecting multiprocess signals during fork */
99 struct hlist_head multiprocess;
100
93 /* thread group exit support */ 101 /* thread group exit support */
94 int group_exit_code; 102 int group_exit_code;
95 /* overloaded: 103 /* overloaded:
diff --git a/init/init_task.c b/init/init_task.c
index 4f97846256d7..5aebe3be4d7c 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -22,6 +22,7 @@ static struct signal_struct init_signals = {
22 .list = LIST_HEAD_INIT(init_signals.shared_pending.list), 22 .list = LIST_HEAD_INIT(init_signals.shared_pending.list),
23 .signal = {{0}} 23 .signal = {{0}}
24 }, 24 },
25 .multiprocess = HLIST_HEAD_INIT,
25 .rlim = INIT_RLIMITS, 26 .rlim = INIT_RLIMITS,
26 .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), 27 .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
27#ifdef CONFIG_POSIX_TIMERS 28#ifdef CONFIG_POSIX_TIMERS
diff --git a/kernel/fork.c b/kernel/fork.c
index ab731e15a600..411e34acace7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1456,6 +1456,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1456 init_waitqueue_head(&sig->wait_chldexit); 1456 init_waitqueue_head(&sig->wait_chldexit);
1457 sig->curr_target = tsk; 1457 sig->curr_target = tsk;
1458 init_sigpending(&sig->shared_pending); 1458 init_sigpending(&sig->shared_pending);
1459 INIT_HLIST_HEAD(&sig->multiprocess);
1459 seqlock_init(&sig->stats_lock); 1460 seqlock_init(&sig->stats_lock);
1460 prev_cputime_init(&sig->prev_cputime); 1461 prev_cputime_init(&sig->prev_cputime);
1461 1462
@@ -1602,6 +1603,7 @@ static __latent_entropy struct task_struct *copy_process(
1602{ 1603{
1603 int retval; 1604 int retval;
1604 struct task_struct *p; 1605 struct task_struct *p;
1606 struct multiprocess_signals delayed;
1605 1607
1606 /* 1608 /*
1607 * Don't allow sharing the root directory with processes in a different 1609 * Don't allow sharing the root directory with processes in a different
@@ -1649,6 +1651,24 @@ static __latent_entropy struct task_struct *copy_process(
1649 return ERR_PTR(-EINVAL); 1651 return ERR_PTR(-EINVAL);
1650 } 1652 }
1651 1653
1654 /*
1655 * Force any signals received before this point to be delivered
1656 * before the fork happens. Collect up signals sent to multiple
1657 * processes that happen during the fork and delay them so that
1658 * they appear to happen after the fork.
1659 */
1660 sigemptyset(&delayed.signal);
1661 INIT_HLIST_NODE(&delayed.node);
1662
1663 spin_lock_irq(&current->sighand->siglock);
1664 if (!(clone_flags & CLONE_THREAD))
1665 hlist_add_head(&delayed.node, &current->signal->multiprocess);
1666 recalc_sigpending();
1667 spin_unlock_irq(&current->sighand->siglock);
1668 retval = -ERESTARTNOINTR;
1669 if (signal_pending(current))
1670 goto fork_out;
1671
1652 retval = -ENOMEM; 1672 retval = -ENOMEM;
1653 p = dup_task_struct(current, node); 1673 p = dup_task_struct(current, node);
1654 if (!p) 1674 if (!p)
@@ -1934,22 +1954,6 @@ static __latent_entropy struct task_struct *copy_process(
1934 goto bad_fork_cancel_cgroup; 1954 goto bad_fork_cancel_cgroup;
1935 } 1955 }
1936 1956
1937 if (!(clone_flags & CLONE_THREAD)) {
1938 /*
1939 * Process group and session signals need to be delivered to just the
1940 * parent before the fork or both the parent and the child after the
1941 * fork. Restart if a signal comes in before we add the new process to
1942 * it's process group.
1943 * A fatal signal pending means that current will exit, so the new
1944 * thread can't slip out of an OOM kill (or normal SIGKILL).
1945 */
1946 recalc_sigpending();
1947 if (signal_pending(current)) {
1948 retval = -ERESTARTNOINTR;
1949 goto bad_fork_cancel_cgroup;
1950 }
1951 }
1952
1953 1957
1954 init_task_pid_links(p); 1958 init_task_pid_links(p);
1955 if (likely(p->pid)) { 1959 if (likely(p->pid)) {
@@ -1965,7 +1969,7 @@ static __latent_entropy struct task_struct *copy_process(
1965 ns_of_pid(pid)->child_reaper = p; 1969 ns_of_pid(pid)->child_reaper = p;
1966 p->signal->flags |= SIGNAL_UNKILLABLE; 1970 p->signal->flags |= SIGNAL_UNKILLABLE;
1967 } 1971 }
1968 1972 p->signal->shared_pending.signal = delayed.signal;
1969 p->signal->tty = tty_kref_get(current->signal->tty); 1973 p->signal->tty = tty_kref_get(current->signal->tty);
1970 /* 1974 /*
1971 * Inherit has_child_subreaper flag under the same 1975 * Inherit has_child_subreaper flag under the same
@@ -1993,8 +1997,8 @@ static __latent_entropy struct task_struct *copy_process(
1993 attach_pid(p, PIDTYPE_PID); 1997 attach_pid(p, PIDTYPE_PID);
1994 nr_threads++; 1998 nr_threads++;
1995 } 1999 }
1996
1997 total_forks++; 2000 total_forks++;
2001 hlist_del_init(&delayed.node);
1998 spin_unlock(&current->sighand->siglock); 2002 spin_unlock(&current->sighand->siglock);
1999 syscall_tracepoint_update(p); 2003 syscall_tracepoint_update(p);
2000 write_unlock_irq(&tasklist_lock); 2004 write_unlock_irq(&tasklist_lock);
@@ -2059,6 +2063,9 @@ bad_fork_free:
2059 put_task_stack(p); 2063 put_task_stack(p);
2060 free_task(p); 2064 free_task(p);
2061fork_out: 2065fork_out:
2066 spin_lock_irq(&current->sighand->siglock);
2067 hlist_del_init(&delayed.node);
2068 spin_unlock_irq(&current->sighand->siglock);
2062 return ERR_PTR(retval); 2069 return ERR_PTR(retval);
2063} 2070}
2064 2071
diff --git a/kernel/signal.c b/kernel/signal.c
index 9f0eafb6d474..cfa9d10e731a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1121,6 +1121,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1121out_set: 1121out_set:
1122 signalfd_notify(t, sig); 1122 signalfd_notify(t, sig);
1123 sigaddset(&pending->signal, sig); 1123 sigaddset(&pending->signal, sig);
1124
1125 /* Let multiprocess signals appear after on-going forks */
1126 if (type > PIDTYPE_TGID) {
1127 struct multiprocess_signals *delayed;
1128 hlist_for_each_entry(delayed, &t->signal->multiprocess, node) {
1129 sigset_t *signal = &delayed->signal;
1130 /* Can't queue both a stop and a continue signal */
1131 if (sig == SIGCONT)
1132 sigdelsetmask(signal, SIG_KERNEL_STOP_MASK);
1133 else if (sig_kernel_stop(sig))
1134 sigdelset(signal, SIGCONT);
1135 sigaddset(signal, sig);
1136 }
1137 }
1138
1124 complete_signal(sig, t, type); 1139 complete_signal(sig, t, type);
1125ret: 1140ret:
1126 trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); 1141 trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result);