aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/exit.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/exit.c')
-rw-r--r--kernel/exit.c317
1 files changed, 162 insertions, 155 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index abf9cf3b95c6..869dc221733e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/personality.h> 13#include <linux/personality.h>
14#include <linux/tty.h> 14#include <linux/tty.h>
15#include <linux/mnt_namespace.h>
16#include <linux/iocontext.h> 15#include <linux/iocontext.h>
17#include <linux/key.h> 16#include <linux/key.h>
18#include <linux/security.h> 17#include <linux/security.h>
@@ -48,7 +47,8 @@
48#include <linux/tracehook.h> 47#include <linux/tracehook.h>
49#include <linux/fs_struct.h> 48#include <linux/fs_struct.h>
50#include <linux/init_task.h> 49#include <linux/init_task.h>
51#include <trace/sched.h> 50#include <linux/perf_counter.h>
51#include <trace/events/sched.h>
52 52
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <asm/unistd.h> 54#include <asm/unistd.h>
@@ -56,10 +56,6 @@
56#include <asm/mmu_context.h> 56#include <asm/mmu_context.h>
57#include "cred-internals.h" 57#include "cred-internals.h"
58 58
59DEFINE_TRACE(sched_process_free);
60DEFINE_TRACE(sched_process_exit);
61DEFINE_TRACE(sched_process_wait);
62
63static void exit_mm(struct task_struct * tsk); 59static void exit_mm(struct task_struct * tsk);
64 60
65static void __unhash_process(struct task_struct *p) 61static void __unhash_process(struct task_struct *p)
@@ -158,6 +154,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
158{ 154{
159 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 155 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
160 156
157#ifdef CONFIG_PERF_COUNTERS
158 WARN_ON_ONCE(tsk->perf_counter_ctxp);
159#endif
161 trace_sched_process_free(tsk); 160 trace_sched_process_free(tsk);
162 put_task_struct(tsk); 161 put_task_struct(tsk);
163} 162}
@@ -174,6 +173,7 @@ repeat:
174 atomic_dec(&__task_cred(p)->user->processes); 173 atomic_dec(&__task_cred(p)->user->processes);
175 174
176 proc_flush_task(p); 175 proc_flush_task(p);
176
177 write_lock_irq(&tasklist_lock); 177 write_lock_irq(&tasklist_lock);
178 tracehook_finish_release_task(p); 178 tracehook_finish_release_task(p);
179 __exit_signal(p); 179 __exit_signal(p);
@@ -374,9 +374,8 @@ static void set_special_pids(struct pid *pid)
374} 374}
375 375
376/* 376/*
377 * Let kernel threads use this to say that they 377 * Let kernel threads use this to say that they allow a certain signal.
378 * allow a certain signal (since daemonize() will 378 * Must not be used if kthread was cloned with CLONE_SIGHAND.
379 * have disabled all of them by default).
380 */ 379 */
381int allow_signal(int sig) 380int allow_signal(int sig)
382{ 381{
@@ -384,14 +383,14 @@ int allow_signal(int sig)
384 return -EINVAL; 383 return -EINVAL;
385 384
386 spin_lock_irq(&current->sighand->siglock); 385 spin_lock_irq(&current->sighand->siglock);
386 /* This is only needed for daemonize()'ed kthreads */
387 sigdelset(&current->blocked, sig); 387 sigdelset(&current->blocked, sig);
388 if (!current->mm) { 388 /*
389 /* Kernel threads handle their own signals. 389 * Kernel threads handle their own signals. Let the signal code
390 Let the signal code know it'll be handled, so 390 * know it'll be handled, so that they don't get converted to
391 that they don't get converted to SIGKILL or 391 * SIGKILL or just silently dropped.
392 just silently dropped */ 392 */
393 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; 393 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
394 }
395 recalc_sigpending(); 394 recalc_sigpending();
396 spin_unlock_irq(&current->sighand->siglock); 395 spin_unlock_irq(&current->sighand->siglock);
397 return 0; 396 return 0;
@@ -590,7 +589,7 @@ retry:
590 /* 589 /*
591 * Search in the siblings 590 * Search in the siblings
592 */ 591 */
593 list_for_each_entry(c, &p->parent->children, sibling) { 592 list_for_each_entry(c, &p->real_parent->children, sibling) {
594 if (c->mm == mm) 593 if (c->mm == mm)
595 goto assign_new_owner; 594 goto assign_new_owner;
596 } 595 }
@@ -757,7 +756,7 @@ static void reparent_thread(struct task_struct *father, struct task_struct *p,
757 p->exit_signal = SIGCHLD; 756 p->exit_signal = SIGCHLD;
758 757
759 /* If it has exited notify the new parent about this child's death. */ 758 /* If it has exited notify the new parent about this child's death. */
760 if (!p->ptrace && 759 if (!task_ptrace(p) &&
761 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { 760 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
762 do_notify_parent(p, p->exit_signal); 761 do_notify_parent(p, p->exit_signal);
763 if (task_detached(p)) { 762 if (task_detached(p)) {
@@ -782,7 +781,7 @@ static void forget_original_parent(struct task_struct *father)
782 list_for_each_entry_safe(p, n, &father->children, sibling) { 781 list_for_each_entry_safe(p, n, &father->children, sibling) {
783 p->real_parent = reaper; 782 p->real_parent = reaper;
784 if (p->parent == father) { 783 if (p->parent == father) {
785 BUG_ON(p->ptrace); 784 BUG_ON(task_ptrace(p));
786 p->parent = p->real_parent; 785 p->parent = p->real_parent;
787 } 786 }
788 reparent_thread(father, p, &dead_children); 787 reparent_thread(father, p, &dead_children);
@@ -975,16 +974,19 @@ NORET_TYPE void do_exit(long code)
975 module_put(tsk->binfmt->module); 974 module_put(tsk->binfmt->module);
976 975
977 proc_exit_connector(tsk); 976 proc_exit_connector(tsk);
977
978 /*
979 * Flush inherited counters to the parent - before the parent
980 * gets woken up by child-exit notifications.
981 */
982 perf_counter_exit_task(tsk);
983
978 exit_notify(tsk, group_dead); 984 exit_notify(tsk, group_dead);
979#ifdef CONFIG_NUMA 985#ifdef CONFIG_NUMA
980 mpol_put(tsk->mempolicy); 986 mpol_put(tsk->mempolicy);
981 tsk->mempolicy = NULL; 987 tsk->mempolicy = NULL;
982#endif 988#endif
983#ifdef CONFIG_FUTEX 989#ifdef CONFIG_FUTEX
984 /*
985 * This must happen late, after the PID is not
986 * hashed anymore:
987 */
988 if (unlikely(!list_empty(&tsk->pi_state_list))) 990 if (unlikely(!list_empty(&tsk->pi_state_list)))
989 exit_pi_state_list(tsk); 991 exit_pi_state_list(tsk);
990 if (unlikely(current->pi_state_cache)) 992 if (unlikely(current->pi_state_cache))
@@ -1077,6 +1079,18 @@ SYSCALL_DEFINE1(exit_group, int, error_code)
1077 return 0; 1079 return 0;
1078} 1080}
1079 1081
1082struct wait_opts {
1083 enum pid_type wo_type;
1084 int wo_flags;
1085 struct pid *wo_pid;
1086
1087 struct siginfo __user *wo_info;
1088 int __user *wo_stat;
1089 struct rusage __user *wo_rusage;
1090
1091 int notask_error;
1092};
1093
1080static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) 1094static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1081{ 1095{
1082 struct pid *pid = NULL; 1096 struct pid *pid = NULL;
@@ -1087,13 +1101,12 @@ static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1087 return pid; 1101 return pid;
1088} 1102}
1089 1103
1090static int eligible_child(enum pid_type type, struct pid *pid, int options, 1104static int eligible_child(struct wait_opts *wo, struct task_struct *p)
1091 struct task_struct *p)
1092{ 1105{
1093 int err; 1106 int err;
1094 1107
1095 if (type < PIDTYPE_MAX) { 1108 if (wo->wo_type < PIDTYPE_MAX) {
1096 if (task_pid_type(p, type) != pid) 1109 if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
1097 return 0; 1110 return 0;
1098 } 1111 }
1099 1112
@@ -1102,8 +1115,8 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1102 * set; otherwise, wait for non-clone children *only*. (Note: 1115 * set; otherwise, wait for non-clone children *only*. (Note:
1103 * A "clone" child here is one that reports to its parent 1116 * A "clone" child here is one that reports to its parent
1104 * using a signal other than SIGCHLD.) */ 1117 * using a signal other than SIGCHLD.) */
1105 if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) 1118 if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
1106 && !(options & __WALL)) 1119 && !(wo->wo_flags & __WALL))
1107 return 0; 1120 return 0;
1108 1121
1109 err = security_task_wait(p); 1122 err = security_task_wait(p);
@@ -1113,14 +1126,15 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1113 return 1; 1126 return 1;
1114} 1127}
1115 1128
1116static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, 1129static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
1117 int why, int status, 1130 pid_t pid, uid_t uid, int why, int status)
1118 struct siginfo __user *infop,
1119 struct rusage __user *rusagep)
1120{ 1131{
1121 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; 1132 struct siginfo __user *infop;
1133 int retval = wo->wo_rusage
1134 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1122 1135
1123 put_task_struct(p); 1136 put_task_struct(p);
1137 infop = wo->wo_info;
1124 if (!retval) 1138 if (!retval)
1125 retval = put_user(SIGCHLD, &infop->si_signo); 1139 retval = put_user(SIGCHLD, &infop->si_signo);
1126 if (!retval) 1140 if (!retval)
@@ -1144,19 +1158,18 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1144 * the lock and this task is uninteresting. If we return nonzero, we have 1158 * the lock and this task is uninteresting. If we return nonzero, we have
1145 * released the lock and the system call should return. 1159 * released the lock and the system call should return.
1146 */ 1160 */
1147static int wait_task_zombie(struct task_struct *p, int options, 1161static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1148 struct siginfo __user *infop,
1149 int __user *stat_addr, struct rusage __user *ru)
1150{ 1162{
1151 unsigned long state; 1163 unsigned long state;
1152 int retval, status, traced; 1164 int retval, status, traced;
1153 pid_t pid = task_pid_vnr(p); 1165 pid_t pid = task_pid_vnr(p);
1154 uid_t uid = __task_cred(p)->uid; 1166 uid_t uid = __task_cred(p)->uid;
1167 struct siginfo __user *infop;
1155 1168
1156 if (!likely(options & WEXITED)) 1169 if (!likely(wo->wo_flags & WEXITED))
1157 return 0; 1170 return 0;
1158 1171
1159 if (unlikely(options & WNOWAIT)) { 1172 if (unlikely(wo->wo_flags & WNOWAIT)) {
1160 int exit_code = p->exit_code; 1173 int exit_code = p->exit_code;
1161 int why, status; 1174 int why, status;
1162 1175
@@ -1169,8 +1182,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
1169 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; 1182 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
1170 status = exit_code & 0x7f; 1183 status = exit_code & 0x7f;
1171 } 1184 }
1172 return wait_noreap_copyout(p, pid, uid, why, 1185 return wait_noreap_copyout(wo, p, pid, uid, why, status);
1173 status, infop, ru);
1174 } 1186 }
1175 1187
1176 /* 1188 /*
@@ -1184,11 +1196,13 @@ static int wait_task_zombie(struct task_struct *p, int options,
1184 } 1196 }
1185 1197
1186 traced = ptrace_reparented(p); 1198 traced = ptrace_reparented(p);
1187 1199 /*
1188 if (likely(!traced)) { 1200 * It can be ptraced but not reparented, check
1201 * !task_detached() to filter out sub-threads.
1202 */
1203 if (likely(!traced) && likely(!task_detached(p))) {
1189 struct signal_struct *psig; 1204 struct signal_struct *psig;
1190 struct signal_struct *sig; 1205 struct signal_struct *sig;
1191 struct task_cputime cputime;
1192 1206
1193 /* 1207 /*
1194 * The resource counters for the group leader are in its 1208 * The resource counters for the group leader are in its
@@ -1201,26 +1215,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
1201 * p->signal fields, because they are only touched by 1215 * p->signal fields, because they are only touched by
1202 * __exit_signal, which runs with tasklist_lock 1216 * __exit_signal, which runs with tasklist_lock
1203 * write-locked anyway, and so is excluded here. We do 1217 * write-locked anyway, and so is excluded here. We do
1204 * need to protect the access to p->parent->signal fields, 1218 * need to protect the access to parent->signal fields,
1205 * as other threads in the parent group can be right 1219 * as other threads in the parent group can be right
1206 * here reaping other children at the same time. 1220 * here reaping other children at the same time.
1207 *
1208 * We use thread_group_cputime() to get times for the thread
1209 * group, which consolidates times for all threads in the
1210 * group including the group leader.
1211 */ 1221 */
1212 thread_group_cputime(p, &cputime); 1222 spin_lock_irq(&p->real_parent->sighand->siglock);
1213 spin_lock_irq(&p->parent->sighand->siglock); 1223 psig = p->real_parent->signal;
1214 psig = p->parent->signal;
1215 sig = p->signal; 1224 sig = p->signal;
1216 psig->cutime = 1225 psig->cutime =
1217 cputime_add(psig->cutime, 1226 cputime_add(psig->cutime,
1218 cputime_add(cputime.utime, 1227 cputime_add(p->utime,
1219 sig->cutime)); 1228 cputime_add(sig->utime,
1229 sig->cutime)));
1220 psig->cstime = 1230 psig->cstime =
1221 cputime_add(psig->cstime, 1231 cputime_add(psig->cstime,
1222 cputime_add(cputime.stime, 1232 cputime_add(p->stime,
1223 sig->cstime)); 1233 cputime_add(sig->stime,
1234 sig->cstime)));
1224 psig->cgtime = 1235 psig->cgtime =
1225 cputime_add(psig->cgtime, 1236 cputime_add(psig->cgtime,
1226 cputime_add(p->gtime, 1237 cputime_add(p->gtime,
@@ -1242,7 +1253,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
1242 sig->oublock + sig->coublock; 1253 sig->oublock + sig->coublock;
1243 task_io_accounting_add(&psig->ioac, &p->ioac); 1254 task_io_accounting_add(&psig->ioac, &p->ioac);
1244 task_io_accounting_add(&psig->ioac, &sig->ioac); 1255 task_io_accounting_add(&psig->ioac, &sig->ioac);
1245 spin_unlock_irq(&p->parent->sighand->siglock); 1256 spin_unlock_irq(&p->real_parent->sighand->siglock);
1246 } 1257 }
1247 1258
1248 /* 1259 /*
@@ -1251,11 +1262,14 @@ static int wait_task_zombie(struct task_struct *p, int options,
1251 */ 1262 */
1252 read_unlock(&tasklist_lock); 1263 read_unlock(&tasklist_lock);
1253 1264
1254 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1265 retval = wo->wo_rusage
1266 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1255 status = (p->signal->flags & SIGNAL_GROUP_EXIT) 1267 status = (p->signal->flags & SIGNAL_GROUP_EXIT)
1256 ? p->signal->group_exit_code : p->exit_code; 1268 ? p->signal->group_exit_code : p->exit_code;
1257 if (!retval && stat_addr) 1269 if (!retval && wo->wo_stat)
1258 retval = put_user(status, stat_addr); 1270 retval = put_user(status, wo->wo_stat);
1271
1272 infop = wo->wo_info;
1259 if (!retval && infop) 1273 if (!retval && infop)
1260 retval = put_user(SIGCHLD, &infop->si_signo); 1274 retval = put_user(SIGCHLD, &infop->si_signo);
1261 if (!retval && infop) 1275 if (!retval && infop)
@@ -1323,15 +1337,18 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
1323 * the lock and this task is uninteresting. If we return nonzero, we have 1337 * the lock and this task is uninteresting. If we return nonzero, we have
1324 * released the lock and the system call should return. 1338 * released the lock and the system call should return.
1325 */ 1339 */
1326static int wait_task_stopped(int ptrace, struct task_struct *p, 1340static int wait_task_stopped(struct wait_opts *wo,
1327 int options, struct siginfo __user *infop, 1341 int ptrace, struct task_struct *p)
1328 int __user *stat_addr, struct rusage __user *ru)
1329{ 1342{
1343 struct siginfo __user *infop;
1330 int retval, exit_code, *p_code, why; 1344 int retval, exit_code, *p_code, why;
1331 uid_t uid = 0; /* unneeded, required by compiler */ 1345 uid_t uid = 0; /* unneeded, required by compiler */
1332 pid_t pid; 1346 pid_t pid;
1333 1347
1334 if (!(options & WUNTRACED)) 1348 /*
1349 * Traditionally we see ptrace'd stopped tasks regardless of options.
1350 */
1351 if (!ptrace && !(wo->wo_flags & WUNTRACED))
1335 return 0; 1352 return 0;
1336 1353
1337 exit_code = 0; 1354 exit_code = 0;
@@ -1345,7 +1362,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
1345 if (!exit_code) 1362 if (!exit_code)
1346 goto unlock_sig; 1363 goto unlock_sig;
1347 1364
1348 if (!unlikely(options & WNOWAIT)) 1365 if (!unlikely(wo->wo_flags & WNOWAIT))
1349 *p_code = 0; 1366 *p_code = 0;
1350 1367
1351 /* don't need the RCU readlock here as we're holding a spinlock */ 1368 /* don't need the RCU readlock here as we're holding a spinlock */
@@ -1367,14 +1384,15 @@ unlock_sig:
1367 why = ptrace ? CLD_TRAPPED : CLD_STOPPED; 1384 why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
1368 read_unlock(&tasklist_lock); 1385 read_unlock(&tasklist_lock);
1369 1386
1370 if (unlikely(options & WNOWAIT)) 1387 if (unlikely(wo->wo_flags & WNOWAIT))
1371 return wait_noreap_copyout(p, pid, uid, 1388 return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
1372 why, exit_code, 1389
1373 infop, ru); 1390 retval = wo->wo_rusage
1391 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1392 if (!retval && wo->wo_stat)
1393 retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
1374 1394
1375 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1395 infop = wo->wo_info;
1376 if (!retval && stat_addr)
1377 retval = put_user((exit_code << 8) | 0x7f, stat_addr);
1378 if (!retval && infop) 1396 if (!retval && infop)
1379 retval = put_user(SIGCHLD, &infop->si_signo); 1397 retval = put_user(SIGCHLD, &infop->si_signo);
1380 if (!retval && infop) 1398 if (!retval && infop)
@@ -1401,15 +1419,13 @@ unlock_sig:
1401 * the lock and this task is uninteresting. If we return nonzero, we have 1419 * the lock and this task is uninteresting. If we return nonzero, we have
1402 * released the lock and the system call should return. 1420 * released the lock and the system call should return.
1403 */ 1421 */
1404static int wait_task_continued(struct task_struct *p, int options, 1422static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1405 struct siginfo __user *infop,
1406 int __user *stat_addr, struct rusage __user *ru)
1407{ 1423{
1408 int retval; 1424 int retval;
1409 pid_t pid; 1425 pid_t pid;
1410 uid_t uid; 1426 uid_t uid;
1411 1427
1412 if (!unlikely(options & WCONTINUED)) 1428 if (!unlikely(wo->wo_flags & WCONTINUED))
1413 return 0; 1429 return 0;
1414 1430
1415 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1431 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
@@ -1421,7 +1437,7 @@ static int wait_task_continued(struct task_struct *p, int options,
1421 spin_unlock_irq(&p->sighand->siglock); 1437 spin_unlock_irq(&p->sighand->siglock);
1422 return 0; 1438 return 0;
1423 } 1439 }
1424 if (!unlikely(options & WNOWAIT)) 1440 if (!unlikely(wo->wo_flags & WNOWAIT))
1425 p->signal->flags &= ~SIGNAL_STOP_CONTINUED; 1441 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1426 uid = __task_cred(p)->uid; 1442 uid = __task_cred(p)->uid;
1427 spin_unlock_irq(&p->sighand->siglock); 1443 spin_unlock_irq(&p->sighand->siglock);
@@ -1430,17 +1446,17 @@ static int wait_task_continued(struct task_struct *p, int options,
1430 get_task_struct(p); 1446 get_task_struct(p);
1431 read_unlock(&tasklist_lock); 1447 read_unlock(&tasklist_lock);
1432 1448
1433 if (!infop) { 1449 if (!wo->wo_info) {
1434 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1450 retval = wo->wo_rusage
1451 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1435 put_task_struct(p); 1452 put_task_struct(p);
1436 if (!retval && stat_addr) 1453 if (!retval && wo->wo_stat)
1437 retval = put_user(0xffff, stat_addr); 1454 retval = put_user(0xffff, wo->wo_stat);
1438 if (!retval) 1455 if (!retval)
1439 retval = pid; 1456 retval = pid;
1440 } else { 1457 } else {
1441 retval = wait_noreap_copyout(p, pid, uid, 1458 retval = wait_noreap_copyout(wo, p, pid, uid,
1442 CLD_CONTINUED, SIGCONT, 1459 CLD_CONTINUED, SIGCONT);
1443 infop, ru);
1444 BUG_ON(retval == 0); 1460 BUG_ON(retval == 0);
1445 } 1461 }
1446 1462
@@ -1450,19 +1466,16 @@ static int wait_task_continued(struct task_struct *p, int options,
1450/* 1466/*
1451 * Consider @p for a wait by @parent. 1467 * Consider @p for a wait by @parent.
1452 * 1468 *
1453 * -ECHILD should be in *@notask_error before the first call. 1469 * -ECHILD should be in ->notask_error before the first call.
1454 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1470 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1455 * Returns zero if the search for a child should continue; 1471 * Returns zero if the search for a child should continue;
1456 * then *@notask_error is 0 if @p is an eligible child, 1472 * then ->notask_error is 0 if @p is an eligible child,
1457 * or another error from security_task_wait(), or still -ECHILD. 1473 * or another error from security_task_wait(), or still -ECHILD.
1458 */ 1474 */
1459static int wait_consider_task(struct task_struct *parent, int ptrace, 1475static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
1460 struct task_struct *p, int *notask_error, 1476 int ptrace, struct task_struct *p)
1461 enum pid_type type, struct pid *pid, int options,
1462 struct siginfo __user *infop,
1463 int __user *stat_addr, struct rusage __user *ru)
1464{ 1477{
1465 int ret = eligible_child(type, pid, options, p); 1478 int ret = eligible_child(wo, p);
1466 if (!ret) 1479 if (!ret)
1467 return ret; 1480 return ret;
1468 1481
@@ -1474,16 +1487,17 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
1474 * to look for security policy problems, rather 1487 * to look for security policy problems, rather
1475 * than for mysterious wait bugs. 1488 * than for mysterious wait bugs.
1476 */ 1489 */
1477 if (*notask_error) 1490 if (wo->notask_error)
1478 *notask_error = ret; 1491 wo->notask_error = ret;
1492 return 0;
1479 } 1493 }
1480 1494
1481 if (likely(!ptrace) && unlikely(p->ptrace)) { 1495 if (likely(!ptrace) && unlikely(task_ptrace(p))) {
1482 /* 1496 /*
1483 * This child is hidden by ptrace. 1497 * This child is hidden by ptrace.
1484 * We aren't allowed to see it now, but eventually we will. 1498 * We aren't allowed to see it now, but eventually we will.
1485 */ 1499 */
1486 *notask_error = 0; 1500 wo->notask_error = 0;
1487 return 0; 1501 return 0;
1488 } 1502 }
1489 1503
@@ -1494,34 +1508,30 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
1494 * We don't reap group leaders with subthreads. 1508 * We don't reap group leaders with subthreads.
1495 */ 1509 */
1496 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) 1510 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
1497 return wait_task_zombie(p, options, infop, stat_addr, ru); 1511 return wait_task_zombie(wo, p);
1498 1512
1499 /* 1513 /*
1500 * It's stopped or running now, so it might 1514 * It's stopped or running now, so it might
1501 * later continue, exit, or stop again. 1515 * later continue, exit, or stop again.
1502 */ 1516 */
1503 *notask_error = 0; 1517 wo->notask_error = 0;
1504 1518
1505 if (task_stopped_code(p, ptrace)) 1519 if (task_stopped_code(p, ptrace))
1506 return wait_task_stopped(ptrace, p, options, 1520 return wait_task_stopped(wo, ptrace, p);
1507 infop, stat_addr, ru);
1508 1521
1509 return wait_task_continued(p, options, infop, stat_addr, ru); 1522 return wait_task_continued(wo, p);
1510} 1523}
1511 1524
1512/* 1525/*
1513 * Do the work of do_wait() for one thread in the group, @tsk. 1526 * Do the work of do_wait() for one thread in the group, @tsk.
1514 * 1527 *
1515 * -ECHILD should be in *@notask_error before the first call. 1528 * -ECHILD should be in ->notask_error before the first call.
1516 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1529 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1517 * Returns zero if the search for a child should continue; then 1530 * Returns zero if the search for a child should continue; then
1518 * *@notask_error is 0 if there were any eligible children, 1531 * ->notask_error is 0 if there were any eligible children,
1519 * or another error from security_task_wait(), or still -ECHILD. 1532 * or another error from security_task_wait(), or still -ECHILD.
1520 */ 1533 */
1521static int do_wait_thread(struct task_struct *tsk, int *notask_error, 1534static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1522 enum pid_type type, struct pid *pid, int options,
1523 struct siginfo __user *infop, int __user *stat_addr,
1524 struct rusage __user *ru)
1525{ 1535{
1526 struct task_struct *p; 1536 struct task_struct *p;
1527 1537
@@ -1530,9 +1540,7 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
1530 * Do not consider detached threads. 1540 * Do not consider detached threads.
1531 */ 1541 */
1532 if (!task_detached(p)) { 1542 if (!task_detached(p)) {
1533 int ret = wait_consider_task(tsk, 0, p, notask_error, 1543 int ret = wait_consider_task(wo, tsk, 0, p);
1534 type, pid, options,
1535 infop, stat_addr, ru);
1536 if (ret) 1544 if (ret)
1537 return ret; 1545 return ret;
1538 } 1546 }
@@ -1541,22 +1549,12 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
1541 return 0; 1549 return 0;
1542} 1550}
1543 1551
1544static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, 1552static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1545 enum pid_type type, struct pid *pid, int options,
1546 struct siginfo __user *infop, int __user *stat_addr,
1547 struct rusage __user *ru)
1548{ 1553{
1549 struct task_struct *p; 1554 struct task_struct *p;
1550 1555
1551 /*
1552 * Traditionally we see ptrace'd stopped tasks regardless of options.
1553 */
1554 options |= WUNTRACED;
1555
1556 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1556 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1557 int ret = wait_consider_task(tsk, 1, p, notask_error, 1557 int ret = wait_consider_task(wo, tsk, 1, p);
1558 type, pid, options,
1559 infop, stat_addr, ru);
1560 if (ret) 1558 if (ret)
1561 return ret; 1559 return ret;
1562 } 1560 }
@@ -1564,65 +1562,59 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
1564 return 0; 1562 return 0;
1565} 1563}
1566 1564
1567static long do_wait(enum pid_type type, struct pid *pid, int options, 1565static long do_wait(struct wait_opts *wo)
1568 struct siginfo __user *infop, int __user *stat_addr,
1569 struct rusage __user *ru)
1570{ 1566{
1571 DECLARE_WAITQUEUE(wait, current); 1567 DECLARE_WAITQUEUE(wait, current);
1572 struct task_struct *tsk; 1568 struct task_struct *tsk;
1573 int retval; 1569 int retval;
1574 1570
1575 trace_sched_process_wait(pid); 1571 trace_sched_process_wait(wo->wo_pid);
1576 1572
1577 add_wait_queue(&current->signal->wait_chldexit,&wait); 1573 add_wait_queue(&current->signal->wait_chldexit,&wait);
1578repeat: 1574repeat:
1579 /* 1575 /*
1580 * If there is nothing that can match our criteria just get out. 1576 * If there is nothing that can match our criteria just get out.
1581 * We will clear @retval to zero if we see any child that might later 1577 * We will clear ->notask_error to zero if we see any child that
1582 * match our criteria, even if we are not able to reap it yet. 1578 * might later match our criteria, even if we are not able to reap
1579 * it yet.
1583 */ 1580 */
1584 retval = -ECHILD; 1581 wo->notask_error = -ECHILD;
1585 if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) 1582 if ((wo->wo_type < PIDTYPE_MAX) &&
1586 goto end; 1583 (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
1584 goto notask;
1587 1585
1588 current->state = TASK_INTERRUPTIBLE; 1586 set_current_state(TASK_INTERRUPTIBLE);
1589 read_lock(&tasklist_lock); 1587 read_lock(&tasklist_lock);
1590 tsk = current; 1588 tsk = current;
1591 do { 1589 do {
1592 int tsk_result = do_wait_thread(tsk, &retval, 1590 retval = do_wait_thread(wo, tsk);
1593 type, pid, options, 1591 if (retval)
1594 infop, stat_addr, ru);
1595 if (!tsk_result)
1596 tsk_result = ptrace_do_wait(tsk, &retval,
1597 type, pid, options,
1598 infop, stat_addr, ru);
1599 if (tsk_result) {
1600 /*
1601 * tasklist_lock is unlocked and we have a final result.
1602 */
1603 retval = tsk_result;
1604 goto end; 1592 goto end;
1605 }
1606 1593
1607 if (options & __WNOTHREAD) 1594 retval = ptrace_do_wait(wo, tsk);
1595 if (retval)
1596 goto end;
1597
1598 if (wo->wo_flags & __WNOTHREAD)
1608 break; 1599 break;
1609 tsk = next_thread(tsk); 1600 } while_each_thread(current, tsk);
1610 BUG_ON(tsk->signal != current->signal);
1611 } while (tsk != current);
1612 read_unlock(&tasklist_lock); 1601 read_unlock(&tasklist_lock);
1613 1602
1614 if (!retval && !(options & WNOHANG)) { 1603notask:
1604 retval = wo->notask_error;
1605 if (!retval && !(wo->wo_flags & WNOHANG)) {
1615 retval = -ERESTARTSYS; 1606 retval = -ERESTARTSYS;
1616 if (!signal_pending(current)) { 1607 if (!signal_pending(current)) {
1617 schedule(); 1608 schedule();
1618 goto repeat; 1609 goto repeat;
1619 } 1610 }
1620 } 1611 }
1621
1622end: 1612end:
1623 current->state = TASK_RUNNING; 1613 __set_current_state(TASK_RUNNING);
1624 remove_wait_queue(&current->signal->wait_chldexit,&wait); 1614 remove_wait_queue(&current->signal->wait_chldexit,&wait);
1625 if (infop) { 1615 if (wo->wo_info) {
1616 struct siginfo __user *infop = wo->wo_info;
1617
1626 if (retval > 0) 1618 if (retval > 0)
1627 retval = 0; 1619 retval = 0;
1628 else { 1620 else {
@@ -1651,6 +1643,7 @@ end:
1651SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, 1643SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1652 infop, int, options, struct rusage __user *, ru) 1644 infop, int, options, struct rusage __user *, ru)
1653{ 1645{
1646 struct wait_opts wo;
1654 struct pid *pid = NULL; 1647 struct pid *pid = NULL;
1655 enum pid_type type; 1648 enum pid_type type;
1656 long ret; 1649 long ret;
@@ -1680,7 +1673,14 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1680 1673
1681 if (type < PIDTYPE_MAX) 1674 if (type < PIDTYPE_MAX)
1682 pid = find_get_pid(upid); 1675 pid = find_get_pid(upid);
1683 ret = do_wait(type, pid, options, infop, NULL, ru); 1676
1677 wo.wo_type = type;
1678 wo.wo_pid = pid;
1679 wo.wo_flags = options;
1680 wo.wo_info = infop;
1681 wo.wo_stat = NULL;
1682 wo.wo_rusage = ru;
1683 ret = do_wait(&wo);
1684 put_pid(pid); 1684 put_pid(pid);
1685 1685
1686 /* avoid REGPARM breakage on x86: */ 1686 /* avoid REGPARM breakage on x86: */
@@ -1691,6 +1691,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1691SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, 1691SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1692 int, options, struct rusage __user *, ru) 1692 int, options, struct rusage __user *, ru)
1693{ 1693{
1694 struct wait_opts wo;
1694 struct pid *pid = NULL; 1695 struct pid *pid = NULL;
1695 enum pid_type type; 1696 enum pid_type type;
1696 long ret; 1697 long ret;
@@ -1712,7 +1713,13 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1712 pid = find_get_pid(upid); 1713 pid = find_get_pid(upid);
1713 } 1714 }
1714 1715
1715 ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); 1716 wo.wo_type = type;
1717 wo.wo_pid = pid;
1718 wo.wo_flags = options | WEXITED;
1719 wo.wo_info = NULL;
1720 wo.wo_stat = stat_addr;
1721 wo.wo_rusage = ru;
1722 ret = do_wait(&wo);
1716 put_pid(pid); 1723 put_pid(pid);
1717 1724
1718 /* avoid REGPARM breakage on x86: */ 1725 /* avoid REGPARM breakage on x86: */