about summary refs log tree commit diff stats
path: root/kernel/exit.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- kernel/exit.c 316
1 files changed, 162 insertions, 154 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index abf9cf3b95c6..628d41f0dd54 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,7 +48,8 @@
48#include <linux/tracehook.h> 48#include <linux/tracehook.h>
49#include <linux/fs_struct.h> 49#include <linux/fs_struct.h>
50#include <linux/init_task.h> 50#include <linux/init_task.h>
51#include <trace/sched.h> 51#include <linux/perf_counter.h>
52#include <trace/events/sched.h>
52 53
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/unistd.h> 55#include <asm/unistd.h>
@@ -56,10 +57,6 @@
56#include <asm/mmu_context.h> 57#include <asm/mmu_context.h>
57#include "cred-internals.h" 58#include "cred-internals.h"
58 59
59DEFINE_TRACE(sched_process_free);
60DEFINE_TRACE(sched_process_exit);
61DEFINE_TRACE(sched_process_wait);
62
63static void exit_mm(struct task_struct * tsk); 60static void exit_mm(struct task_struct * tsk);
64 61
65static void __unhash_process(struct task_struct *p) 62static void __unhash_process(struct task_struct *p)
@@ -158,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
158{ 155{
159 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 156 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
160 157
158#ifdef CONFIG_PERF_COUNTERS
159 WARN_ON_ONCE(tsk->perf_counter_ctxp);
160#endif
161 trace_sched_process_free(tsk); 161 trace_sched_process_free(tsk);
162 put_task_struct(tsk); 162 put_task_struct(tsk);
163} 163}
@@ -174,6 +174,7 @@ repeat:
174 atomic_dec(&__task_cred(p)->user->processes); 174 atomic_dec(&__task_cred(p)->user->processes);
175 175
176 proc_flush_task(p); 176 proc_flush_task(p);
177
177 write_lock_irq(&tasklist_lock); 178 write_lock_irq(&tasklist_lock);
178 tracehook_finish_release_task(p); 179 tracehook_finish_release_task(p);
179 __exit_signal(p); 180 __exit_signal(p);
@@ -374,9 +375,8 @@ static void set_special_pids(struct pid *pid)
374} 375}
375 376
376/* 377/*
377 * Let kernel threads use this to say that they 378 * Let kernel threads use this to say that they allow a certain signal.
378 * allow a certain signal (since daemonize() will 379 * Must not be used if kthread was cloned with CLONE_SIGHAND.
379 * have disabled all of them by default).
380 */ 380 */
381int allow_signal(int sig) 381int allow_signal(int sig)
382{ 382{
@@ -384,14 +384,14 @@ int allow_signal(int sig)
384 return -EINVAL; 384 return -EINVAL;
385 385
386 spin_lock_irq(&current->sighand->siglock); 386 spin_lock_irq(&current->sighand->siglock);
387 /* This is only needed for daemonize()'ed kthreads */
387 sigdelset(&current->blocked, sig); 388 sigdelset(&current->blocked, sig);
388 if (!current->mm) { 389 /*
389 /* Kernel threads handle their own signals. 390 * Kernel threads handle their own signals. Let the signal code
390 Let the signal code know it'll be handled, so 391 * know it'll be handled, so that they don't get converted to
391 that they don't get converted to SIGKILL or 392 * SIGKILL or just silently dropped.
392 just silently dropped */ 393 */
393 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2; 394 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
394 }
395 recalc_sigpending(); 395 recalc_sigpending();
396 spin_unlock_irq(&current->sighand->siglock); 396 spin_unlock_irq(&current->sighand->siglock);
397 return 0; 397 return 0;
@@ -590,7 +590,7 @@ retry:
590 /* 590 /*
591 * Search in the siblings 591 * Search in the siblings
592 */ 592 */
593 list_for_each_entry(c, &p->parent->children, sibling) { 593 list_for_each_entry(c, &p->real_parent->children, sibling) {
594 if (c->mm == mm) 594 if (c->mm == mm)
595 goto assign_new_owner; 595 goto assign_new_owner;
596 } 596 }
@@ -757,7 +757,7 @@ static void reparent_thread(struct task_struct *father, struct task_struct *p,
757 p->exit_signal = SIGCHLD; 757 p->exit_signal = SIGCHLD;
758 758
759 /* If it has exited notify the new parent about this child's death. */ 759 /* If it has exited notify the new parent about this child's death. */
760 if (!p->ptrace && 760 if (!task_ptrace(p) &&
761 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { 761 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
762 do_notify_parent(p, p->exit_signal); 762 do_notify_parent(p, p->exit_signal);
763 if (task_detached(p)) { 763 if (task_detached(p)) {
@@ -782,7 +782,7 @@ static void forget_original_parent(struct task_struct *father)
782 list_for_each_entry_safe(p, n, &father->children, sibling) { 782 list_for_each_entry_safe(p, n, &father->children, sibling) {
783 p->real_parent = reaper; 783 p->real_parent = reaper;
784 if (p->parent == father) { 784 if (p->parent == father) {
785 BUG_ON(p->ptrace); 785 BUG_ON(task_ptrace(p));
786 p->parent = p->real_parent; 786 p->parent = p->real_parent;
787 } 787 }
788 reparent_thread(father, p, &dead_children); 788 reparent_thread(father, p, &dead_children);
@@ -975,16 +975,19 @@ NORET_TYPE void do_exit(long code)
975 module_put(tsk->binfmt->module); 975 module_put(tsk->binfmt->module);
976 976
977 proc_exit_connector(tsk); 977 proc_exit_connector(tsk);
978
979 /*
980 * Flush inherited counters to the parent - before the parent
981 * gets woken up by child-exit notifications.
982 */
983 perf_counter_exit_task(tsk);
984
978 exit_notify(tsk, group_dead); 985 exit_notify(tsk, group_dead);
979#ifdef CONFIG_NUMA 986#ifdef CONFIG_NUMA
980 mpol_put(tsk->mempolicy); 987 mpol_put(tsk->mempolicy);
981 tsk->mempolicy = NULL; 988 tsk->mempolicy = NULL;
982#endif 989#endif
983#ifdef CONFIG_FUTEX 990#ifdef CONFIG_FUTEX
984 /*
985 * This must happen late, after the PID is not
986 * hashed anymore:
987 */
988 if (unlikely(!list_empty(&tsk->pi_state_list))) 991 if (unlikely(!list_empty(&tsk->pi_state_list)))
989 exit_pi_state_list(tsk); 992 exit_pi_state_list(tsk);
990 if (unlikely(current->pi_state_cache)) 993 if (unlikely(current->pi_state_cache))
@@ -1077,6 +1080,18 @@ SYSCALL_DEFINE1(exit_group, int, error_code)
1077 return 0; 1080 return 0;
1078} 1081}
1079 1082
1083struct wait_opts {
1084 enum pid_type wo_type;
1085 int wo_flags;
1086 struct pid *wo_pid;
1087
1088 struct siginfo __user *wo_info;
1089 int __user *wo_stat;
1090 struct rusage __user *wo_rusage;
1091
1092 int notask_error;
1093};
1094
1080static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) 1095static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1081{ 1096{
1082 struct pid *pid = NULL; 1097 struct pid *pid = NULL;
@@ -1087,13 +1102,12 @@ static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1087 return pid; 1102 return pid;
1088} 1103}
1089 1104
1090static int eligible_child(enum pid_type type, struct pid *pid, int options, 1105static int eligible_child(struct wait_opts *wo, struct task_struct *p)
1091 struct task_struct *p)
1092{ 1106{
1093 int err; 1107 int err;
1094 1108
1095 if (type < PIDTYPE_MAX) { 1109 if (wo->wo_type < PIDTYPE_MAX) {
1096 if (task_pid_type(p, type) != pid) 1110 if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
1097 return 0; 1111 return 0;
1098 } 1112 }
1099 1113
@@ -1102,8 +1116,8 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1102 * set; otherwise, wait for non-clone children *only*. (Note: 1116 * set; otherwise, wait for non-clone children *only*. (Note:
1103 * A "clone" child here is one that reports to its parent 1117 * A "clone" child here is one that reports to its parent
1104 * using a signal other than SIGCHLD.) */ 1118 * using a signal other than SIGCHLD.) */
1105 if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) 1119 if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
1106 && !(options & __WALL)) 1120 && !(wo->wo_flags & __WALL))
1107 return 0; 1121 return 0;
1108 1122
1109 err = security_task_wait(p); 1123 err = security_task_wait(p);
@@ -1113,14 +1127,15 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1113 return 1; 1127 return 1;
1114} 1128}
1115 1129
1116static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, 1130static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
1117 int why, int status, 1131 pid_t pid, uid_t uid, int why, int status)
1118 struct siginfo __user *infop,
1119 struct rusage __user *rusagep)
1120{ 1132{
1121 int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; 1133 struct siginfo __user *infop;
1134 int retval = wo->wo_rusage
1135 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1122 1136
1123 put_task_struct(p); 1137 put_task_struct(p);
1138 infop = wo->wo_info;
1124 if (!retval) 1139 if (!retval)
1125 retval = put_user(SIGCHLD, &infop->si_signo); 1140 retval = put_user(SIGCHLD, &infop->si_signo);
1126 if (!retval) 1141 if (!retval)
@@ -1144,19 +1159,18 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1144 * the lock and this task is uninteresting. If we return nonzero, we have 1159 * the lock and this task is uninteresting. If we return nonzero, we have
1145 * released the lock and the system call should return. 1160 * released the lock and the system call should return.
1146 */ 1161 */
1147static int wait_task_zombie(struct task_struct *p, int options, 1162static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1148 struct siginfo __user *infop,
1149 int __user *stat_addr, struct rusage __user *ru)
1150{ 1163{
1151 unsigned long state; 1164 unsigned long state;
1152 int retval, status, traced; 1165 int retval, status, traced;
1153 pid_t pid = task_pid_vnr(p); 1166 pid_t pid = task_pid_vnr(p);
1154 uid_t uid = __task_cred(p)->uid; 1167 uid_t uid = __task_cred(p)->uid;
1168 struct siginfo __user *infop;
1155 1169
1156 if (!likely(options & WEXITED)) 1170 if (!likely(wo->wo_flags & WEXITED))
1157 return 0; 1171 return 0;
1158 1172
1159 if (unlikely(options & WNOWAIT)) { 1173 if (unlikely(wo->wo_flags & WNOWAIT)) {
1160 int exit_code = p->exit_code; 1174 int exit_code = p->exit_code;
1161 int why, status; 1175 int why, status;
1162 1176
@@ -1169,8 +1183,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
1169 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; 1183 why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
1170 status = exit_code & 0x7f; 1184 status = exit_code & 0x7f;
1171 } 1185 }
1172 return wait_noreap_copyout(p, pid, uid, why, 1186 return wait_noreap_copyout(wo, p, pid, uid, why, status);
1173 status, infop, ru);
1174 } 1187 }
1175 1188
1176 /* 1189 /*
@@ -1184,11 +1197,13 @@ static int wait_task_zombie(struct task_struct *p, int options,
1184 } 1197 }
1185 1198
1186 traced = ptrace_reparented(p); 1199 traced = ptrace_reparented(p);
1187 1200 /*
1188 if (likely(!traced)) { 1201 * It can be ptraced but not reparented, check
1202 * !task_detached() to filter out sub-threads.
1203 */
1204 if (likely(!traced) && likely(!task_detached(p))) {
1189 struct signal_struct *psig; 1205 struct signal_struct *psig;
1190 struct signal_struct *sig; 1206 struct signal_struct *sig;
1191 struct task_cputime cputime;
1192 1207
1193 /* 1208 /*
1194 * The resource counters for the group leader are in its 1209 * The resource counters for the group leader are in its
@@ -1201,26 +1216,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
1201 * p->signal fields, because they are only touched by 1216 * p->signal fields, because they are only touched by
1202 * __exit_signal, which runs with tasklist_lock 1217 * __exit_signal, which runs with tasklist_lock
1203 * write-locked anyway, and so is excluded here. We do 1218 * write-locked anyway, and so is excluded here. We do
1204 * need to protect the access to p->parent->signal fields, 1219 * need to protect the access to parent->signal fields,
1205 * as other threads in the parent group can be right 1220 * as other threads in the parent group can be right
1206 * here reaping other children at the same time. 1221 * here reaping other children at the same time.
1207 *
1208 * We use thread_group_cputime() to get times for the thread
1209 * group, which consolidates times for all threads in the
1210 * group including the group leader.
1211 */ 1222 */
1212 thread_group_cputime(p, &cputime); 1223 spin_lock_irq(&p->real_parent->sighand->siglock);
1213 spin_lock_irq(&p->parent->sighand->siglock); 1224 psig = p->real_parent->signal;
1214 psig = p->parent->signal;
1215 sig = p->signal; 1225 sig = p->signal;
1216 psig->cutime = 1226 psig->cutime =
1217 cputime_add(psig->cutime, 1227 cputime_add(psig->cutime,
1218 cputime_add(cputime.utime, 1228 cputime_add(p->utime,
1219 sig->cutime)); 1229 cputime_add(sig->utime,
1230 sig->cutime)));
1220 psig->cstime = 1231 psig->cstime =
1221 cputime_add(psig->cstime, 1232 cputime_add(psig->cstime,
1222 cputime_add(cputime.stime, 1233 cputime_add(p->stime,
1223 sig->cstime)); 1234 cputime_add(sig->stime,
1235 sig->cstime)));
1224 psig->cgtime = 1236 psig->cgtime =
1225 cputime_add(psig->cgtime, 1237 cputime_add(psig->cgtime,
1226 cputime_add(p->gtime, 1238 cputime_add(p->gtime,
@@ -1242,7 +1254,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
1242 sig->oublock + sig->coublock; 1254 sig->oublock + sig->coublock;
1243 task_io_accounting_add(&psig->ioac, &p->ioac); 1255 task_io_accounting_add(&psig->ioac, &p->ioac);
1244 task_io_accounting_add(&psig->ioac, &sig->ioac); 1256 task_io_accounting_add(&psig->ioac, &sig->ioac);
1245 spin_unlock_irq(&p->parent->sighand->siglock); 1257 spin_unlock_irq(&p->real_parent->sighand->siglock);
1246 } 1258 }
1247 1259
1248 /* 1260 /*
@@ -1251,11 +1263,14 @@ static int wait_task_zombie(struct task_struct *p, int options,
1251 */ 1263 */
1252 read_unlock(&tasklist_lock); 1264 read_unlock(&tasklist_lock);
1253 1265
1254 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1266 retval = wo->wo_rusage
1267 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1255 status = (p->signal->flags & SIGNAL_GROUP_EXIT) 1268 status = (p->signal->flags & SIGNAL_GROUP_EXIT)
1256 ? p->signal->group_exit_code : p->exit_code; 1269 ? p->signal->group_exit_code : p->exit_code;
1257 if (!retval && stat_addr) 1270 if (!retval && wo->wo_stat)
1258 retval = put_user(status, stat_addr); 1271 retval = put_user(status, wo->wo_stat);
1272
1273 infop = wo->wo_info;
1259 if (!retval && infop) 1274 if (!retval && infop)
1260 retval = put_user(SIGCHLD, &infop->si_signo); 1275 retval = put_user(SIGCHLD, &infop->si_signo);
1261 if (!retval && infop) 1276 if (!retval && infop)
@@ -1323,15 +1338,18 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
1323 * the lock and this task is uninteresting. If we return nonzero, we have 1338 * the lock and this task is uninteresting. If we return nonzero, we have
1324 * released the lock and the system call should return. 1339 * released the lock and the system call should return.
1325 */ 1340 */
1326static int wait_task_stopped(int ptrace, struct task_struct *p, 1341static int wait_task_stopped(struct wait_opts *wo,
1327 int options, struct siginfo __user *infop, 1342 int ptrace, struct task_struct *p)
1328 int __user *stat_addr, struct rusage __user *ru)
1329{ 1343{
1344 struct siginfo __user *infop;
1330 int retval, exit_code, *p_code, why; 1345 int retval, exit_code, *p_code, why;
1331 uid_t uid = 0; /* unneeded, required by compiler */ 1346 uid_t uid = 0; /* unneeded, required by compiler */
1332 pid_t pid; 1347 pid_t pid;
1333 1348
1334 if (!(options & WUNTRACED)) 1349 /*
1350 * Traditionally we see ptrace'd stopped tasks regardless of options.
1351 */
1352 if (!ptrace && !(wo->wo_flags & WUNTRACED))
1335 return 0; 1353 return 0;
1336 1354
1337 exit_code = 0; 1355 exit_code = 0;
@@ -1345,7 +1363,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
1345 if (!exit_code) 1363 if (!exit_code)
1346 goto unlock_sig; 1364 goto unlock_sig;
1347 1365
1348 if (!unlikely(options & WNOWAIT)) 1366 if (!unlikely(wo->wo_flags & WNOWAIT))
1349 *p_code = 0; 1367 *p_code = 0;
1350 1368
1351 /* don't need the RCU readlock here as we're holding a spinlock */ 1369 /* don't need the RCU readlock here as we're holding a spinlock */
@@ -1367,14 +1385,15 @@ unlock_sig:
1367 why = ptrace ? CLD_TRAPPED : CLD_STOPPED; 1385 why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
1368 read_unlock(&tasklist_lock); 1386 read_unlock(&tasklist_lock);
1369 1387
1370 if (unlikely(options & WNOWAIT)) 1388 if (unlikely(wo->wo_flags & WNOWAIT))
1371 return wait_noreap_copyout(p, pid, uid, 1389 return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
1372 why, exit_code, 1390
1373 infop, ru); 1391 retval = wo->wo_rusage
1392 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1393 if (!retval && wo->wo_stat)
1394 retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
1374 1395
1375 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1396 infop = wo->wo_info;
1376 if (!retval && stat_addr)
1377 retval = put_user((exit_code << 8) | 0x7f, stat_addr);
1378 if (!retval && infop) 1397 if (!retval && infop)
1379 retval = put_user(SIGCHLD, &infop->si_signo); 1398 retval = put_user(SIGCHLD, &infop->si_signo);
1380 if (!retval && infop) 1399 if (!retval && infop)
@@ -1401,15 +1420,13 @@ unlock_sig:
1401 * the lock and this task is uninteresting. If we return nonzero, we have 1420 * the lock and this task is uninteresting. If we return nonzero, we have
1402 * released the lock and the system call should return. 1421 * released the lock and the system call should return.
1403 */ 1422 */
1404static int wait_task_continued(struct task_struct *p, int options, 1423static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
1405 struct siginfo __user *infop,
1406 int __user *stat_addr, struct rusage __user *ru)
1407{ 1424{
1408 int retval; 1425 int retval;
1409 pid_t pid; 1426 pid_t pid;
1410 uid_t uid; 1427 uid_t uid;
1411 1428
1412 if (!unlikely(options & WCONTINUED)) 1429 if (!unlikely(wo->wo_flags & WCONTINUED))
1413 return 0; 1430 return 0;
1414 1431
1415 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1432 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
@@ -1421,7 +1438,7 @@ static int wait_task_continued(struct task_struct *p, int options,
1421 spin_unlock_irq(&p->sighand->siglock); 1438 spin_unlock_irq(&p->sighand->siglock);
1422 return 0; 1439 return 0;
1423 } 1440 }
1424 if (!unlikely(options & WNOWAIT)) 1441 if (!unlikely(wo->wo_flags & WNOWAIT))
1425 p->signal->flags &= ~SIGNAL_STOP_CONTINUED; 1442 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1426 uid = __task_cred(p)->uid; 1443 uid = __task_cred(p)->uid;
1427 spin_unlock_irq(&p->sighand->siglock); 1444 spin_unlock_irq(&p->sighand->siglock);
@@ -1430,17 +1447,17 @@ static int wait_task_continued(struct task_struct *p, int options,
1430 get_task_struct(p); 1447 get_task_struct(p);
1431 read_unlock(&tasklist_lock); 1448 read_unlock(&tasklist_lock);
1432 1449
1433 if (!infop) { 1450 if (!wo->wo_info) {
1434 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; 1451 retval = wo->wo_rusage
1452 ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
1435 put_task_struct(p); 1453 put_task_struct(p);
1436 if (!retval && stat_addr) 1454 if (!retval && wo->wo_stat)
1437 retval = put_user(0xffff, stat_addr); 1455 retval = put_user(0xffff, wo->wo_stat);
1438 if (!retval) 1456 if (!retval)
1439 retval = pid; 1457 retval = pid;
1440 } else { 1458 } else {
1441 retval = wait_noreap_copyout(p, pid, uid, 1459 retval = wait_noreap_copyout(wo, p, pid, uid,
1442 CLD_CONTINUED, SIGCONT, 1460 CLD_CONTINUED, SIGCONT);
1443 infop, ru);
1444 BUG_ON(retval == 0); 1461 BUG_ON(retval == 0);
1445 } 1462 }
1446 1463
@@ -1450,19 +1467,16 @@ static int wait_task_continued(struct task_struct *p, int options,
1450/* 1467/*
1451 * Consider @p for a wait by @parent. 1468 * Consider @p for a wait by @parent.
1452 * 1469 *
1453 * -ECHILD should be in *@notask_error before the first call. 1470 * -ECHILD should be in ->notask_error before the first call.
1454 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1471 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1455 * Returns zero if the search for a child should continue; 1472 * Returns zero if the search for a child should continue;
1456 * then *@notask_error is 0 if @p is an eligible child, 1473 * then ->notask_error is 0 if @p is an eligible child,
1457 * or another error from security_task_wait(), or still -ECHILD. 1474 * or another error from security_task_wait(), or still -ECHILD.
1458 */ 1475 */
1459static int wait_consider_task(struct task_struct *parent, int ptrace, 1476static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
1460 struct task_struct *p, int *notask_error, 1477 int ptrace, struct task_struct *p)
1461 enum pid_type type, struct pid *pid, int options,
1462 struct siginfo __user *infop,
1463 int __user *stat_addr, struct rusage __user *ru)
1464{ 1478{
1465 int ret = eligible_child(type, pid, options, p); 1479 int ret = eligible_child(wo, p);
1466 if (!ret) 1480 if (!ret)
1467 return ret; 1481 return ret;
1468 1482
@@ -1474,16 +1488,17 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
1474 * to look for security policy problems, rather 1488 * to look for security policy problems, rather
1475 * than for mysterious wait bugs. 1489 * than for mysterious wait bugs.
1476 */ 1490 */
1477 if (*notask_error) 1491 if (wo->notask_error)
1478 *notask_error = ret; 1492 wo->notask_error = ret;
1493 return 0;
1479 } 1494 }
1480 1495
1481 if (likely(!ptrace) && unlikely(p->ptrace)) { 1496 if (likely(!ptrace) && unlikely(task_ptrace(p))) {
1482 /* 1497 /*
1483 * This child is hidden by ptrace. 1498 * This child is hidden by ptrace.
1484 * We aren't allowed to see it now, but eventually we will. 1499 * We aren't allowed to see it now, but eventually we will.
1485 */ 1500 */
1486 *notask_error = 0; 1501 wo->notask_error = 0;
1487 return 0; 1502 return 0;
1488 } 1503 }
1489 1504
@@ -1494,34 +1509,30 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
1494 * We don't reap group leaders with subthreads. 1509 * We don't reap group leaders with subthreads.
1495 */ 1510 */
1496 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) 1511 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
1497 return wait_task_zombie(p, options, infop, stat_addr, ru); 1512 return wait_task_zombie(wo, p);
1498 1513
1499 /* 1514 /*
1500 * It's stopped or running now, so it might 1515 * It's stopped or running now, so it might
1501 * later continue, exit, or stop again. 1516 * later continue, exit, or stop again.
1502 */ 1517 */
1503 *notask_error = 0; 1518 wo->notask_error = 0;
1504 1519
1505 if (task_stopped_code(p, ptrace)) 1520 if (task_stopped_code(p, ptrace))
1506 return wait_task_stopped(ptrace, p, options, 1521 return wait_task_stopped(wo, ptrace, p);
1507 infop, stat_addr, ru);
1508 1522
1509 return wait_task_continued(p, options, infop, stat_addr, ru); 1523 return wait_task_continued(wo, p);
1510} 1524}
1511 1525
1512/* 1526/*
1513 * Do the work of do_wait() for one thread in the group, @tsk. 1527 * Do the work of do_wait() for one thread in the group, @tsk.
1514 * 1528 *
1515 * -ECHILD should be in *@notask_error before the first call. 1529 * -ECHILD should be in ->notask_error before the first call.
1516 * Returns nonzero for a final return, when we have unlocked tasklist_lock. 1530 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1517 * Returns zero if the search for a child should continue; then 1531 * Returns zero if the search for a child should continue; then
1518 * *@notask_error is 0 if there were any eligible children, 1532 * ->notask_error is 0 if there were any eligible children,
1519 * or another error from security_task_wait(), or still -ECHILD. 1533 * or another error from security_task_wait(), or still -ECHILD.
1520 */ 1534 */
1521static int do_wait_thread(struct task_struct *tsk, int *notask_error, 1535static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1522 enum pid_type type, struct pid *pid, int options,
1523 struct siginfo __user *infop, int __user *stat_addr,
1524 struct rusage __user *ru)
1525{ 1536{
1526 struct task_struct *p; 1537 struct task_struct *p;
1527 1538
@@ -1530,9 +1541,7 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
1530 * Do not consider detached threads. 1541 * Do not consider detached threads.
1531 */ 1542 */
1532 if (!task_detached(p)) { 1543 if (!task_detached(p)) {
1533 int ret = wait_consider_task(tsk, 0, p, notask_error, 1544 int ret = wait_consider_task(wo, tsk, 0, p);
1534 type, pid, options,
1535 infop, stat_addr, ru);
1536 if (ret) 1545 if (ret)
1537 return ret; 1546 return ret;
1538 } 1547 }
@@ -1541,22 +1550,12 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
1541 return 0; 1550 return 0;
1542} 1551}
1543 1552
1544static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, 1553static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1545 enum pid_type type, struct pid *pid, int options,
1546 struct siginfo __user *infop, int __user *stat_addr,
1547 struct rusage __user *ru)
1548{ 1554{
1549 struct task_struct *p; 1555 struct task_struct *p;
1550 1556
1551 /*
1552 * Traditionally we see ptrace'd stopped tasks regardless of options.
1553 */
1554 options |= WUNTRACED;
1555
1556 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { 1557 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1557 int ret = wait_consider_task(tsk, 1, p, notask_error, 1558 int ret = wait_consider_task(wo, tsk, 1, p);
1558 type, pid, options,
1559 infop, stat_addr, ru);
1560 if (ret) 1559 if (ret)
1561 return ret; 1560 return ret;
1562 } 1561 }
@@ -1564,65 +1563,59 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
1564 return 0; 1563 return 0;
1565} 1564}
1566 1565
1567static long do_wait(enum pid_type type, struct pid *pid, int options, 1566static long do_wait(struct wait_opts *wo)
1568 struct siginfo __user *infop, int __user *stat_addr,
1569 struct rusage __user *ru)
1570{ 1567{
1571 DECLARE_WAITQUEUE(wait, current); 1568 DECLARE_WAITQUEUE(wait, current);
1572 struct task_struct *tsk; 1569 struct task_struct *tsk;
1573 int retval; 1570 int retval;
1574 1571
1575 trace_sched_process_wait(pid); 1572 trace_sched_process_wait(wo->wo_pid);
1576 1573
1577 add_wait_queue(&current->signal->wait_chldexit,&wait); 1574 add_wait_queue(&current->signal->wait_chldexit,&wait);
1578repeat: 1575repeat:
1579 /* 1576 /*
1580 * If there is nothing that can match our criteria just get out. 1577 * If there is nothing that can match our criteria just get out.
1581 * We will clear @retval to zero if we see any child that might later 1578 * We will clear ->notask_error to zero if we see any child that
1582 * match our criteria, even if we are not able to reap it yet. 1579 * might later match our criteria, even if we are not able to reap
1580 * it yet.
1583 */ 1581 */
1584 retval = -ECHILD; 1582 wo->notask_error = -ECHILD;
1585 if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) 1583 if ((wo->wo_type < PIDTYPE_MAX) &&
1586 goto end; 1584 (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
1585 goto notask;
1587 1586
1588 current->state = TASK_INTERRUPTIBLE; 1587 set_current_state(TASK_INTERRUPTIBLE);
1589 read_lock(&tasklist_lock); 1588 read_lock(&tasklist_lock);
1590 tsk = current; 1589 tsk = current;
1591 do { 1590 do {
1592 int tsk_result = do_wait_thread(tsk, &retval, 1591 retval = do_wait_thread(wo, tsk);
1593 type, pid, options, 1592 if (retval)
1594 infop, stat_addr, ru);
1595 if (!tsk_result)
1596 tsk_result = ptrace_do_wait(tsk, &retval,
1597 type, pid, options,
1598 infop, stat_addr, ru);
1599 if (tsk_result) {
1600 /*
1601 * tasklist_lock is unlocked and we have a final result.
1602 */
1603 retval = tsk_result;
1604 goto end; 1593 goto end;
1605 }
1606 1594
1607 if (options & __WNOTHREAD) 1595 retval = ptrace_do_wait(wo, tsk);
1596 if (retval)
1597 goto end;
1598
1599 if (wo->wo_flags & __WNOTHREAD)
1608 break; 1600 break;
1609 tsk = next_thread(tsk); 1601 } while_each_thread(current, tsk);
1610 BUG_ON(tsk->signal != current->signal);
1611 } while (tsk != current);
1612 read_unlock(&tasklist_lock); 1602 read_unlock(&tasklist_lock);
1613 1603
1614 if (!retval && !(options & WNOHANG)) { 1604notask:
1605 retval = wo->notask_error;
1606 if (!retval && !(wo->wo_flags & WNOHANG)) {
1615 retval = -ERESTARTSYS; 1607 retval = -ERESTARTSYS;
1616 if (!signal_pending(current)) { 1608 if (!signal_pending(current)) {
1617 schedule(); 1609 schedule();
1618 goto repeat; 1610 goto repeat;
1619 } 1611 }
1620 } 1612 }
1621
1622end: 1613end:
1623 current->state = TASK_RUNNING; 1614 __set_current_state(TASK_RUNNING);
1624 remove_wait_queue(&current->signal->wait_chldexit,&wait); 1615 remove_wait_queue(&current->signal->wait_chldexit,&wait);
1625 if (infop) { 1616 if (wo->wo_info) {
1617 struct siginfo __user *infop = wo->wo_info;
1618
1626 if (retval > 0) 1619 if (retval > 0)
1627 retval = 0; 1620 retval = 0;
1628 else { 1621 else {
@@ -1651,6 +1644,7 @@ end:
1651SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, 1644SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1652 infop, int, options, struct rusage __user *, ru) 1645 infop, int, options, struct rusage __user *, ru)
1653{ 1646{
1647 struct wait_opts wo;
1654 struct pid *pid = NULL; 1648 struct pid *pid = NULL;
1655 enum pid_type type; 1649 enum pid_type type;
1656 long ret; 1650 long ret;
@@ -1680,7 +1674,14 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1680 1674
1681 if (type < PIDTYPE_MAX) 1675 if (type < PIDTYPE_MAX)
1682 pid = find_get_pid(upid); 1676 pid = find_get_pid(upid);
1683 ret = do_wait(type, pid, options, infop, NULL, ru); 1677
1678 wo.wo_type = type;
1679 wo.wo_pid = pid;
1680 wo.wo_flags = options;
1681 wo.wo_info = infop;
1682 wo.wo_stat = NULL;
1683 wo.wo_rusage = ru;
1684 ret = do_wait(&wo);
1684 put_pid(pid); 1685 put_pid(pid);
1685 1686
1686 /* avoid REGPARM breakage on x86: */ 1687 /* avoid REGPARM breakage on x86: */
@@ -1691,6 +1692,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1691SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, 1692SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1692 int, options, struct rusage __user *, ru) 1693 int, options, struct rusage __user *, ru)
1693{ 1694{
1695 struct wait_opts wo;
1694 struct pid *pid = NULL; 1696 struct pid *pid = NULL;
1695 enum pid_type type; 1697 enum pid_type type;
1696 long ret; 1698 long ret;
@@ -1712,7 +1714,13 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1712 pid = find_get_pid(upid); 1714 pid = find_get_pid(upid);
1713 } 1715 }
1714 1716
1715 ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); 1717 wo.wo_type = type;
1718 wo.wo_pid = pid;
1719 wo.wo_flags = options | WEXITED;
1720 wo.wo_info = NULL;
1721 wo.wo_stat = stat_addr;
1722 wo.wo_rusage = ru;
1723 ret = do_wait(&wo);
1716 put_pid(pid); 1724 put_pid(pid);
1717 1725
1718 /* avoid REGPARM breakage on x86: */ 1726 /* avoid REGPARM breakage on x86: */