Diffstat (limited to 'kernel/exit.c')
 kernel/exit.c | 711
 1 file changed, 324 insertions(+), 387 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index 167e1e3ad7c6..1143012951e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
 #include <linux/completion.h>
 #include <linux/personality.h>
 #include <linux/tty.h>
-#include <linux/mnt_namespace.h>
 #include <linux/iocontext.h>
 #include <linux/key.h>
 #include <linux/security.h>
@@ -46,8 +45,11 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <trace/sched.h>
+#include <linux/perf_event.h>
+#include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -55,17 +57,8 @@
 #include <asm/mmu_context.h>
 #include "cred-internals.h"
 
-DEFINE_TRACE(sched_process_free);
-DEFINE_TRACE(sched_process_exit);
-DEFINE_TRACE(sched_process_wait);
-
 static void exit_mm(struct task_struct * tsk);
 
-static inline int task_detached(struct task_struct *p)
-{
-	return p->exit_signal == -1;
-}
-
 static void __unhash_process(struct task_struct *p)
 {
 	nr_threads--;
@@ -118,9 +111,9 @@ static void __exit_signal(struct task_struct *tsk)
 	 * We won't ever get here for the group leader, since it
 	 * will have been the last reference on the signal_struct.
 	 */
-	sig->utime = cputime_add(sig->utime, task_utime(tsk));
-	sig->stime = cputime_add(sig->stime, task_stime(tsk));
-	sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+	sig->utime = cputime_add(sig->utime, tsk->utime);
+	sig->stime = cputime_add(sig->stime, tsk->stime);
+	sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 	sig->min_flt += tsk->min_flt;
 	sig->maj_flt += tsk->maj_flt;
 	sig->nvcsw += tsk->nvcsw;
@@ -162,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
+#ifdef CONFIG_PERF_EVENTS
+	WARN_ON_ONCE(tsk->perf_event_ctxp);
+#endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }
@@ -178,6 +174,7 @@ repeat:
 	atomic_dec(&__task_cred(p)->user->processes);
 
 	proc_flush_task(p);
+
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
 	__exit_signal(p);
@@ -362,16 +359,12 @@ static void reparent_to_kthreadd(void)
 void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
-	pid_t nr = pid_nr(pid);
 
-	if (task_session(curr) != pid) {
+	if (task_session(curr) != pid)
 		change_pid(curr, PIDTYPE_SID, pid);
-		set_task_session(curr, nr);
-	}
-	if (task_pgrp(curr) != pid) {
+
+	if (task_pgrp(curr) != pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
-		set_task_pgrp(curr, nr);
-	}
 }
 
 static void set_special_pids(struct pid *pid)
@@ -382,9 +375,8 @@ static void set_special_pids(struct pid *pid)
 }
 
 /*
- * Let kernel threads use this to say that they
- * allow a certain signal (since daemonize() will
- * have disabled all of them by default).
+ * Let kernel threads use this to say that they allow a certain signal.
+ * Must not be used if kthread was cloned with CLONE_SIGHAND.
  */
 int allow_signal(int sig)
 {
@@ -392,14 +384,14 @@ int allow_signal(int sig)
 		return -EINVAL;
 
 	spin_lock_irq(&current->sighand->siglock);
+	/* This is only needed for daemonize()'ed kthreads */
 	sigdelset(&current->blocked, sig);
-	if (!current->mm) {
-		/* Kernel threads handle their own signals.
-		   Let the signal code know it'll be handled, so
-		   that they don't get converted to SIGKILL or
-		   just silently dropped */
-		current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
-	}
+	/*
+	 * Kernel threads handle their own signals. Let the signal code
+	 * know it'll be handled, so that they don't get converted to
+	 * SIGKILL or just silently dropped.
+	 */
+	current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	return 0;
@@ -429,7 +421,6 @@ EXPORT_SYMBOL(disallow_signal);
 void daemonize(const char *name, ...)
 {
 	va_list args;
-	struct fs_struct *fs;
 	sigset_t blocked;
 
 	va_start(args, name);
@@ -462,11 +453,7 @@ void daemonize(const char *name, ...)
 
 	/* Become as one with the init task */
 
-	exit_fs(current);	/* current->fs->count--; */
-	fs = init_task.fs;
-	current->fs = fs;
-	atomic_inc(&fs->count);
-
+	daemonize_fs_struct();
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
@@ -565,30 +552,6 @@ void exit_files(struct task_struct *tsk)
 	}
 }
 
-void put_fs_struct(struct fs_struct *fs)
-{
-	/* No need to hold fs->lock if we are killing it */
-	if (atomic_dec_and_test(&fs->count)) {
-		path_put(&fs->root);
-		path_put(&fs->pwd);
-		kmem_cache_free(fs_cachep, fs);
-	}
-}
-
-void exit_fs(struct task_struct *tsk)
-{
-	struct fs_struct * fs = tsk->fs;
-
-	if (fs) {
-		task_lock(tsk);
-		tsk->fs = NULL;
-		task_unlock(tsk);
-		put_fs_struct(fs);
-	}
-}
-
-EXPORT_SYMBOL_GPL(exit_fs);
-
 #ifdef CONFIG_MM_OWNER
 /*
  * Task p is exiting and it owned mm, lets find a new owner for it
@@ -627,7 +590,7 @@ retry:
 	/*
 	 * Search in the siblings
 	 */
-	list_for_each_entry(c, &p->parent->children, sibling) {
+	list_for_each_entry(c, &p->real_parent->children, sibling) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	}
@@ -732,119 +695,6 @@ static void exit_mm(struct task_struct * tsk)
 }
 
 /*
- * Return nonzero if @parent's children should reap themselves.
- *
- * Called with write_lock_irq(&tasklist_lock) held.
- */
-static int ignoring_children(struct task_struct *parent)
-{
-	int ret;
-	struct sighand_struct *psig = parent->sighand;
-	unsigned long flags;
-	spin_lock_irqsave(&psig->siglock, flags);
-	ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
-	       (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
-	spin_unlock_irqrestore(&psig->siglock, flags);
-	return ret;
-}
-
-/*
- * Detach all tasks we were using ptrace on.
- * Any that need to be release_task'd are put on the @dead list.
- *
- * Called with write_lock(&tasklist_lock) held.
- */
-static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
-{
-	struct task_struct *p, *n;
-	int ign = -1;
-
-	list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
-		__ptrace_unlink(p);
-
-		if (p->exit_state != EXIT_ZOMBIE)
-			continue;
-
-		/*
-		 * If it's a zombie, our attachedness prevented normal
-		 * parent notification or self-reaping. Do notification
-		 * now if it would have happened earlier. If it should
-		 * reap itself, add it to the @dead list. We can't call
-		 * release_task() here because we already hold tasklist_lock.
-		 *
-		 * If it's our own child, there is no notification to do.
-		 * But if our normal children self-reap, then this child
-		 * was prevented by ptrace and we must reap it now.
-		 */
-		if (!task_detached(p) && thread_group_empty(p)) {
-			if (!same_thread_group(p->real_parent, parent))
-				do_notify_parent(p, p->exit_signal);
-			else {
-				if (ign < 0)
-					ign = ignoring_children(parent);
-				if (ign)
-					p->exit_signal = -1;
-			}
-		}
-
-		if (task_detached(p)) {
-			/*
-			 * Mark it as in the process of being reaped.
-			 */
-			p->exit_state = EXIT_DEAD;
-			list_add(&p->ptrace_entry, dead);
-		}
-	}
-}
-
-/*
- * Finish up exit-time ptrace cleanup.
- *
- * Called without locks.
- */
-static void ptrace_exit_finish(struct task_struct *parent,
-			       struct list_head *dead)
-{
-	struct task_struct *p, *n;
-
-	BUG_ON(!list_empty(&parent->ptraced));
-
-	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
-		list_del_init(&p->ptrace_entry);
-		release_task(p);
-	}
-}
-
-static void reparent_thread(struct task_struct *p, struct task_struct *father)
-{
-	if (p->pdeath_signal)
-		/* We already hold the tasklist_lock here. */
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
-	list_move_tail(&p->sibling, &p->real_parent->children);
-
-	/* If this is a threaded reparent there is no need to
-	 * notify anyone anything has happened.
-	 */
-	if (same_thread_group(p->real_parent, father))
-		return;
-
-	/* We don't want people slaying init. */
-	if (!task_detached(p))
-		p->exit_signal = SIGCHLD;
-
-	/* If we'd notified the old parent about this child's death,
-	 * also notify the new parent.
-	 */
-	if (!ptrace_reparented(p) &&
-	    p->exit_state == EXIT_ZOMBIE &&
-	    !task_detached(p) && thread_group_empty(p))
-		do_notify_parent(p, p->exit_signal);
-
-	kill_orphaned_pgrp(p, father);
-}
-
-/*
  * When we die, we re-parent all our children.
  * Try to give them to another thread in our thread
  * group, and if no such member exists, give it to
@@ -883,31 +733,68 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 	return pid_ns->child_reaper;
 }
 
+/*
+* Any that need to be release_task'd are put on the @dead list.
+ */
+static void reparent_thread(struct task_struct *father, struct task_struct *p,
+				struct list_head *dead)
+{
+	if (p->pdeath_signal)
+		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+	list_move_tail(&p->sibling, &p->real_parent->children);
+
+	if (task_detached(p))
+		return;
+	/*
+	 * If this is a threaded reparent there is no need to
+	 * notify anyone anything has happened.
+	 */
+	if (same_thread_group(p->real_parent, father))
+		return;
+
+	/* We don't want people slaying init. */
+	p->exit_signal = SIGCHLD;
+
+	/* If it has exited notify the new parent about this child's death. */
+	if (!task_ptrace(p) &&
+	    p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
+		do_notify_parent(p, p->exit_signal);
+		if (task_detached(p)) {
+			p->exit_state = EXIT_DEAD;
+			list_move_tail(&p->sibling, dead);
+		}
+	}
+
+	kill_orphaned_pgrp(p, father);
+}
+
 static void forget_original_parent(struct task_struct *father)
 {
 	struct task_struct *p, *n, *reaper;
-	LIST_HEAD(ptrace_dead);
+	LIST_HEAD(dead_children);
+
+	exit_ptrace(father);
 
 	write_lock_irq(&tasklist_lock);
 	reaper = find_new_reaper(father);
-	/*
-	 * First clean up ptrace if we were using it.
-	 */
-	ptrace_exit(father, &ptrace_dead);
 
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		p->real_parent = reaper;
 		if (p->parent == father) {
-			BUG_ON(p->ptrace);
+			BUG_ON(task_ptrace(p));
 			p->parent = p->real_parent;
 		}
-		reparent_thread(p, father);
+		reparent_thread(father, p, &dead_children);
 	}
-
 	write_unlock_irq(&tasklist_lock);
+
 	BUG_ON(!list_empty(&father->children));
 
-	ptrace_exit_finish(father, &ptrace_dead);
+	list_for_each_entry_safe(p, n, &dead_children, sibling) {
+		list_del_init(&p->sibling);
+		release_task(p);
+	}
 }
 
 /*
@@ -950,8 +837,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	 */
 	if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) &&
 	    (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
-	     tsk->self_exec_id != tsk->parent_exec_id) &&
-	    !capable(CAP_KILL))
+	     tsk->self_exec_id != tsk->parent_exec_id))
 		tsk->exit_signal = SIGCHLD;
 
 	signal = tracehook_notify_death(tsk, &cookie, group_dead);
@@ -1016,6 +902,8 @@ NORET_TYPE void do_exit(long code)
 
 	tracehook_report_exit(&code);
 
+	validate_creds_for_do_exit(tsk);
+
 	/*
 	 * We're taking recursive faults here in do_exit. Safest is to just
 	 * leave this task alone and wait for reboot.
@@ -1037,6 +925,8 @@ NORET_TYPE void do_exit(long code)
 		schedule();
 	}
 
+	exit_irq_thread();
+
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
@@ -1056,6 +946,8 @@ NORET_TYPE void do_exit(long code)
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
+		if (tsk->mm)
+			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
 	}
 	acct_collect(code, group_dead);
 	if (group_dead)
@@ -1083,22 +975,25 @@ NORET_TYPE void do_exit(long code)
 	disassociate_ctty(1);
 
 	module_put(task_thread_info(tsk)->exec_domain->module);
-	if (tsk->binfmt)
-		module_put(tsk->binfmt->module);
 
 	proc_exit_connector(tsk);
+
+	/*
+	 * FIXME: do that only when needed, using sched_exit tracepoint
+	 */
+	flush_ptrace_hw_breakpoint(tsk);
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_event_exit_task(tsk);
+
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
 #ifdef CONFIG_FUTEX
-	/*
-	 * This must happen late, after the PID is not
-	 * hashed anymore:
-	 */
-	if (unlikely(!list_empty(&tsk->pi_state_list)))
-		exit_pi_state_list(tsk);
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
 #endif
@@ -1114,12 +1009,15 @@ NORET_TYPE void do_exit(long code)
 	tsk->flags |= PF_EXITPIDONE;
 
 	if (tsk->io_context)
-		exit_io_context();
+		exit_io_context(tsk);
 
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	validate_creds_for_do_exit(tsk);
+
 	preempt_disable();
+	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
 	schedule();
@@ -1189,62 +1087,72 @@ SYSCALL_DEFINE1(exit_group, int, error_code)
 	return 0;
 }
 
-static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+struct wait_opts {
+	enum pid_type		wo_type;
+	int			wo_flags;
+	struct pid		*wo_pid;
+
+	struct siginfo __user	*wo_info;
+	int __user		*wo_stat;
+	struct rusage __user	*wo_rusage;
+
+	wait_queue_t		child_wait;
+	int			notask_error;
+};
+
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
-	struct pid *pid = NULL;
-	if (type == PIDTYPE_PID)
-		pid = task->pids[type].pid;
-	else if (type < PIDTYPE_MAX)
-		pid = task->group_leader->pids[type].pid;
-	return pid;
+	if (type != PIDTYPE_PID)
+		task = task->group_leader;
+	return task->pids[type].pid;
 }
 
-static int eligible_child(enum pid_type type, struct pid *pid, int options,
-			  struct task_struct *p)
+static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
-	int err;
-
-	if (type < PIDTYPE_MAX) {
-		if (task_pid_type(p, type) != pid)
-			return 0;
-	}
+	return wo->wo_type == PIDTYPE_MAX ||
+		task_pid_type(p, wo->wo_type) == wo->wo_pid;
+}
 
+static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+{
+	if (!eligible_pid(wo, p))
+		return 0;
 	/* Wait for all children (clone and not) if __WALL is set;
 	 * otherwise, wait for clone children *only* if __WCLONE is
 	 * set; otherwise, wait for non-clone children *only*. (Note:
 	 * A "clone" child here is one that reports to its parent
 	 * using a signal other than SIGCHLD.) */
-	if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
-	    && !(options & __WALL))
+	if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
+	    && !(wo->wo_flags & __WALL))
 		return 0;
 
-	err = security_task_wait(p);
-	if (err)
-		return err;
-
 	return 1;
 }
 
-static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
-			       int why, int status,
-			       struct siginfo __user *infop,
-			       struct rusage __user *rusagep)
+static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
+		pid_t pid, uid_t uid, int why, int status)
 {
-	int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
+	struct siginfo __user *infop;
+	int retval = wo->wo_rusage
+		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 
 	put_task_struct(p);
-	if (!retval)
-		retval = put_user(SIGCHLD, &infop->si_signo);
-	if (!retval)
-		retval = put_user(0, &infop->si_errno);
-	if (!retval)
-		retval = put_user((short)why, &infop->si_code);
-	if (!retval)
-		retval = put_user(pid, &infop->si_pid);
-	if (!retval)
-		retval = put_user(uid, &infop->si_uid);
-	if (!retval)
-		retval = put_user(status, &infop->si_status);
+	infop = wo->wo_info;
+	if (infop) {
+		if (!retval)
+			retval = put_user(SIGCHLD, &infop->si_signo);
+		if (!retval)
+			retval = put_user(0, &infop->si_errno);
+		if (!retval)
+			retval = put_user((short)why, &infop->si_code);
+		if (!retval)
+			retval = put_user(pid, &infop->si_pid);
+		if (!retval)
+			retval = put_user(uid, &infop->si_uid);
+		if (!retval)
+			retval = put_user(status, &infop->si_status);
+	}
 	if (!retval)
 		retval = pid;
 	return retval;
@@ -1256,19 +1164,18 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
 * the lock and this task is uninteresting. If we return nonzero, we have
 * released the lock and the system call should return.
 */
-static int wait_task_zombie(struct task_struct *p, int options,
-			    struct siginfo __user *infop,
-			    int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 {
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
 	uid_t uid = __task_cred(p)->uid;
+	struct siginfo __user *infop;
 
-	if (!likely(options & WEXITED))
+	if (!likely(wo->wo_flags & WEXITED))
 		return 0;
 
-	if (unlikely(options & WNOWAIT)) {
+	if (unlikely(wo->wo_flags & WNOWAIT)) {
 		int exit_code = p->exit_code;
 		int why, status;
 
@@ -1281,8 +1188,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
 			status = exit_code & 0x7f;
 		}
-		return wait_noreap_copyout(p, pid, uid, why,
-					   status, infop, ru);
+		return wait_noreap_copyout(wo, p, pid, uid, why, status);
 	}
 
 	/*
@@ -1296,11 +1202,15 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	}
 
 	traced = ptrace_reparented(p);
-
-	if (likely(!traced)) {
+	/*
+	 * It can be ptraced but not reparented, check
+	 * !task_detached() to filter out sub-threads.
+	 */
+	if (likely(!traced) && likely(!task_detached(p))) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
-		struct task_cputime cputime;
+		unsigned long maxrss;
+		cputime_t tgutime, tgstime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1313,25 +1223,25 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * p->signal fields, because they are only touched by
 		 * __exit_signal, which runs with tasklist_lock
 		 * write-locked anyway, and so is excluded here. We do
-		 * need to protect the access to p->parent->signal fields,
+		 * need to protect the access to parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
 		 *
-		 * We use thread_group_cputime() to get times for the thread
+		 * We use thread_group_times() to get times for the thread
 		 * group, which consolidates times for all threads in the
 		 * group including the group leader.
 		 */
-		thread_group_cputime(p, &cputime);
-		spin_lock_irq(&p->parent->sighand->siglock);
-		psig = p->parent->signal;
+		thread_group_times(p, &tgutime, &tgstime);
+		spin_lock_irq(&p->real_parent->sighand->siglock);
+		psig = p->real_parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
-				    cputime_add(cputime.utime,
+				    cputime_add(tgutime,
 						sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-				    cputime_add(cputime.stime,
+				    cputime_add(tgstime,
 						sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
@@ -1352,9 +1262,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
+		maxrss = max(sig->maxrss, sig->cmaxrss);
+		if (psig->cmaxrss < maxrss)
+			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
-		spin_unlock_irq(&p->parent->sighand->siglock);
+		spin_unlock_irq(&p->real_parent->sighand->siglock);
 	}
 
 	/*
@@ -1363,11 +1276,14 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	 */
 	read_unlock(&tasklist_lock);
 
-	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+	retval = wo->wo_rusage
+		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
-	if (!retval && stat_addr)
-		retval = put_user(status, stat_addr);
+	if (!retval && wo->wo_stat)
+		retval = put_user(status, wo->wo_stat);
+
+	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
@@ -1417,42 +1333,51 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	return retval;
 }
 
+static int *task_stopped_code(struct task_struct *p, bool ptrace)
+{
+	if (ptrace) {
+		if (task_is_stopped_or_traced(p))
+			return &p->exit_code;
+	} else {
+		if (p->signal->flags & SIGNAL_STOP_STOPPED)
+			return &p->signal->group_exit_code;
+	}
+	return NULL;
+}
+
 /*
  * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold
  * read_lock(&tasklist_lock) on entry. If we return zero, we still hold
  * the lock and this task is uninteresting. If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_stopped(int ptrace, struct task_struct *p,
-			     int options, struct siginfo __user *infop,
-			     int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_stopped(struct wait_opts *wo,
+				int ptrace, struct task_struct *p)
 {
-	int retval, exit_code, why;
+	struct siginfo __user *infop;
+	int retval, exit_code, *p_code, why;
 	uid_t uid = 0; /* unneeded, required by compiler */
 	pid_t pid;
 
-	if (!(options & WUNTRACED))
+	/*
+	 * Traditionally we see ptrace'd stopped tasks regardless of options.
+	 */
+	if (!ptrace && !(wo->wo_flags & WUNTRACED))
 		return 0;
 
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 
-	if (unlikely(!task_is_stopped_or_traced(p)))
-		goto unlock_sig;
-
-	if (!ptrace && p->signal->group_stop_count > 0)
-		/*
-		 * A group stop is in progress and this is the group leader.
-		 * We won't report until all threads have stopped.
-		 */
+	p_code = task_stopped_code(p, ptrace);
+	if (unlikely(!p_code))
 		goto unlock_sig;
 
-	exit_code = p->exit_code;
+	exit_code = *p_code;
 	if (!exit_code)
 		goto unlock_sig;
 
-	if (!unlikely(options & WNOWAIT))
-		p->exit_code = 0;
+	if (!unlikely(wo->wo_flags & WNOWAIT))
+		*p_code = 0;
 
 	/* don't need the RCU readlock here as we're holding a spinlock */
 	uid = __task_cred(p)->uid;
@@ -1473,14 +1398,15 @@ unlock_sig:
 	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(&tasklist_lock);
 
-	if (unlikely(options & WNOWAIT))
-		return wait_noreap_copyout(p, pid, uid,
-					   why, exit_code,
-					   infop, ru);
+	if (unlikely(wo->wo_flags & WNOWAIT))
+		return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
+
+	retval = wo->wo_rusage
+		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
+	if (!retval && wo->wo_stat)
+		retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
 
-	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
-	if (!retval && stat_addr)
-		retval = put_user((exit_code << 8) | 0x7f, stat_addr);
+	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
@@ -1507,15 +1433,13 @@ unlock_sig:
 * the lock and this task is uninteresting. If we return nonzero, we have
 * released the lock and the system call should return.
 */
-static int wait_task_continued(struct task_struct *p, int options,
-			       struct siginfo __user *infop,
-			       int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 {
 	int retval;
 	pid_t pid;
 	uid_t uid;
 
-	if (!unlikely(options & WCONTINUED))
+	if (!unlikely(wo->wo_flags & WCONTINUED))
 		return 0;
 
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
@@ -1527,7 +1451,7 @@ static int wait_task_continued(struct task_struct *p, int options,
 		spin_unlock_irq(&p->sighand->siglock);
 		return 0;
 	}
-	if (!unlikely(options & WNOWAIT))
+	if (!unlikely(wo->wo_flags & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	uid = __task_cred(p)->uid;
 	spin_unlock_irq(&p->sighand->siglock);
@@ -1536,17 +1460,17 @@ static int wait_task_continued(struct task_struct *p, int options,
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
-	if (!infop) {
-		retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+	if (!wo->wo_info) {
+		retval = wo->wo_rusage
+			? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 		put_task_struct(p);
-		if (!retval && stat_addr)
-			retval = put_user(0xffff, stat_addr);
+		if (!retval && wo->wo_stat)
+			retval = put_user(0xffff, wo->wo_stat);
 		if (!retval)
 			retval = pid;
 	} else {
-		retval = wait_noreap_copyout(p, pid, uid,
-					     CLD_CONTINUED, SIGCONT,
-					     infop, ru);
+		retval = wait_noreap_copyout(wo, p, pid, uid,
+					     CLD_CONTINUED, SIGCONT);
 		BUG_ON(retval == 0);
 	}
 
@@ -1556,22 +1480,20 @@ static int wait_task_continued(struct task_struct *p, int options,
 /*
  * Consider @p for a wait by @parent.
  *
- * -ECHILD should be in *@notask_error before the first call.
+ * -ECHILD should be in ->notask_error before the first call.
  * Returns nonzero for a final return, when we have unlocked tasklist_lock.
  * Returns zero if the search for a child should continue;
- * then *@notask_error is 0 if @p is an eligible child,
+ * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
  */
-static int wait_consider_task(struct task_struct *parent, int ptrace,
-			      struct task_struct *p, int *notask_error,
-			      enum pid_type type, struct pid *pid, int options,
-			      struct siginfo __user *infop,
-			      int __user *stat_addr, struct rusage __user *ru)
+static int wait_consider_task(struct wait_opts *wo, int ptrace,
+				struct task_struct *p)
 {
-	int ret = eligible_child(type, pid, options, p);
+	int ret = eligible_child(wo, p);
 	if (!ret)
 		return ret;
 
+	ret = security_task_wait(p);
 	if (unlikely(ret < 0)) {
 		/*
 		 * If we have not yet seen any eligible child,
@@ -1580,16 +1502,17 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
 		 * to look for security policy problems, rather
 		 * than for mysterious wait bugs.
 		 */
-		if (*notask_error)
-			*notask_error = ret;
+		if (wo->notask_error)
+			wo->notask_error = ret;
+		return 0;
 	}
 
-	if (likely(!ptrace) && unlikely(p->ptrace)) {
+	if (likely(!ptrace) && unlikely(task_ptrace(p))) {
 		/*
 		 * This child is hidden by ptrace.
 		 * We aren't allowed to see it now, but eventually we will.
 		 */
-		*notask_error = 0;
+		wo->notask_error = 0;
 		return 0;
 	}
 
@@ -1600,34 +1523,30 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
 	 * We don't reap group leaders with subthreads.
 	 */
 	if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-		return wait_task_zombie(p, options, infop, stat_addr, ru);
+		return wait_task_zombie(wo, p);
 
 	/*
 	 * It's stopped or running now, so it might
 	 * later continue, exit, or stop again.
 	 */
-	*notask_error = 0;
+	wo->notask_error = 0;
 
-	if (task_is_stopped_or_traced(p))
-		return wait_task_stopped(ptrace, p, options,
-					 infop, stat_addr, ru);
+	if (task_stopped_code(p, ptrace))
+		return wait_task_stopped(wo, ptrace, p);
 
-	return wait_task_continued(p, options, infop, stat_addr, ru);
+	return wait_task_continued(wo, p);
 }
 
 /*
  * Do the work of do_wait() for one thread in the group, @tsk.
  *
- * -ECHILD should be in *@notask_error before the first call.
+ * -ECHILD should be in ->notask_error before the first call.
  * Returns nonzero for a final return, when we have unlocked tasklist_lock.
  * Returns zero if the search for a child should continue; then
- * *@notask_error is 0 if there were any eligible children,
+ * ->notask_error is 0 if there were any eligible children,
  * or another error from security_task_wait(), or still -ECHILD.
  */
-static int do_wait_thread(struct task_struct *tsk, int *notask_error,
-			  enum pid_type type, struct pid *pid, int options,
-			  struct siginfo __user *infop, int __user *stat_addr,
-			  struct rusage __user *ru)
+static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *p;
 
@@ -1636,9 +1555,7 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
 		 * Do not consider detached threads.
 		 */
 		if (!task_detached(p)) {
-			int ret = wait_consider_task(tsk, 0, p, notask_error,
-						     type, pid, options,
-						     infop, stat_addr, ru);
+			int ret = wait_consider_task(wo, 0, p);
 			if (ret)
 				return ret;
 		}
@@ -1647,22 +1564,12 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
 	return 0;
 }
 
-static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
-			  enum pid_type type, struct pid *pid, int options,
-			  struct siginfo __user *infop, int __user *stat_addr,
-			  struct rusage __user *ru)
+static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *p;
 
-	/*
-	 * Traditionally we see ptrace'd stopped tasks regardless of options.
-	 */
-	options |= WUNTRACED;
-
 	list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
-		int ret = wait_consider_task(tsk, 1, p, notask_error,
-					     type, pid, options,
-					     infop, stat_addr, ru);
+		int ret = wait_consider_task(wo, 1, p);
 		if (ret)
 			return ret;
 	}
@@ -1670,93 +1577,86 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
 	return 0;
 }
 
-static long do_wait(enum pid_type type, struct pid *pid, int options,
-		    struct siginfo __user *infop, int __user *stat_addr,
-		    struct rusage __user *ru)
+static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+				int sync, void *key)
+{
+	struct wait_opts *wo = container_of(wait, struct wait_opts,
+						child_wait);
+	struct task_struct *p = key;
+
+	if (!eligible_pid(wo, p))
+		return 0;
+
+	if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
+		return 0;
+
+	return default_wake_function(wait, mode, sync, key);
+}
+
+void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
+{
+	__wake_up_sync_key(&parent->signal->wait_chldexit,
+				TASK_INTERRUPTIBLE, 1, p);
+}
+
+static long do_wait(struct wait_opts *wo)
 {
-	DECLARE_WAITQUEUE(wait, current);
 	struct task_struct *tsk;
 	int retval;
 
-	trace_sched_process_wait(pid);
+	trace_sched_process_wait(wo->wo_pid);
 
-	add_wait_queue(&current->signal->wait_chldexit,&wait);
+	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+	wo->child_wait.private = current;
+	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 repeat:
 	/*
 	 * If there is nothing that can match our critiera just get out.
-	 * We will clear @retval to zero if we see any child that might later
-	 * match our criteria, even if we are not able to reap it yet.
+	 * We will clear ->notask_error to zero if we see any child that
+	 * might later match our criteria, even if we are not able to reap
+	 * it yet.
 	 */
-	retval = -ECHILD;
-	if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
-		goto end;
+	wo->notask_error = -ECHILD;
+	if ((wo->wo_type < PIDTYPE_MAX) &&
+	   (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
+		goto notask;
 
-	current->state = TASK_INTERRUPTIBLE;
+	set_current_state(TASK_INTERRUPTIBLE);
 	read_lock(&tasklist_lock);
 	tsk = current;
 	do {
-		int tsk_result = do_wait_thread(tsk, &retval,
-						type, pid, options,
-						infop, stat_addr, ru);
-		if (!tsk_result)
-			tsk_result = ptrace_do_wait(tsk, &retval,
-						    type, pid, options,
-						    infop, stat_addr, ru);
-		if (tsk_result) {
-			/*
-			 * tasklist_lock is unlocked and we have a final result.
-			 */
-			retval = tsk_result;
+		retval = do_wait_thread(wo, tsk);
+		if (retval)
 			goto end;
-		}
 
-		if (options & __WNOTHREAD)
+		retval = ptrace_do_wait(wo, tsk);
+		if (retval)
+			goto end;
+
+		if (wo->wo_flags & __WNOTHREAD)
 			break;
-		tsk = next_thread(tsk);
-		BUG_ON(tsk->signal != current->signal);
-	} while (tsk != current);
+	} while_each_thread(current, tsk);
 	read_unlock(&tasklist_lock);
 
-	if (!retval && !(options & WNOHANG)) {
+notask:
+	retval = wo->notask_error;
+	if (!retval && !(wo->wo_flags & WNOHANG)) {
 		retval = -ERESTARTSYS;
 		if (!signal_pending(current)) {
 			schedule();
 			goto repeat;
 		}
 	}
-
 end:
-	current->state = TASK_RUNNING;
-	remove_wait_queue(&current->signal->wait_chldexit,&wait);
-	if (infop) {
-		if (retval > 0)
-			retval = 0;
-		else {
-			/*
-			 * For a WNOHANG return, clear out all the fields
-			 * we would set so the user can easily tell the
-			 * difference.
-			 */
-			if (!retval)
-				retval = put_user(0, &infop->si_signo);
-			if (!retval)
-				retval = put_user(0, &infop->si_errno);
-			if (!retval)
-				retval = put_user(0, &infop->si_code);
-			if (!retval)
-				retval = put_user(0, &infop->si_pid);
-			if (!retval)
-				retval = put_user(0, &infop->si_uid);
-			if (!retval)
-				retval = put_user(0, &infop->si_status);
-		}
-	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 	return retval;
 }
 
 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		infop, int, options, struct rusage __user *, ru)
 {
+	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
@@ -1786,7 +1686,37 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 
 	if (type < PIDTYPE_MAX)
 		pid = find_get_pid(upid);
-	ret = do_wait(type, pid, options, infop, NULL, ru);
+
+	wo.wo_type	= type;
+	wo.wo_pid	= pid;
+	wo.wo_flags	= options;
+	wo.wo_info	= infop;
+	wo.wo_stat	= NULL;
+	wo.wo_rusage	= ru;
+	ret = do_wait(&wo);
+
+	if (ret > 0) {
+		ret = 0;
+	} else if (infop) {
+		/*
+		 * For a WNOHANG return, clear out all the fields
+		 * we would set so the user can easily tell the
+		 * difference.
+		 */
+		if (!ret)
+			ret = put_user(0, &infop->si_signo);
+		if (!ret)
+			ret = put_user(0, &infop->si_errno);
+		if (!ret)
+			ret = put_user(0, &infop->si_code);
+		if (!ret)
+			ret = put_user(0, &infop->si_pid);
+		if (!ret)
+			ret = put_user(0, &infop->si_uid);
+		if (!ret)
+			ret = put_user(0, &infop->si_status);
+	}
+
 	put_pid(pid);
 
 	/* avoid REGPARM breakage on x86: */
@@ -1797,6 +1727,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 		int, options, struct rusage __user *, ru)
 {
+	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
@@ -1812,13 +1743,19 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 		pid = find_get_pid(-upid);
 	} else if (upid == 0) {
 		type = PIDTYPE_PGID;
-		pid = get_pid(task_pgrp(current));
+		pid = get_task_pid(current, PIDTYPE_PGID);
 	} else /* upid > 0 */ {
 		type = PIDTYPE_PID;
 		pid = find_get_pid(upid);
 	}
 
-	ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru);
+	wo.wo_type	= type;
+	wo.wo_pid	= pid;
+	wo.wo_flags	= options | WEXITED;
+	wo.wo_info	= NULL;
+	wo.wo_stat	= stat_addr;
+	wo.wo_rusage	= ru;
+	ret = do_wait(&wo);
 	put_pid(pid);
 
 	/* avoid REGPARM breakage on x86: */
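
For reference, the two syscall paths reworked above are the kernel side of wait4() and waitid(), which now both fill a struct wait_opts and call do_wait(). A minimal userspace sketch (a hypothetical test program, not part of the patch) that exercises both paths:

/* Sketch only: the child PID and exit status below are illustrative. */
#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0)
		_exit(42);	/* child: reaped via the EXIT_ZOMBIE path */

	/* waitid() reaches do_wait() with wo_info set and wo_stat NULL */
	siginfo_t info;
	if (waitid(P_PID, pid, &info, WEXITED) == 0)
		printf("waitid: pid=%d status=%d\n",
		       (int)info.si_pid, info.si_status);

	/* a second wait has no eligible child left, so do_wait() returns
	 * wo->notask_error (-ECHILD) */
	int status;
	if (waitpid(pid, &status, 0) < 0)
		perror("waitpid (expected ECHILD)");

	return 0;
}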