aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/exit.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/exit.c')
-rw-r--r--kernel/exit.c520
1 files changed, 312 insertions, 208 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 8f6185e69b69..eb4d6470d1d0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -13,6 +13,7 @@
13#include <linux/personality.h> 13#include <linux/personality.h>
14#include <linux/tty.h> 14#include <linux/tty.h>
15#include <linux/mnt_namespace.h> 15#include <linux/mnt_namespace.h>
16#include <linux/iocontext.h>
16#include <linux/key.h> 17#include <linux/key.h>
17#include <linux/security.h> 18#include <linux/security.h>
18#include <linux/cpu.h> 19#include <linux/cpu.h>
@@ -45,6 +46,7 @@
45#include <linux/resource.h> 46#include <linux/resource.h>
46#include <linux/blkdev.h> 47#include <linux/blkdev.h>
47#include <linux/task_io_accounting_ops.h> 48#include <linux/task_io_accounting_ops.h>
49#include <linux/tracehook.h>
48 50
49#include <asm/uaccess.h> 51#include <asm/uaccess.h>
50#include <asm/unistd.h> 52#include <asm/unistd.h>
@@ -70,7 +72,7 @@ static void __unhash_process(struct task_struct *p)
70 __get_cpu_var(process_counts)--; 72 __get_cpu_var(process_counts)--;
71 } 73 }
72 list_del_rcu(&p->thread_group); 74 list_del_rcu(&p->thread_group);
73 remove_parent(p); 75 list_del_init(&p->sibling);
74} 76}
75 77
76/* 78/*
@@ -84,7 +86,6 @@ static void __exit_signal(struct task_struct *tsk)
84 BUG_ON(!sig); 86 BUG_ON(!sig);
85 BUG_ON(!atomic_read(&sig->count)); 87 BUG_ON(!atomic_read(&sig->count));
86 88
87 rcu_read_lock();
88 sighand = rcu_dereference(tsk->sighand); 89 sighand = rcu_dereference(tsk->sighand);
89 spin_lock(&sighand->siglock); 90 spin_lock(&sighand->siglock);
90 91
@@ -120,6 +121,7 @@ static void __exit_signal(struct task_struct *tsk)
120 sig->nivcsw += tsk->nivcsw; 121 sig->nivcsw += tsk->nivcsw;
121 sig->inblock += task_io_get_inblock(tsk); 122 sig->inblock += task_io_get_inblock(tsk);
122 sig->oublock += task_io_get_oublock(tsk); 123 sig->oublock += task_io_get_oublock(tsk);
124 task_io_accounting_add(&sig->ioac, &tsk->ioac);
123 sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 125 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
124 sig = NULL; /* Marker for below. */ 126 sig = NULL; /* Marker for below. */
125 } 127 }
@@ -135,7 +137,6 @@ static void __exit_signal(struct task_struct *tsk)
135 tsk->signal = NULL; 137 tsk->signal = NULL;
136 tsk->sighand = NULL; 138 tsk->sighand = NULL;
137 spin_unlock(&sighand->siglock); 139 spin_unlock(&sighand->siglock);
138 rcu_read_unlock();
139 140
140 __cleanup_sighand(sighand); 141 __cleanup_sighand(sighand);
141 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 142 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
@@ -151,16 +152,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
151 put_task_struct(container_of(rhp, struct task_struct, rcu)); 152 put_task_struct(container_of(rhp, struct task_struct, rcu));
152} 153}
153 154
155
154void release_task(struct task_struct * p) 156void release_task(struct task_struct * p)
155{ 157{
156 struct task_struct *leader; 158 struct task_struct *leader;
157 int zap_leader; 159 int zap_leader;
158repeat: 160repeat:
161 tracehook_prepare_release_task(p);
159 atomic_dec(&p->user->processes); 162 atomic_dec(&p->user->processes);
160 proc_flush_task(p); 163 proc_flush_task(p);
161 write_lock_irq(&tasklist_lock); 164 write_lock_irq(&tasklist_lock);
162 ptrace_unlink(p); 165 tracehook_finish_release_task(p);
163 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
164 __exit_signal(p); 166 __exit_signal(p);
165 167
166 /* 168 /*
@@ -182,6 +184,13 @@ repeat:
182 * that case. 184 * that case.
183 */ 185 */
184 zap_leader = task_detached(leader); 186 zap_leader = task_detached(leader);
187
188 /*
189 * This maintains the invariant that release_task()
190 * only runs on a task in EXIT_DEAD, just for sanity.
191 */
192 if (zap_leader)
193 leader->exit_state = EXIT_DEAD;
185 } 194 }
186 195
187 write_unlock_irq(&tasklist_lock); 196 write_unlock_irq(&tasklist_lock);
@@ -314,9 +323,8 @@ static void reparent_to_kthreadd(void)
314 323
315 ptrace_unlink(current); 324 ptrace_unlink(current);
316 /* Reparent to init */ 325 /* Reparent to init */
317 remove_parent(current);
318 current->real_parent = current->parent = kthreadd_task; 326 current->real_parent = current->parent = kthreadd_task;
319 add_parent(current); 327 list_move_tail(&current->sibling, &current->real_parent->children);
320 328
321 /* Set the exit signal to SIGCHLD so we signal init on exit */ 329 /* Set the exit signal to SIGCHLD so we signal init on exit */
322 current->exit_signal = SIGCHLD; 330 current->exit_signal = SIGCHLD;
@@ -421,7 +429,7 @@ void daemonize(const char *name, ...)
421 * We don't want to have TIF_FREEZE set if the system-wide hibernation 429 * We don't want to have TIF_FREEZE set if the system-wide hibernation
422 * or suspend transition begins right now. 430 * or suspend transition begins right now.
423 */ 431 */
424 current->flags |= PF_NOFREEZE; 432 current->flags |= (PF_NOFREEZE | PF_KTHREAD);
425 433
426 if (current->nsproxy != &init_nsproxy) { 434 if (current->nsproxy != &init_nsproxy) {
427 get_nsproxy(&init_nsproxy); 435 get_nsproxy(&init_nsproxy);
@@ -546,8 +554,6 @@ void put_fs_struct(struct fs_struct *fs)
546 if (atomic_dec_and_test(&fs->count)) { 554 if (atomic_dec_and_test(&fs->count)) {
547 path_put(&fs->root); 555 path_put(&fs->root);
548 path_put(&fs->pwd); 556 path_put(&fs->pwd);
549 if (fs->altroot.dentry)
550 path_put(&fs->altroot);
551 kmem_cache_free(fs_cachep, fs); 557 kmem_cache_free(fs_cachep, fs);
552 } 558 }
553} 559}
@@ -655,26 +661,40 @@ assign_new_owner:
655static void exit_mm(struct task_struct * tsk) 661static void exit_mm(struct task_struct * tsk)
656{ 662{
657 struct mm_struct *mm = tsk->mm; 663 struct mm_struct *mm = tsk->mm;
664 struct core_state *core_state;
658 665
659 mm_release(tsk, mm); 666 mm_release(tsk, mm);
660 if (!mm) 667 if (!mm)
661 return; 668 return;
662 /* 669 /*
663 * Serialize with any possible pending coredump. 670 * Serialize with any possible pending coredump.
664 * We must hold mmap_sem around checking core_waiters 671 * We must hold mmap_sem around checking core_state
665 * and clearing tsk->mm. The core-inducing thread 672 * and clearing tsk->mm. The core-inducing thread
666 * will increment core_waiters for each thread in the 673 * will increment ->nr_threads for each thread in the
667 * group with ->mm != NULL. 674 * group with ->mm != NULL.
668 */ 675 */
669 down_read(&mm->mmap_sem); 676 down_read(&mm->mmap_sem);
670 if (mm->core_waiters) { 677 core_state = mm->core_state;
678 if (core_state) {
679 struct core_thread self;
671 up_read(&mm->mmap_sem); 680 up_read(&mm->mmap_sem);
672 down_write(&mm->mmap_sem);
673 if (!--mm->core_waiters)
674 complete(mm->core_startup_done);
675 up_write(&mm->mmap_sem);
676 681
677 wait_for_completion(&mm->core_done); 682 self.task = tsk;
683 self.next = xchg(&core_state->dumper.next, &self);
684 /*
685 * Implies mb(), the result of xchg() must be visible
686 * to core_state->dumper.
687 */
688 if (atomic_dec_and_test(&core_state->nr_threads))
689 complete(&core_state->startup);
690
691 for (;;) {
692 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
693 if (!self.task) /* see coredump_finish() */
694 break;
695 schedule();
696 }
697 __set_task_state(tsk, TASK_RUNNING);
678 down_read(&mm->mmap_sem); 698 down_read(&mm->mmap_sem);
679 } 699 }
680 atomic_inc(&mm->mm_count); 700 atomic_inc(&mm->mm_count);
@@ -691,37 +711,97 @@ static void exit_mm(struct task_struct * tsk)
691 mmput(mm); 711 mmput(mm);
692} 712}
693 713
694static void 714/*
695reparent_thread(struct task_struct *p, struct task_struct *father, int traced) 715 * Return nonzero if @parent's children should reap themselves.
716 *
717 * Called with write_lock_irq(&tasklist_lock) held.
718 */
719static int ignoring_children(struct task_struct *parent)
696{ 720{
697 if (p->pdeath_signal) 721 int ret;
698 /* We already hold the tasklist_lock here. */ 722 struct sighand_struct *psig = parent->sighand;
699 group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); 723 unsigned long flags;
724 spin_lock_irqsave(&psig->siglock, flags);
725 ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
726 (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
727 spin_unlock_irqrestore(&psig->siglock, flags);
728 return ret;
729}
700 730
701 /* Move the child from its dying parent to the new one. */ 731/*
702 if (unlikely(traced)) { 732 * Detach all tasks we were using ptrace on.
703 /* Preserve ptrace links if someone else is tracing this child. */ 733 * Any that need to be release_task'd are put on the @dead list.
704 list_del_init(&p->ptrace_list); 734 *
705 if (ptrace_reparented(p)) 735 * Called with write_lock(&tasklist_lock) held.
706 list_add(&p->ptrace_list, &p->real_parent->ptrace_children); 736 */
707 } else { 737static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
708 /* If this child is being traced, then we're the one tracing it 738{
709 * anyway, so let go of it. 739 struct task_struct *p, *n;
740 int ign = -1;
741
742 list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
743 __ptrace_unlink(p);
744
745 if (p->exit_state != EXIT_ZOMBIE)
746 continue;
747
748 /*
749 * If it's a zombie, our attachedness prevented normal
750 * parent notification or self-reaping. Do notification
751 * now if it would have happened earlier. If it should
752 * reap itself, add it to the @dead list. We can't call
753 * release_task() here because we already hold tasklist_lock.
754 *
755 * If it's our own child, there is no notification to do.
756 * But if our normal children self-reap, then this child
757 * was prevented by ptrace and we must reap it now.
710 */ 758 */
711 p->ptrace = 0; 759 if (!task_detached(p) && thread_group_empty(p)) {
712 remove_parent(p); 760 if (!same_thread_group(p->real_parent, parent))
713 p->parent = p->real_parent; 761 do_notify_parent(p, p->exit_signal);
714 add_parent(p); 762 else {
763 if (ign < 0)
764 ign = ignoring_children(parent);
765 if (ign)
766 p->exit_signal = -1;
767 }
768 }
715 769
716 if (task_is_traced(p)) { 770 if (task_detached(p)) {
717 /* 771 /*
718 * If it was at a trace stop, turn it into 772 * Mark it as in the process of being reaped.
719 * a normal stop since it's no longer being
720 * traced.
721 */ 773 */
722 ptrace_untrace(p); 774 p->exit_state = EXIT_DEAD;
775 list_add(&p->ptrace_entry, dead);
723 } 776 }
724 } 777 }
778}
779
780/*
781 * Finish up exit-time ptrace cleanup.
782 *
783 * Called without locks.
784 */
785static void ptrace_exit_finish(struct task_struct *parent,
786 struct list_head *dead)
787{
788 struct task_struct *p, *n;
789
790 BUG_ON(!list_empty(&parent->ptraced));
791
792 list_for_each_entry_safe(p, n, dead, ptrace_entry) {
793 list_del_init(&p->ptrace_entry);
794 release_task(p);
795 }
796}
797
798static void reparent_thread(struct task_struct *p, struct task_struct *father)
799{
800 if (p->pdeath_signal)
801 /* We already hold the tasklist_lock here. */
802 group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
803
804 list_move_tail(&p->sibling, &p->real_parent->children);
725 805
726 /* If this is a threaded reparent there is no need to 806 /* If this is a threaded reparent there is no need to
727 * notify anyone anything has happened. 807 * notify anyone anything has happened.
@@ -736,7 +816,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
736 /* If we'd notified the old parent about this child's death, 816 /* If we'd notified the old parent about this child's death,
737 * also notify the new parent. 817 * also notify the new parent.
738 */ 818 */
739 if (!traced && p->exit_state == EXIT_ZOMBIE && 819 if (!ptrace_reparented(p) &&
820 p->exit_state == EXIT_ZOMBIE &&
740 !task_detached(p) && thread_group_empty(p)) 821 !task_detached(p) && thread_group_empty(p))
741 do_notify_parent(p, p->exit_signal); 822 do_notify_parent(p, p->exit_signal);
742 823
@@ -753,12 +834,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
753static void forget_original_parent(struct task_struct *father) 834static void forget_original_parent(struct task_struct *father)
754{ 835{
755 struct task_struct *p, *n, *reaper = father; 836 struct task_struct *p, *n, *reaper = father;
756 struct list_head ptrace_dead; 837 LIST_HEAD(ptrace_dead);
757
758 INIT_LIST_HEAD(&ptrace_dead);
759 838
760 write_lock_irq(&tasklist_lock); 839 write_lock_irq(&tasklist_lock);
761 840
841 /*
842 * First clean up ptrace if we were using it.
843 */
844 ptrace_exit(father, &ptrace_dead);
845
762 do { 846 do {
763 reaper = next_thread(reaper); 847 reaper = next_thread(reaper);
764 if (reaper == father) { 848 if (reaper == father) {
@@ -767,58 +851,19 @@ static void forget_original_parent(struct task_struct *father)
767 } 851 }
768 } while (reaper->flags & PF_EXITING); 852 } while (reaper->flags & PF_EXITING);
769 853
770 /*
771 * There are only two places where our children can be:
772 *
773 * - in our child list
774 * - in our ptraced child list
775 *
776 * Search them and reparent children.
777 */
778 list_for_each_entry_safe(p, n, &father->children, sibling) { 854 list_for_each_entry_safe(p, n, &father->children, sibling) {
779 int ptrace;
780
781 ptrace = p->ptrace;
782
783 /* if father isn't the real parent, then ptrace must be enabled */
784 BUG_ON(father != p->real_parent && !ptrace);
785
786 if (father == p->real_parent) {
787 /* reparent with a reaper, real father it's us */
788 p->real_parent = reaper;
789 reparent_thread(p, father, 0);
790 } else {
791 /* reparent ptraced task to its real parent */
792 __ptrace_unlink (p);
793 if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) &&
794 thread_group_empty(p))
795 do_notify_parent(p, p->exit_signal);
796 }
797
798 /*
799 * if the ptraced child is a detached zombie we must collect
800 * it before we exit, or it will remain zombie forever since
801 * we prevented it from self-reap itself while it was being
802 * traced by us, to be able to see it in wait4.
803 */
804 if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p)))
805 list_add(&p->ptrace_list, &ptrace_dead);
806 }
807
808 list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
809 p->real_parent = reaper; 855 p->real_parent = reaper;
810 reparent_thread(p, father, 1); 856 if (p->parent == father) {
857 BUG_ON(p->ptrace);
858 p->parent = p->real_parent;
859 }
860 reparent_thread(p, father);
811 } 861 }
812 862
813 write_unlock_irq(&tasklist_lock); 863 write_unlock_irq(&tasklist_lock);
814 BUG_ON(!list_empty(&father->children)); 864 BUG_ON(!list_empty(&father->children));
815 BUG_ON(!list_empty(&father->ptrace_children));
816
817 list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
818 list_del_init(&p->ptrace_list);
819 release_task(p);
820 }
821 865
866 ptrace_exit_finish(father, &ptrace_dead);
822} 867}
823 868
824/* 869/*
@@ -827,7 +872,8 @@ static void forget_original_parent(struct task_struct *father)
827 */ 872 */
828static void exit_notify(struct task_struct *tsk, int group_dead) 873static void exit_notify(struct task_struct *tsk, int group_dead)
829{ 874{
830 int state; 875 int signal;
876 void *cookie;
831 877
832 /* 878 /*
833 * This does two things: 879 * This does two things:
@@ -864,22 +910,11 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
864 !capable(CAP_KILL)) 910 !capable(CAP_KILL))
865 tsk->exit_signal = SIGCHLD; 911 tsk->exit_signal = SIGCHLD;
866 912
867 /* If something other than our normal parent is ptracing us, then 913 signal = tracehook_notify_death(tsk, &cookie, group_dead);
868 * send it a SIGCHLD instead of honoring exit_signal. exit_signal 914 if (signal > 0)
869 * only has special meaning to our real parent. 915 signal = do_notify_parent(tsk, signal);
870 */
871 if (!task_detached(tsk) && thread_group_empty(tsk)) {
872 int signal = ptrace_reparented(tsk) ?
873 SIGCHLD : tsk->exit_signal;
874 do_notify_parent(tsk, signal);
875 } else if (tsk->ptrace) {
876 do_notify_parent(tsk, SIGCHLD);
877 }
878 916
879 state = EXIT_ZOMBIE; 917 tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE;
880 if (task_detached(tsk) && likely(!tsk->ptrace))
881 state = EXIT_DEAD;
882 tsk->exit_state = state;
883 918
884 /* mt-exec, de_thread() is waiting for us */ 919 /* mt-exec, de_thread() is waiting for us */
885 if (thread_group_leader(tsk) && 920 if (thread_group_leader(tsk) &&
@@ -889,8 +924,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
889 924
890 write_unlock_irq(&tasklist_lock); 925 write_unlock_irq(&tasklist_lock);
891 926
927 tracehook_report_death(tsk, signal, cookie, group_dead);
928
892 /* If the process is dead, release it - nobody will wait for it */ 929 /* If the process is dead, release it - nobody will wait for it */
893 if (state == EXIT_DEAD) 930 if (signal < 0)
894 release_task(tsk); 931 release_task(tsk);
895} 932}
896 933
@@ -969,10 +1006,7 @@ NORET_TYPE void do_exit(long code)
969 if (unlikely(!tsk->pid)) 1006 if (unlikely(!tsk->pid))
970 panic("Attempted to kill the idle task!"); 1007 panic("Attempted to kill the idle task!");
971 1008
972 if (unlikely(current->ptrace & PT_TRACE_EXIT)) { 1009 tracehook_report_exit(&code);
973 current->ptrace_message = code;
974 ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
975 }
976 1010
977 /* 1011 /*
978 * We're taking recursive faults here in do_exit. Safest is to just 1012 * We're taking recursive faults here in do_exit. Safest is to just
@@ -1179,13 +1213,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1179 return 0; 1213 return 0;
1180 } 1214 }
1181 1215
1182 /*
1183 * Do not consider detached threads that are
1184 * not ptraced:
1185 */
1186 if (task_detached(p) && !p->ptrace)
1187 return 0;
1188
1189 /* Wait for all children (clone and not) if __WALL is set; 1216 /* Wait for all children (clone and not) if __WALL is set;
1190 * otherwise, wait for clone children *only* if __WCLONE is 1217 * otherwise, wait for clone children *only* if __WCLONE is
1191 * set; otherwise, wait for non-clone children *only*. (Note: 1218 * set; otherwise, wait for non-clone children *only*. (Note:
@@ -1196,14 +1223,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options,
1196 return 0; 1223 return 0;
1197 1224
1198 err = security_task_wait(p); 1225 err = security_task_wait(p);
1199 if (likely(!err)) 1226 if (err)
1200 return 1; 1227 return err;
1201 1228
1202 if (type != PIDTYPE_PID) 1229 return 1;
1203 return 0;
1204 /* This child was explicitly requested, abort */
1205 read_unlock(&tasklist_lock);
1206 return err;
1207} 1230}
1208 1231
1209static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, 1232static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
@@ -1237,7 +1260,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
1237 * the lock and this task is uninteresting. If we return nonzero, we have 1260 * the lock and this task is uninteresting. If we return nonzero, we have
1238 * released the lock and the system call should return. 1261 * released the lock and the system call should return.
1239 */ 1262 */
1240static int wait_task_zombie(struct task_struct *p, int noreap, 1263static int wait_task_zombie(struct task_struct *p, int options,
1241 struct siginfo __user *infop, 1264 struct siginfo __user *infop,
1242 int __user *stat_addr, struct rusage __user *ru) 1265 int __user *stat_addr, struct rusage __user *ru)
1243{ 1266{
@@ -1245,7 +1268,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1245 int retval, status, traced; 1268 int retval, status, traced;
1246 pid_t pid = task_pid_vnr(p); 1269 pid_t pid = task_pid_vnr(p);
1247 1270
1248 if (unlikely(noreap)) { 1271 if (!likely(options & WEXITED))
1272 return 0;
1273
1274 if (unlikely(options & WNOWAIT)) {
1249 uid_t uid = p->uid; 1275 uid_t uid = p->uid;
1250 int exit_code = p->exit_code; 1276 int exit_code = p->exit_code;
1251 int why, status; 1277 int why, status;
@@ -1326,6 +1352,8 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1326 psig->coublock += 1352 psig->coublock +=
1327 task_io_get_oublock(p) + 1353 task_io_get_oublock(p) +
1328 sig->oublock + sig->coublock; 1354 sig->oublock + sig->coublock;
1355 task_io_accounting_add(&psig->ioac, &p->ioac);
1356 task_io_accounting_add(&psig->ioac, &sig->ioac);
1329 spin_unlock_irq(&p->parent->sighand->siglock); 1357 spin_unlock_irq(&p->parent->sighand->siglock);
1330 } 1358 }
1331 1359
@@ -1395,21 +1423,24 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1395 * the lock and this task is uninteresting. If we return nonzero, we have 1423 * the lock and this task is uninteresting. If we return nonzero, we have
1396 * released the lock and the system call should return. 1424 * released the lock and the system call should return.
1397 */ 1425 */
1398static int wait_task_stopped(struct task_struct *p, 1426static int wait_task_stopped(int ptrace, struct task_struct *p,
1399 int noreap, struct siginfo __user *infop, 1427 int options, struct siginfo __user *infop,
1400 int __user *stat_addr, struct rusage __user *ru) 1428 int __user *stat_addr, struct rusage __user *ru)
1401{ 1429{
1402 int retval, exit_code, why; 1430 int retval, exit_code, why;
1403 uid_t uid = 0; /* unneeded, required by compiler */ 1431 uid_t uid = 0; /* unneeded, required by compiler */
1404 pid_t pid; 1432 pid_t pid;
1405 1433
1434 if (!(options & WUNTRACED))
1435 return 0;
1436
1406 exit_code = 0; 1437 exit_code = 0;
1407 spin_lock_irq(&p->sighand->siglock); 1438 spin_lock_irq(&p->sighand->siglock);
1408 1439
1409 if (unlikely(!task_is_stopped_or_traced(p))) 1440 if (unlikely(!task_is_stopped_or_traced(p)))
1410 goto unlock_sig; 1441 goto unlock_sig;
1411 1442
1412 if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0) 1443 if (!ptrace && p->signal->group_stop_count > 0)
1413 /* 1444 /*
1414 * A group stop is in progress and this is the group leader. 1445 * A group stop is in progress and this is the group leader.
1415 * We won't report until all threads have stopped. 1446 * We won't report until all threads have stopped.
@@ -1420,7 +1451,7 @@ static int wait_task_stopped(struct task_struct *p,
1420 if (!exit_code) 1451 if (!exit_code)
1421 goto unlock_sig; 1452 goto unlock_sig;
1422 1453
1423 if (!noreap) 1454 if (!unlikely(options & WNOWAIT))
1424 p->exit_code = 0; 1455 p->exit_code = 0;
1425 1456
1426 uid = p->uid; 1457 uid = p->uid;
@@ -1438,10 +1469,10 @@ unlock_sig:
1438 */ 1469 */
1439 get_task_struct(p); 1470 get_task_struct(p);
1440 pid = task_pid_vnr(p); 1471 pid = task_pid_vnr(p);
1441 why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; 1472 why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
1442 read_unlock(&tasklist_lock); 1473 read_unlock(&tasklist_lock);
1443 1474
1444 if (unlikely(noreap)) 1475 if (unlikely(options & WNOWAIT))
1445 return wait_noreap_copyout(p, pid, uid, 1476 return wait_noreap_copyout(p, pid, uid,
1446 why, exit_code, 1477 why, exit_code,
1447 infop, ru); 1478 infop, ru);
@@ -1475,7 +1506,7 @@ unlock_sig:
1475 * the lock and this task is uninteresting. If we return nonzero, we have 1506 * the lock and this task is uninteresting. If we return nonzero, we have
1476 * released the lock and the system call should return. 1507 * released the lock and the system call should return.
1477 */ 1508 */
1478static int wait_task_continued(struct task_struct *p, int noreap, 1509static int wait_task_continued(struct task_struct *p, int options,
1479 struct siginfo __user *infop, 1510 struct siginfo __user *infop,
1480 int __user *stat_addr, struct rusage __user *ru) 1511 int __user *stat_addr, struct rusage __user *ru)
1481{ 1512{
@@ -1483,6 +1514,9 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1483 pid_t pid; 1514 pid_t pid;
1484 uid_t uid; 1515 uid_t uid;
1485 1516
1517 if (!unlikely(options & WCONTINUED))
1518 return 0;
1519
1486 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1520 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
1487 return 0; 1521 return 0;
1488 1522
@@ -1492,7 +1526,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1492 spin_unlock_irq(&p->sighand->siglock); 1526 spin_unlock_irq(&p->sighand->siglock);
1493 return 0; 1527 return 0;
1494 } 1528 }
1495 if (!noreap) 1529 if (!unlikely(options & WNOWAIT))
1496 p->signal->flags &= ~SIGNAL_STOP_CONTINUED; 1530 p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
1497 spin_unlock_irq(&p->sighand->siglock); 1531 spin_unlock_irq(&p->sighand->siglock);
1498 1532
@@ -1518,89 +1552,161 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1518 return retval; 1552 return retval;
1519} 1553}
1520 1554
1555/*
1556 * Consider @p for a wait by @parent.
1557 *
1558 * -ECHILD should be in *@notask_error before the first call.
1559 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1560 * Returns zero if the search for a child should continue;
1561 * then *@notask_error is 0 if @p is an eligible child,
1562 * or another error from security_task_wait(), or still -ECHILD.
1563 */
1564static int wait_consider_task(struct task_struct *parent, int ptrace,
1565 struct task_struct *p, int *notask_error,
1566 enum pid_type type, struct pid *pid, int options,
1567 struct siginfo __user *infop,
1568 int __user *stat_addr, struct rusage __user *ru)
1569{
1570 int ret = eligible_child(type, pid, options, p);
1571 if (!ret)
1572 return ret;
1573
1574 if (unlikely(ret < 0)) {
1575 /*
1576 * If we have not yet seen any eligible child,
1577 * then let this error code replace -ECHILD.
1578 * A permission error will give the user a clue
1579 * to look for security policy problems, rather
1580 * than for mysterious wait bugs.
1581 */
1582 if (*notask_error)
1583 *notask_error = ret;
1584 }
1585
1586 if (likely(!ptrace) && unlikely(p->ptrace)) {
1587 /*
1588 * This child is hidden by ptrace.
1589 * We aren't allowed to see it now, but eventually we will.
1590 */
1591 *notask_error = 0;
1592 return 0;
1593 }
1594
1595 if (p->exit_state == EXIT_DEAD)
1596 return 0;
1597
1598 /*
1599 * We don't reap group leaders with subthreads.
1600 */
1601 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
1602 return wait_task_zombie(p, options, infop, stat_addr, ru);
1603
1604 /*
1605 * It's stopped or running now, so it might
1606 * later continue, exit, or stop again.
1607 */
1608 *notask_error = 0;
1609
1610 if (task_is_stopped_or_traced(p))
1611 return wait_task_stopped(ptrace, p, options,
1612 infop, stat_addr, ru);
1613
1614 return wait_task_continued(p, options, infop, stat_addr, ru);
1615}
1616
1617/*
1618 * Do the work of do_wait() for one thread in the group, @tsk.
1619 *
1620 * -ECHILD should be in *@notask_error before the first call.
1621 * Returns nonzero for a final return, when we have unlocked tasklist_lock.
1622 * Returns zero if the search for a child should continue; then
1623 * *@notask_error is 0 if there were any eligible children,
1624 * or another error from security_task_wait(), or still -ECHILD.
1625 */
1626static int do_wait_thread(struct task_struct *tsk, int *notask_error,
1627 enum pid_type type, struct pid *pid, int options,
1628 struct siginfo __user *infop, int __user *stat_addr,
1629 struct rusage __user *ru)
1630{
1631 struct task_struct *p;
1632
1633 list_for_each_entry(p, &tsk->children, sibling) {
1634 /*
1635 * Do not consider detached threads.
1636 */
1637 if (!task_detached(p)) {
1638 int ret = wait_consider_task(tsk, 0, p, notask_error,
1639 type, pid, options,
1640 infop, stat_addr, ru);
1641 if (ret)
1642 return ret;
1643 }
1644 }
1645
1646 return 0;
1647}
1648
1649static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
1650 enum pid_type type, struct pid *pid, int options,
1651 struct siginfo __user *infop, int __user *stat_addr,
1652 struct rusage __user *ru)
1653{
1654 struct task_struct *p;
1655
1656 /*
1657 * Traditionally we see ptrace'd stopped tasks regardless of options.
1658 */
1659 options |= WUNTRACED;
1660
1661 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1662 int ret = wait_consider_task(tsk, 1, p, notask_error,
1663 type, pid, options,
1664 infop, stat_addr, ru);
1665 if (ret)
1666 return ret;
1667 }
1668
1669 return 0;
1670}
1671
1521static long do_wait(enum pid_type type, struct pid *pid, int options, 1672static long do_wait(enum pid_type type, struct pid *pid, int options,
1522 struct siginfo __user *infop, int __user *stat_addr, 1673 struct siginfo __user *infop, int __user *stat_addr,
1523 struct rusage __user *ru) 1674 struct rusage __user *ru)
1524{ 1675{
1525 DECLARE_WAITQUEUE(wait, current); 1676 DECLARE_WAITQUEUE(wait, current);
1526 struct task_struct *tsk; 1677 struct task_struct *tsk;
1527 int flag, retval; 1678 int retval;
1528 1679
1529 add_wait_queue(&current->signal->wait_chldexit,&wait); 1680 add_wait_queue(&current->signal->wait_chldexit,&wait);
1530repeat: 1681repeat:
1531 /* If there is nothing that can match our critier just get out */ 1682 /*
1683 * If there is nothing that can match our critiera just get out.
1684 * We will clear @retval to zero if we see any child that might later
1685 * match our criteria, even if we are not able to reap it yet.
1686 */
1532 retval = -ECHILD; 1687 retval = -ECHILD;
1533 if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) 1688 if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
1534 goto end; 1689 goto end;
1535 1690
1536 /*
1537 * We will set this flag if we see any child that might later
1538 * match our criteria, even if we are not able to reap it yet.
1539 */
1540 flag = retval = 0;
1541 current->state = TASK_INTERRUPTIBLE; 1691 current->state = TASK_INTERRUPTIBLE;
1542 read_lock(&tasklist_lock); 1692 read_lock(&tasklist_lock);
1543 tsk = current; 1693 tsk = current;
1544 do { 1694 do {
1545 struct task_struct *p; 1695 int tsk_result = do_wait_thread(tsk, &retval,
1546 1696 type, pid, options,
1547 list_for_each_entry(p, &tsk->children, sibling) { 1697 infop, stat_addr, ru);
1548 int ret = eligible_child(type, pid, options, p); 1698 if (!tsk_result)
1549 if (!ret) 1699 tsk_result = ptrace_do_wait(tsk, &retval,
1550 continue; 1700 type, pid, options,
1551 1701 infop, stat_addr, ru);
1552 if (unlikely(ret < 0)) { 1702 if (tsk_result) {
1553 retval = ret; 1703 /*
1554 } else if (task_is_stopped_or_traced(p)) { 1704 * tasklist_lock is unlocked and we have a final result.
1555 /* 1705 */
1556 * It's stopped now, so it might later 1706 retval = tsk_result;
1557 * continue, exit, or stop again. 1707 goto end;
1558 */
1559 flag = 1;
1560 if (!(p->ptrace & PT_PTRACED) &&
1561 !(options & WUNTRACED))
1562 continue;
1563
1564 retval = wait_task_stopped(p,
1565 (options & WNOWAIT), infop,
1566 stat_addr, ru);
1567 } else if (p->exit_state == EXIT_ZOMBIE &&
1568 !delay_group_leader(p)) {
1569 /*
1570 * We don't reap group leaders with subthreads.
1571 */
1572 if (!likely(options & WEXITED))
1573 continue;
1574 retval = wait_task_zombie(p,
1575 (options & WNOWAIT), infop,
1576 stat_addr, ru);
1577 } else if (p->exit_state != EXIT_DEAD) {
1578 /*
1579 * It's running now, so it might later
1580 * exit, stop, or stop and then continue.
1581 */
1582 flag = 1;
1583 if (!unlikely(options & WCONTINUED))
1584 continue;
1585 retval = wait_task_continued(p,
1586 (options & WNOWAIT), infop,
1587 stat_addr, ru);
1588 }
1589 if (retval != 0) /* tasklist_lock released */
1590 goto end;
1591 }
1592 if (!flag) {
1593 list_for_each_entry(p, &tsk->ptrace_children,
1594 ptrace_list) {
1595 flag = eligible_child(type, pid, options, p);
1596 if (!flag)
1597 continue;
1598 if (likely(flag > 0))
1599 break;
1600 retval = flag;
1601 goto end;
1602 }
1603 } 1708 }
1709
1604 if (options & __WNOTHREAD) 1710 if (options & __WNOTHREAD)
1605 break; 1711 break;
1606 tsk = next_thread(tsk); 1712 tsk = next_thread(tsk);
@@ -1608,16 +1714,14 @@ repeat:
1608 } while (tsk != current); 1714 } while (tsk != current);
1609 read_unlock(&tasklist_lock); 1715 read_unlock(&tasklist_lock);
1610 1716
1611 if (flag) { 1717 if (!retval && !(options & WNOHANG)) {
1612 if (options & WNOHANG)
1613 goto end;
1614 retval = -ERESTARTSYS; 1718 retval = -ERESTARTSYS;
1615 if (signal_pending(current)) 1719 if (!signal_pending(current)) {
1616 goto end; 1720 schedule();
1617 schedule(); 1721 goto repeat;
1618 goto repeat; 1722 }
1619 } 1723 }
1620 retval = -ECHILD; 1724
1621end: 1725end:
1622 current->state = TASK_RUNNING; 1726 current->state = TASK_RUNNING;
1623 remove_wait_queue(&current->signal->wait_chldexit,&wait); 1727 remove_wait_queue(&current->signal->wait_chldexit,&wait);