diff options
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- | kernel/exit.c | 520 |
1 files changed, 312 insertions, 208 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 8f6185e69b69..eb4d6470d1d0 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/personality.h> | 13 | #include <linux/personality.h> |
14 | #include <linux/tty.h> | 14 | #include <linux/tty.h> |
15 | #include <linux/mnt_namespace.h> | 15 | #include <linux/mnt_namespace.h> |
16 | #include <linux/iocontext.h> | ||
16 | #include <linux/key.h> | 17 | #include <linux/key.h> |
17 | #include <linux/security.h> | 18 | #include <linux/security.h> |
18 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
@@ -45,6 +46,7 @@ | |||
45 | #include <linux/resource.h> | 46 | #include <linux/resource.h> |
46 | #include <linux/blkdev.h> | 47 | #include <linux/blkdev.h> |
47 | #include <linux/task_io_accounting_ops.h> | 48 | #include <linux/task_io_accounting_ops.h> |
49 | #include <linux/tracehook.h> | ||
48 | 50 | ||
49 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
50 | #include <asm/unistd.h> | 52 | #include <asm/unistd.h> |
@@ -70,7 +72,7 @@ static void __unhash_process(struct task_struct *p) | |||
70 | __get_cpu_var(process_counts)--; | 72 | __get_cpu_var(process_counts)--; |
71 | } | 73 | } |
72 | list_del_rcu(&p->thread_group); | 74 | list_del_rcu(&p->thread_group); |
73 | remove_parent(p); | 75 | list_del_init(&p->sibling); |
74 | } | 76 | } |
75 | 77 | ||
76 | /* | 78 | /* |
@@ -84,7 +86,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
84 | BUG_ON(!sig); | 86 | BUG_ON(!sig); |
85 | BUG_ON(!atomic_read(&sig->count)); | 87 | BUG_ON(!atomic_read(&sig->count)); |
86 | 88 | ||
87 | rcu_read_lock(); | ||
88 | sighand = rcu_dereference(tsk->sighand); | 89 | sighand = rcu_dereference(tsk->sighand); |
89 | spin_lock(&sighand->siglock); | 90 | spin_lock(&sighand->siglock); |
90 | 91 | ||
@@ -120,6 +121,7 @@ static void __exit_signal(struct task_struct *tsk) | |||
120 | sig->nivcsw += tsk->nivcsw; | 121 | sig->nivcsw += tsk->nivcsw; |
121 | sig->inblock += task_io_get_inblock(tsk); | 122 | sig->inblock += task_io_get_inblock(tsk); |
122 | sig->oublock += task_io_get_oublock(tsk); | 123 | sig->oublock += task_io_get_oublock(tsk); |
124 | task_io_accounting_add(&sig->ioac, &tsk->ioac); | ||
123 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; | 125 | sig->sum_sched_runtime += tsk->se.sum_exec_runtime; |
124 | sig = NULL; /* Marker for below. */ | 126 | sig = NULL; /* Marker for below. */ |
125 | } | 127 | } |
@@ -135,7 +137,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
135 | tsk->signal = NULL; | 137 | tsk->signal = NULL; |
136 | tsk->sighand = NULL; | 138 | tsk->sighand = NULL; |
137 | spin_unlock(&sighand->siglock); | 139 | spin_unlock(&sighand->siglock); |
138 | rcu_read_unlock(); | ||
139 | 140 | ||
140 | __cleanup_sighand(sighand); | 141 | __cleanup_sighand(sighand); |
141 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); | 142 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); |
@@ -151,16 +152,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp) | |||
151 | put_task_struct(container_of(rhp, struct task_struct, rcu)); | 152 | put_task_struct(container_of(rhp, struct task_struct, rcu)); |
152 | } | 153 | } |
153 | 154 | ||
155 | |||
154 | void release_task(struct task_struct * p) | 156 | void release_task(struct task_struct * p) |
155 | { | 157 | { |
156 | struct task_struct *leader; | 158 | struct task_struct *leader; |
157 | int zap_leader; | 159 | int zap_leader; |
158 | repeat: | 160 | repeat: |
161 | tracehook_prepare_release_task(p); | ||
159 | atomic_dec(&p->user->processes); | 162 | atomic_dec(&p->user->processes); |
160 | proc_flush_task(p); | 163 | proc_flush_task(p); |
161 | write_lock_irq(&tasklist_lock); | 164 | write_lock_irq(&tasklist_lock); |
162 | ptrace_unlink(p); | 165 | tracehook_finish_release_task(p); |
163 | BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | ||
164 | __exit_signal(p); | 166 | __exit_signal(p); |
165 | 167 | ||
166 | /* | 168 | /* |
@@ -182,6 +184,13 @@ repeat: | |||
182 | * that case. | 184 | * that case. |
183 | */ | 185 | */ |
184 | zap_leader = task_detached(leader); | 186 | zap_leader = task_detached(leader); |
187 | |||
188 | /* | ||
189 | * This maintains the invariant that release_task() | ||
190 | * only runs on a task in EXIT_DEAD, just for sanity. | ||
191 | */ | ||
192 | if (zap_leader) | ||
193 | leader->exit_state = EXIT_DEAD; | ||
185 | } | 194 | } |
186 | 195 | ||
187 | write_unlock_irq(&tasklist_lock); | 196 | write_unlock_irq(&tasklist_lock); |
@@ -314,9 +323,8 @@ static void reparent_to_kthreadd(void) | |||
314 | 323 | ||
315 | ptrace_unlink(current); | 324 | ptrace_unlink(current); |
316 | /* Reparent to init */ | 325 | /* Reparent to init */ |
317 | remove_parent(current); | ||
318 | current->real_parent = current->parent = kthreadd_task; | 326 | current->real_parent = current->parent = kthreadd_task; |
319 | add_parent(current); | 327 | list_move_tail(&current->sibling, &current->real_parent->children); |
320 | 328 | ||
321 | /* Set the exit signal to SIGCHLD so we signal init on exit */ | 329 | /* Set the exit signal to SIGCHLD so we signal init on exit */ |
322 | current->exit_signal = SIGCHLD; | 330 | current->exit_signal = SIGCHLD; |
@@ -421,7 +429,7 @@ void daemonize(const char *name, ...) | |||
421 | * We don't want to have TIF_FREEZE set if the system-wide hibernation | 429 | * We don't want to have TIF_FREEZE set if the system-wide hibernation |
422 | * or suspend transition begins right now. | 430 | * or suspend transition begins right now. |
423 | */ | 431 | */ |
424 | current->flags |= PF_NOFREEZE; | 432 | current->flags |= (PF_NOFREEZE | PF_KTHREAD); |
425 | 433 | ||
426 | if (current->nsproxy != &init_nsproxy) { | 434 | if (current->nsproxy != &init_nsproxy) { |
427 | get_nsproxy(&init_nsproxy); | 435 | get_nsproxy(&init_nsproxy); |
@@ -546,8 +554,6 @@ void put_fs_struct(struct fs_struct *fs) | |||
546 | if (atomic_dec_and_test(&fs->count)) { | 554 | if (atomic_dec_and_test(&fs->count)) { |
547 | path_put(&fs->root); | 555 | path_put(&fs->root); |
548 | path_put(&fs->pwd); | 556 | path_put(&fs->pwd); |
549 | if (fs->altroot.dentry) | ||
550 | path_put(&fs->altroot); | ||
551 | kmem_cache_free(fs_cachep, fs); | 557 | kmem_cache_free(fs_cachep, fs); |
552 | } | 558 | } |
553 | } | 559 | } |
@@ -655,26 +661,40 @@ assign_new_owner: | |||
655 | static void exit_mm(struct task_struct * tsk) | 661 | static void exit_mm(struct task_struct * tsk) |
656 | { | 662 | { |
657 | struct mm_struct *mm = tsk->mm; | 663 | struct mm_struct *mm = tsk->mm; |
664 | struct core_state *core_state; | ||
658 | 665 | ||
659 | mm_release(tsk, mm); | 666 | mm_release(tsk, mm); |
660 | if (!mm) | 667 | if (!mm) |
661 | return; | 668 | return; |
662 | /* | 669 | /* |
663 | * Serialize with any possible pending coredump. | 670 | * Serialize with any possible pending coredump. |
664 | * We must hold mmap_sem around checking core_waiters | 671 | * We must hold mmap_sem around checking core_state |
665 | * and clearing tsk->mm. The core-inducing thread | 672 | * and clearing tsk->mm. The core-inducing thread |
666 | * will increment core_waiters for each thread in the | 673 | * will increment ->nr_threads for each thread in the |
667 | * group with ->mm != NULL. | 674 | * group with ->mm != NULL. |
668 | */ | 675 | */ |
669 | down_read(&mm->mmap_sem); | 676 | down_read(&mm->mmap_sem); |
670 | if (mm->core_waiters) { | 677 | core_state = mm->core_state; |
678 | if (core_state) { | ||
679 | struct core_thread self; | ||
671 | up_read(&mm->mmap_sem); | 680 | up_read(&mm->mmap_sem); |
672 | down_write(&mm->mmap_sem); | ||
673 | if (!--mm->core_waiters) | ||
674 | complete(mm->core_startup_done); | ||
675 | up_write(&mm->mmap_sem); | ||
676 | 681 | ||
677 | wait_for_completion(&mm->core_done); | 682 | self.task = tsk; |
683 | self.next = xchg(&core_state->dumper.next, &self); | ||
684 | /* | ||
685 | * Implies mb(), the result of xchg() must be visible | ||
686 | * to core_state->dumper. | ||
687 | */ | ||
688 | if (atomic_dec_and_test(&core_state->nr_threads)) | ||
689 | complete(&core_state->startup); | ||
690 | |||
691 | for (;;) { | ||
692 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
693 | if (!self.task) /* see coredump_finish() */ | ||
694 | break; | ||
695 | schedule(); | ||
696 | } | ||
697 | __set_task_state(tsk, TASK_RUNNING); | ||
678 | down_read(&mm->mmap_sem); | 698 | down_read(&mm->mmap_sem); |
679 | } | 699 | } |
680 | atomic_inc(&mm->mm_count); | 700 | atomic_inc(&mm->mm_count); |
@@ -691,37 +711,97 @@ static void exit_mm(struct task_struct * tsk) | |||
691 | mmput(mm); | 711 | mmput(mm); |
692 | } | 712 | } |
693 | 713 | ||
694 | static void | 714 | /* |
695 | reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | 715 | * Return nonzero if @parent's children should reap themselves. |
716 | * | ||
717 | * Called with write_lock_irq(&tasklist_lock) held. | ||
718 | */ | ||
719 | static int ignoring_children(struct task_struct *parent) | ||
696 | { | 720 | { |
697 | if (p->pdeath_signal) | 721 | int ret; |
698 | /* We already hold the tasklist_lock here. */ | 722 | struct sighand_struct *psig = parent->sighand; |
699 | group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); | 723 | unsigned long flags; |
724 | spin_lock_irqsave(&psig->siglock, flags); | ||
725 | ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || | ||
726 | (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT)); | ||
727 | spin_unlock_irqrestore(&psig->siglock, flags); | ||
728 | return ret; | ||
729 | } | ||
700 | 730 | ||
701 | /* Move the child from its dying parent to the new one. */ | 731 | /* |
702 | if (unlikely(traced)) { | 732 | * Detach all tasks we were using ptrace on. |
703 | /* Preserve ptrace links if someone else is tracing this child. */ | 733 | * Any that need to be release_task'd are put on the @dead list. |
704 | list_del_init(&p->ptrace_list); | 734 | * |
705 | if (ptrace_reparented(p)) | 735 | * Called with write_lock(&tasklist_lock) held. |
706 | list_add(&p->ptrace_list, &p->real_parent->ptrace_children); | 736 | */ |
707 | } else { | 737 | static void ptrace_exit(struct task_struct *parent, struct list_head *dead) |
708 | /* If this child is being traced, then we're the one tracing it | 738 | { |
709 | * anyway, so let go of it. | 739 | struct task_struct *p, *n; |
740 | int ign = -1; | ||
741 | |||
742 | list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { | ||
743 | __ptrace_unlink(p); | ||
744 | |||
745 | if (p->exit_state != EXIT_ZOMBIE) | ||
746 | continue; | ||
747 | |||
748 | /* | ||
749 | * If it's a zombie, our attachedness prevented normal | ||
750 | * parent notification or self-reaping. Do notification | ||
751 | * now if it would have happened earlier. If it should | ||
752 | * reap itself, add it to the @dead list. We can't call | ||
753 | * release_task() here because we already hold tasklist_lock. | ||
754 | * | ||
755 | * If it's our own child, there is no notification to do. | ||
756 | * But if our normal children self-reap, then this child | ||
757 | * was prevented by ptrace and we must reap it now. | ||
710 | */ | 758 | */ |
711 | p->ptrace = 0; | 759 | if (!task_detached(p) && thread_group_empty(p)) { |
712 | remove_parent(p); | 760 | if (!same_thread_group(p->real_parent, parent)) |
713 | p->parent = p->real_parent; | 761 | do_notify_parent(p, p->exit_signal); |
714 | add_parent(p); | 762 | else { |
763 | if (ign < 0) | ||
764 | ign = ignoring_children(parent); | ||
765 | if (ign) | ||
766 | p->exit_signal = -1; | ||
767 | } | ||
768 | } | ||
715 | 769 | ||
716 | if (task_is_traced(p)) { | 770 | if (task_detached(p)) { |
717 | /* | 771 | /* |
718 | * If it was at a trace stop, turn it into | 772 | * Mark it as in the process of being reaped. |
719 | * a normal stop since it's no longer being | ||
720 | * traced. | ||
721 | */ | 773 | */ |
722 | ptrace_untrace(p); | 774 | p->exit_state = EXIT_DEAD; |
775 | list_add(&p->ptrace_entry, dead); | ||
723 | } | 776 | } |
724 | } | 777 | } |
778 | } | ||
779 | |||
780 | /* | ||
781 | * Finish up exit-time ptrace cleanup. | ||
782 | * | ||
783 | * Called without locks. | ||
784 | */ | ||
785 | static void ptrace_exit_finish(struct task_struct *parent, | ||
786 | struct list_head *dead) | ||
787 | { | ||
788 | struct task_struct *p, *n; | ||
789 | |||
790 | BUG_ON(!list_empty(&parent->ptraced)); | ||
791 | |||
792 | list_for_each_entry_safe(p, n, dead, ptrace_entry) { | ||
793 | list_del_init(&p->ptrace_entry); | ||
794 | release_task(p); | ||
795 | } | ||
796 | } | ||
797 | |||
798 | static void reparent_thread(struct task_struct *p, struct task_struct *father) | ||
799 | { | ||
800 | if (p->pdeath_signal) | ||
801 | /* We already hold the tasklist_lock here. */ | ||
802 | group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); | ||
803 | |||
804 | list_move_tail(&p->sibling, &p->real_parent->children); | ||
725 | 805 | ||
726 | /* If this is a threaded reparent there is no need to | 806 | /* If this is a threaded reparent there is no need to |
727 | * notify anyone anything has happened. | 807 | * notify anyone anything has happened. |
@@ -736,7 +816,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | |||
736 | /* If we'd notified the old parent about this child's death, | 816 | /* If we'd notified the old parent about this child's death, |
737 | * also notify the new parent. | 817 | * also notify the new parent. |
738 | */ | 818 | */ |
739 | if (!traced && p->exit_state == EXIT_ZOMBIE && | 819 | if (!ptrace_reparented(p) && |
820 | p->exit_state == EXIT_ZOMBIE && | ||
740 | !task_detached(p) && thread_group_empty(p)) | 821 | !task_detached(p) && thread_group_empty(p)) |
741 | do_notify_parent(p, p->exit_signal); | 822 | do_notify_parent(p, p->exit_signal); |
742 | 823 | ||
@@ -753,12 +834,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) | |||
753 | static void forget_original_parent(struct task_struct *father) | 834 | static void forget_original_parent(struct task_struct *father) |
754 | { | 835 | { |
755 | struct task_struct *p, *n, *reaper = father; | 836 | struct task_struct *p, *n, *reaper = father; |
756 | struct list_head ptrace_dead; | 837 | LIST_HEAD(ptrace_dead); |
757 | |||
758 | INIT_LIST_HEAD(&ptrace_dead); | ||
759 | 838 | ||
760 | write_lock_irq(&tasklist_lock); | 839 | write_lock_irq(&tasklist_lock); |
761 | 840 | ||
841 | /* | ||
842 | * First clean up ptrace if we were using it. | ||
843 | */ | ||
844 | ptrace_exit(father, &ptrace_dead); | ||
845 | |||
762 | do { | 846 | do { |
763 | reaper = next_thread(reaper); | 847 | reaper = next_thread(reaper); |
764 | if (reaper == father) { | 848 | if (reaper == father) { |
@@ -767,58 +851,19 @@ static void forget_original_parent(struct task_struct *father) | |||
767 | } | 851 | } |
768 | } while (reaper->flags & PF_EXITING); | 852 | } while (reaper->flags & PF_EXITING); |
769 | 853 | ||
770 | /* | ||
771 | * There are only two places where our children can be: | ||
772 | * | ||
773 | * - in our child list | ||
774 | * - in our ptraced child list | ||
775 | * | ||
776 | * Search them and reparent children. | ||
777 | */ | ||
778 | list_for_each_entry_safe(p, n, &father->children, sibling) { | 854 | list_for_each_entry_safe(p, n, &father->children, sibling) { |
779 | int ptrace; | ||
780 | |||
781 | ptrace = p->ptrace; | ||
782 | |||
783 | /* if father isn't the real parent, then ptrace must be enabled */ | ||
784 | BUG_ON(father != p->real_parent && !ptrace); | ||
785 | |||
786 | if (father == p->real_parent) { | ||
787 | /* reparent with a reaper, real father it's us */ | ||
788 | p->real_parent = reaper; | ||
789 | reparent_thread(p, father, 0); | ||
790 | } else { | ||
791 | /* reparent ptraced task to its real parent */ | ||
792 | __ptrace_unlink (p); | ||
793 | if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && | ||
794 | thread_group_empty(p)) | ||
795 | do_notify_parent(p, p->exit_signal); | ||
796 | } | ||
797 | |||
798 | /* | ||
799 | * if the ptraced child is a detached zombie we must collect | ||
800 | * it before we exit, or it will remain zombie forever since | ||
801 | * we prevented it from self-reap itself while it was being | ||
802 | * traced by us, to be able to see it in wait4. | ||
803 | */ | ||
804 | if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) | ||
805 | list_add(&p->ptrace_list, &ptrace_dead); | ||
806 | } | ||
807 | |||
808 | list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) { | ||
809 | p->real_parent = reaper; | 855 | p->real_parent = reaper; |
810 | reparent_thread(p, father, 1); | 856 | if (p->parent == father) { |
857 | BUG_ON(p->ptrace); | ||
858 | p->parent = p->real_parent; | ||
859 | } | ||
860 | reparent_thread(p, father); | ||
811 | } | 861 | } |
812 | 862 | ||
813 | write_unlock_irq(&tasklist_lock); | 863 | write_unlock_irq(&tasklist_lock); |
814 | BUG_ON(!list_empty(&father->children)); | 864 | BUG_ON(!list_empty(&father->children)); |
815 | BUG_ON(!list_empty(&father->ptrace_children)); | ||
816 | |||
817 | list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) { | ||
818 | list_del_init(&p->ptrace_list); | ||
819 | release_task(p); | ||
820 | } | ||
821 | 865 | ||
866 | ptrace_exit_finish(father, &ptrace_dead); | ||
822 | } | 867 | } |
823 | 868 | ||
824 | /* | 869 | /* |
@@ -827,7 +872,8 @@ static void forget_original_parent(struct task_struct *father) | |||
827 | */ | 872 | */ |
828 | static void exit_notify(struct task_struct *tsk, int group_dead) | 873 | static void exit_notify(struct task_struct *tsk, int group_dead) |
829 | { | 874 | { |
830 | int state; | 875 | int signal; |
876 | void *cookie; | ||
831 | 877 | ||
832 | /* | 878 | /* |
833 | * This does two things: | 879 | * This does two things: |
@@ -864,22 +910,11 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
864 | !capable(CAP_KILL)) | 910 | !capable(CAP_KILL)) |
865 | tsk->exit_signal = SIGCHLD; | 911 | tsk->exit_signal = SIGCHLD; |
866 | 912 | ||
867 | /* If something other than our normal parent is ptracing us, then | 913 | signal = tracehook_notify_death(tsk, &cookie, group_dead); |
868 | * send it a SIGCHLD instead of honoring exit_signal. exit_signal | 914 | if (signal > 0) |
869 | * only has special meaning to our real parent. | 915 | signal = do_notify_parent(tsk, signal); |
870 | */ | ||
871 | if (!task_detached(tsk) && thread_group_empty(tsk)) { | ||
872 | int signal = ptrace_reparented(tsk) ? | ||
873 | SIGCHLD : tsk->exit_signal; | ||
874 | do_notify_parent(tsk, signal); | ||
875 | } else if (tsk->ptrace) { | ||
876 | do_notify_parent(tsk, SIGCHLD); | ||
877 | } | ||
878 | 916 | ||
879 | state = EXIT_ZOMBIE; | 917 | tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; |
880 | if (task_detached(tsk) && likely(!tsk->ptrace)) | ||
881 | state = EXIT_DEAD; | ||
882 | tsk->exit_state = state; | ||
883 | 918 | ||
884 | /* mt-exec, de_thread() is waiting for us */ | 919 | /* mt-exec, de_thread() is waiting for us */ |
885 | if (thread_group_leader(tsk) && | 920 | if (thread_group_leader(tsk) && |
@@ -889,8 +924,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
889 | 924 | ||
890 | write_unlock_irq(&tasklist_lock); | 925 | write_unlock_irq(&tasklist_lock); |
891 | 926 | ||
927 | tracehook_report_death(tsk, signal, cookie, group_dead); | ||
928 | |||
892 | /* If the process is dead, release it - nobody will wait for it */ | 929 | /* If the process is dead, release it - nobody will wait for it */ |
893 | if (state == EXIT_DEAD) | 930 | if (signal < 0) |
894 | release_task(tsk); | 931 | release_task(tsk); |
895 | } | 932 | } |
896 | 933 | ||
@@ -969,10 +1006,7 @@ NORET_TYPE void do_exit(long code) | |||
969 | if (unlikely(!tsk->pid)) | 1006 | if (unlikely(!tsk->pid)) |
970 | panic("Attempted to kill the idle task!"); | 1007 | panic("Attempted to kill the idle task!"); |
971 | 1008 | ||
972 | if (unlikely(current->ptrace & PT_TRACE_EXIT)) { | 1009 | tracehook_report_exit(&code); |
973 | current->ptrace_message = code; | ||
974 | ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); | ||
975 | } | ||
976 | 1010 | ||
977 | /* | 1011 | /* |
978 | * We're taking recursive faults here in do_exit. Safest is to just | 1012 | * We're taking recursive faults here in do_exit. Safest is to just |
@@ -1179,13 +1213,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, | |||
1179 | return 0; | 1213 | return 0; |
1180 | } | 1214 | } |
1181 | 1215 | ||
1182 | /* | ||
1183 | * Do not consider detached threads that are | ||
1184 | * not ptraced: | ||
1185 | */ | ||
1186 | if (task_detached(p) && !p->ptrace) | ||
1187 | return 0; | ||
1188 | |||
1189 | /* Wait for all children (clone and not) if __WALL is set; | 1216 | /* Wait for all children (clone and not) if __WALL is set; |
1190 | * otherwise, wait for clone children *only* if __WCLONE is | 1217 | * otherwise, wait for clone children *only* if __WCLONE is |
1191 | * set; otherwise, wait for non-clone children *only*. (Note: | 1218 | * set; otherwise, wait for non-clone children *only*. (Note: |
@@ -1196,14 +1223,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, | |||
1196 | return 0; | 1223 | return 0; |
1197 | 1224 | ||
1198 | err = security_task_wait(p); | 1225 | err = security_task_wait(p); |
1199 | if (likely(!err)) | 1226 | if (err) |
1200 | return 1; | 1227 | return err; |
1201 | 1228 | ||
1202 | if (type != PIDTYPE_PID) | 1229 | return 1; |
1203 | return 0; | ||
1204 | /* This child was explicitly requested, abort */ | ||
1205 | read_unlock(&tasklist_lock); | ||
1206 | return err; | ||
1207 | } | 1230 | } |
1208 | 1231 | ||
1209 | static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, | 1232 | static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, |
@@ -1237,7 +1260,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, | |||
1237 | * the lock and this task is uninteresting. If we return nonzero, we have | 1260 | * the lock and this task is uninteresting. If we return nonzero, we have |
1238 | * released the lock and the system call should return. | 1261 | * released the lock and the system call should return. |
1239 | */ | 1262 | */ |
1240 | static int wait_task_zombie(struct task_struct *p, int noreap, | 1263 | static int wait_task_zombie(struct task_struct *p, int options, |
1241 | struct siginfo __user *infop, | 1264 | struct siginfo __user *infop, |
1242 | int __user *stat_addr, struct rusage __user *ru) | 1265 | int __user *stat_addr, struct rusage __user *ru) |
1243 | { | 1266 | { |
@@ -1245,7 +1268,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1245 | int retval, status, traced; | 1268 | int retval, status, traced; |
1246 | pid_t pid = task_pid_vnr(p); | 1269 | pid_t pid = task_pid_vnr(p); |
1247 | 1270 | ||
1248 | if (unlikely(noreap)) { | 1271 | if (!likely(options & WEXITED)) |
1272 | return 0; | ||
1273 | |||
1274 | if (unlikely(options & WNOWAIT)) { | ||
1249 | uid_t uid = p->uid; | 1275 | uid_t uid = p->uid; |
1250 | int exit_code = p->exit_code; | 1276 | int exit_code = p->exit_code; |
1251 | int why, status; | 1277 | int why, status; |
@@ -1326,6 +1352,8 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1326 | psig->coublock += | 1352 | psig->coublock += |
1327 | task_io_get_oublock(p) + | 1353 | task_io_get_oublock(p) + |
1328 | sig->oublock + sig->coublock; | 1354 | sig->oublock + sig->coublock; |
1355 | task_io_accounting_add(&psig->ioac, &p->ioac); | ||
1356 | task_io_accounting_add(&psig->ioac, &sig->ioac); | ||
1329 | spin_unlock_irq(&p->parent->sighand->siglock); | 1357 | spin_unlock_irq(&p->parent->sighand->siglock); |
1330 | } | 1358 | } |
1331 | 1359 | ||
@@ -1395,21 +1423,24 @@ static int wait_task_zombie(struct task_struct *p, int noreap, | |||
1395 | * the lock and this task is uninteresting. If we return nonzero, we have | 1423 | * the lock and this task is uninteresting. If we return nonzero, we have |
1396 | * released the lock and the system call should return. | 1424 | * released the lock and the system call should return. |
1397 | */ | 1425 | */ |
1398 | static int wait_task_stopped(struct task_struct *p, | 1426 | static int wait_task_stopped(int ptrace, struct task_struct *p, |
1399 | int noreap, struct siginfo __user *infop, | 1427 | int options, struct siginfo __user *infop, |
1400 | int __user *stat_addr, struct rusage __user *ru) | 1428 | int __user *stat_addr, struct rusage __user *ru) |
1401 | { | 1429 | { |
1402 | int retval, exit_code, why; | 1430 | int retval, exit_code, why; |
1403 | uid_t uid = 0; /* unneeded, required by compiler */ | 1431 | uid_t uid = 0; /* unneeded, required by compiler */ |
1404 | pid_t pid; | 1432 | pid_t pid; |
1405 | 1433 | ||
1434 | if (!(options & WUNTRACED)) | ||
1435 | return 0; | ||
1436 | |||
1406 | exit_code = 0; | 1437 | exit_code = 0; |
1407 | spin_lock_irq(&p->sighand->siglock); | 1438 | spin_lock_irq(&p->sighand->siglock); |
1408 | 1439 | ||
1409 | if (unlikely(!task_is_stopped_or_traced(p))) | 1440 | if (unlikely(!task_is_stopped_or_traced(p))) |
1410 | goto unlock_sig; | 1441 | goto unlock_sig; |
1411 | 1442 | ||
1412 | if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0) | 1443 | if (!ptrace && p->signal->group_stop_count > 0) |
1413 | /* | 1444 | /* |
1414 | * A group stop is in progress and this is the group leader. | 1445 | * A group stop is in progress and this is the group leader. |
1415 | * We won't report until all threads have stopped. | 1446 | * We won't report until all threads have stopped. |
@@ -1420,7 +1451,7 @@ static int wait_task_stopped(struct task_struct *p, | |||
1420 | if (!exit_code) | 1451 | if (!exit_code) |
1421 | goto unlock_sig; | 1452 | goto unlock_sig; |
1422 | 1453 | ||
1423 | if (!noreap) | 1454 | if (!unlikely(options & WNOWAIT)) |
1424 | p->exit_code = 0; | 1455 | p->exit_code = 0; |
1425 | 1456 | ||
1426 | uid = p->uid; | 1457 | uid = p->uid; |
@@ -1438,10 +1469,10 @@ unlock_sig: | |||
1438 | */ | 1469 | */ |
1439 | get_task_struct(p); | 1470 | get_task_struct(p); |
1440 | pid = task_pid_vnr(p); | 1471 | pid = task_pid_vnr(p); |
1441 | why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; | 1472 | why = ptrace ? CLD_TRAPPED : CLD_STOPPED; |
1442 | read_unlock(&tasklist_lock); | 1473 | read_unlock(&tasklist_lock); |
1443 | 1474 | ||
1444 | if (unlikely(noreap)) | 1475 | if (unlikely(options & WNOWAIT)) |
1445 | return wait_noreap_copyout(p, pid, uid, | 1476 | return wait_noreap_copyout(p, pid, uid, |
1446 | why, exit_code, | 1477 | why, exit_code, |
1447 | infop, ru); | 1478 | infop, ru); |
@@ -1475,7 +1506,7 @@ unlock_sig: | |||
1475 | * the lock and this task is uninteresting. If we return nonzero, we have | 1506 | * the lock and this task is uninteresting. If we return nonzero, we have |
1476 | * released the lock and the system call should return. | 1507 | * released the lock and the system call should return. |
1477 | */ | 1508 | */ |
1478 | static int wait_task_continued(struct task_struct *p, int noreap, | 1509 | static int wait_task_continued(struct task_struct *p, int options, |
1479 | struct siginfo __user *infop, | 1510 | struct siginfo __user *infop, |
1480 | int __user *stat_addr, struct rusage __user *ru) | 1511 | int __user *stat_addr, struct rusage __user *ru) |
1481 | { | 1512 | { |
@@ -1483,6 +1514,9 @@ static int wait_task_continued(struct task_struct *p, int noreap, | |||
1483 | pid_t pid; | 1514 | pid_t pid; |
1484 | uid_t uid; | 1515 | uid_t uid; |
1485 | 1516 | ||
1517 | if (!unlikely(options & WCONTINUED)) | ||
1518 | return 0; | ||
1519 | |||
1486 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) | 1520 | if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) |
1487 | return 0; | 1521 | return 0; |
1488 | 1522 | ||
@@ -1492,7 +1526,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, | |||
1492 | spin_unlock_irq(&p->sighand->siglock); | 1526 | spin_unlock_irq(&p->sighand->siglock); |
1493 | return 0; | 1527 | return 0; |
1494 | } | 1528 | } |
1495 | if (!noreap) | 1529 | if (!unlikely(options & WNOWAIT)) |
1496 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; | 1530 | p->signal->flags &= ~SIGNAL_STOP_CONTINUED; |
1497 | spin_unlock_irq(&p->sighand->siglock); | 1531 | spin_unlock_irq(&p->sighand->siglock); |
1498 | 1532 | ||
@@ -1518,89 +1552,161 @@ static int wait_task_continued(struct task_struct *p, int noreap, | |||
1518 | return retval; | 1552 | return retval; |
1519 | } | 1553 | } |
1520 | 1554 | ||
1555 | /* | ||
1556 | * Consider @p for a wait by @parent. | ||
1557 | * | ||
1558 | * -ECHILD should be in *@notask_error before the first call. | ||
1559 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. | ||
1560 | * Returns zero if the search for a child should continue; | ||
1561 | * then *@notask_error is 0 if @p is an eligible child, | ||
1562 | * or another error from security_task_wait(), or still -ECHILD. | ||
1563 | */ | ||
1564 | static int wait_consider_task(struct task_struct *parent, int ptrace, | ||
1565 | struct task_struct *p, int *notask_error, | ||
1566 | enum pid_type type, struct pid *pid, int options, | ||
1567 | struct siginfo __user *infop, | ||
1568 | int __user *stat_addr, struct rusage __user *ru) | ||
1569 | { | ||
1570 | int ret = eligible_child(type, pid, options, p); | ||
1571 | if (!ret) | ||
1572 | return ret; | ||
1573 | |||
1574 | if (unlikely(ret < 0)) { | ||
1575 | /* | ||
1576 | * If we have not yet seen any eligible child, | ||
1577 | * then let this error code replace -ECHILD. | ||
1578 | * A permission error will give the user a clue | ||
1579 | * to look for security policy problems, rather | ||
1580 | * than for mysterious wait bugs. | ||
1581 | */ | ||
1582 | if (*notask_error) | ||
1583 | *notask_error = ret; | ||
1584 | } | ||
1585 | |||
1586 | if (likely(!ptrace) && unlikely(p->ptrace)) { | ||
1587 | /* | ||
1588 | * This child is hidden by ptrace. | ||
1589 | * We aren't allowed to see it now, but eventually we will. | ||
1590 | */ | ||
1591 | *notask_error = 0; | ||
1592 | return 0; | ||
1593 | } | ||
1594 | |||
1595 | if (p->exit_state == EXIT_DEAD) | ||
1596 | return 0; | ||
1597 | |||
1598 | /* | ||
1599 | * We don't reap group leaders with subthreads. | ||
1600 | */ | ||
1601 | if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) | ||
1602 | return wait_task_zombie(p, options, infop, stat_addr, ru); | ||
1603 | |||
1604 | /* | ||
1605 | * It's stopped or running now, so it might | ||
1606 | * later continue, exit, or stop again. | ||
1607 | */ | ||
1608 | *notask_error = 0; | ||
1609 | |||
1610 | if (task_is_stopped_or_traced(p)) | ||
1611 | return wait_task_stopped(ptrace, p, options, | ||
1612 | infop, stat_addr, ru); | ||
1613 | |||
1614 | return wait_task_continued(p, options, infop, stat_addr, ru); | ||
1615 | } | ||
1616 | |||
1617 | /* | ||
1618 | * Do the work of do_wait() for one thread in the group, @tsk. | ||
1619 | * | ||
1620 | * -ECHILD should be in *@notask_error before the first call. | ||
1621 | * Returns nonzero for a final return, when we have unlocked tasklist_lock. | ||
1622 | * Returns zero if the search for a child should continue; then | ||
1623 | * *@notask_error is 0 if there were any eligible children, | ||
1624 | * or another error from security_task_wait(), or still -ECHILD. | ||
1625 | */ | ||
1626 | static int do_wait_thread(struct task_struct *tsk, int *notask_error, | ||
1627 | enum pid_type type, struct pid *pid, int options, | ||
1628 | struct siginfo __user *infop, int __user *stat_addr, | ||
1629 | struct rusage __user *ru) | ||
1630 | { | ||
1631 | struct task_struct *p; | ||
1632 | |||
1633 | list_for_each_entry(p, &tsk->children, sibling) { | ||
1634 | /* | ||
1635 | * Do not consider detached threads. | ||
1636 | */ | ||
1637 | if (!task_detached(p)) { | ||
1638 | int ret = wait_consider_task(tsk, 0, p, notask_error, | ||
1639 | type, pid, options, | ||
1640 | infop, stat_addr, ru); | ||
1641 | if (ret) | ||
1642 | return ret; | ||
1643 | } | ||
1644 | } | ||
1645 | |||
1646 | return 0; | ||
1647 | } | ||
1648 | |||
1649 | static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, | ||
1650 | enum pid_type type, struct pid *pid, int options, | ||
1651 | struct siginfo __user *infop, int __user *stat_addr, | ||
1652 | struct rusage __user *ru) | ||
1653 | { | ||
1654 | struct task_struct *p; | ||
1655 | |||
1656 | /* | ||
1657 | * Traditionally we see ptrace'd stopped tasks regardless of options. | ||
1658 | */ | ||
1659 | options |= WUNTRACED; | ||
1660 | |||
1661 | list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { | ||
1662 | int ret = wait_consider_task(tsk, 1, p, notask_error, | ||
1663 | type, pid, options, | ||
1664 | infop, stat_addr, ru); | ||
1665 | if (ret) | ||
1666 | return ret; | ||
1667 | } | ||
1668 | |||
1669 | return 0; | ||
1670 | } | ||
1671 | |||
1521 | static long do_wait(enum pid_type type, struct pid *pid, int options, | 1672 | static long do_wait(enum pid_type type, struct pid *pid, int options, |
1522 | struct siginfo __user *infop, int __user *stat_addr, | 1673 | struct siginfo __user *infop, int __user *stat_addr, |
1523 | struct rusage __user *ru) | 1674 | struct rusage __user *ru) |
1524 | { | 1675 | { |
1525 | DECLARE_WAITQUEUE(wait, current); | 1676 | DECLARE_WAITQUEUE(wait, current); |
1526 | struct task_struct *tsk; | 1677 | struct task_struct *tsk; |
1527 | int flag, retval; | 1678 | int retval; |
1528 | 1679 | ||
1529 | add_wait_queue(&current->signal->wait_chldexit,&wait); | 1680 | add_wait_queue(&current->signal->wait_chldexit,&wait); |
1530 | repeat: | 1681 | repeat: |
1531 | /* If there is nothing that can match our critier just get out */ | 1682 | /* |
1683 | * If there is nothing that can match our critiera just get out. | ||
1684 | * We will clear @retval to zero if we see any child that might later | ||
1685 | * match our criteria, even if we are not able to reap it yet. | ||
1686 | */ | ||
1532 | retval = -ECHILD; | 1687 | retval = -ECHILD; |
1533 | if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) | 1688 | if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) |
1534 | goto end; | 1689 | goto end; |
1535 | 1690 | ||
1536 | /* | ||
1537 | * We will set this flag if we see any child that might later | ||
1538 | * match our criteria, even if we are not able to reap it yet. | ||
1539 | */ | ||
1540 | flag = retval = 0; | ||
1541 | current->state = TASK_INTERRUPTIBLE; | 1691 | current->state = TASK_INTERRUPTIBLE; |
1542 | read_lock(&tasklist_lock); | 1692 | read_lock(&tasklist_lock); |
1543 | tsk = current; | 1693 | tsk = current; |
1544 | do { | 1694 | do { |
1545 | struct task_struct *p; | 1695 | int tsk_result = do_wait_thread(tsk, &retval, |
1546 | 1696 | type, pid, options, | |
1547 | list_for_each_entry(p, &tsk->children, sibling) { | 1697 | infop, stat_addr, ru); |
1548 | int ret = eligible_child(type, pid, options, p); | 1698 | if (!tsk_result) |
1549 | if (!ret) | 1699 | tsk_result = ptrace_do_wait(tsk, &retval, |
1550 | continue; | 1700 | type, pid, options, |
1551 | 1701 | infop, stat_addr, ru); | |
1552 | if (unlikely(ret < 0)) { | 1702 | if (tsk_result) { |
1553 | retval = ret; | 1703 | /* |
1554 | } else if (task_is_stopped_or_traced(p)) { | 1704 | * tasklist_lock is unlocked and we have a final result. |
1555 | /* | 1705 | */ |
1556 | * It's stopped now, so it might later | 1706 | retval = tsk_result; |
1557 | * continue, exit, or stop again. | 1707 | goto end; |
1558 | */ | ||
1559 | flag = 1; | ||
1560 | if (!(p->ptrace & PT_PTRACED) && | ||
1561 | !(options & WUNTRACED)) | ||
1562 | continue; | ||
1563 | |||
1564 | retval = wait_task_stopped(p, | ||
1565 | (options & WNOWAIT), infop, | ||
1566 | stat_addr, ru); | ||
1567 | } else if (p->exit_state == EXIT_ZOMBIE && | ||
1568 | !delay_group_leader(p)) { | ||
1569 | /* | ||
1570 | * We don't reap group leaders with subthreads. | ||
1571 | */ | ||
1572 | if (!likely(options & WEXITED)) | ||
1573 | continue; | ||
1574 | retval = wait_task_zombie(p, | ||
1575 | (options & WNOWAIT), infop, | ||
1576 | stat_addr, ru); | ||
1577 | } else if (p->exit_state != EXIT_DEAD) { | ||
1578 | /* | ||
1579 | * It's running now, so it might later | ||
1580 | * exit, stop, or stop and then continue. | ||
1581 | */ | ||
1582 | flag = 1; | ||
1583 | if (!unlikely(options & WCONTINUED)) | ||
1584 | continue; | ||
1585 | retval = wait_task_continued(p, | ||
1586 | (options & WNOWAIT), infop, | ||
1587 | stat_addr, ru); | ||
1588 | } | ||
1589 | if (retval != 0) /* tasklist_lock released */ | ||
1590 | goto end; | ||
1591 | } | ||
1592 | if (!flag) { | ||
1593 | list_for_each_entry(p, &tsk->ptrace_children, | ||
1594 | ptrace_list) { | ||
1595 | flag = eligible_child(type, pid, options, p); | ||
1596 | if (!flag) | ||
1597 | continue; | ||
1598 | if (likely(flag > 0)) | ||
1599 | break; | ||
1600 | retval = flag; | ||
1601 | goto end; | ||
1602 | } | ||
1603 | } | 1708 | } |
1709 | |||
1604 | if (options & __WNOTHREAD) | 1710 | if (options & __WNOTHREAD) |
1605 | break; | 1711 | break; |
1606 | tsk = next_thread(tsk); | 1712 | tsk = next_thread(tsk); |
@@ -1608,16 +1714,14 @@ repeat: | |||
1608 | } while (tsk != current); | 1714 | } while (tsk != current); |
1609 | read_unlock(&tasklist_lock); | 1715 | read_unlock(&tasklist_lock); |
1610 | 1716 | ||
1611 | if (flag) { | 1717 | if (!retval && !(options & WNOHANG)) { |
1612 | if (options & WNOHANG) | ||
1613 | goto end; | ||
1614 | retval = -ERESTARTSYS; | 1718 | retval = -ERESTARTSYS; |
1615 | if (signal_pending(current)) | 1719 | if (!signal_pending(current)) { |
1616 | goto end; | 1720 | schedule(); |
1617 | schedule(); | 1721 | goto repeat; |
1618 | goto repeat; | 1722 | } |
1619 | } | 1723 | } |
1620 | retval = -ECHILD; | 1724 | |
1621 | end: | 1725 | end: |
1622 | current->state = TASK_RUNNING; | 1726 | current->state = TASK_RUNNING; |
1623 | remove_wait_queue(&current->signal->wait_chldexit,&wait); | 1727 | remove_wait_queue(&current->signal->wait_chldexit,&wait); |