Diffstat (limited to 'kernel/exit.c')
-rw-r--r--	kernel/exit.c	| 711
1 files changed, 324 insertions, 387 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 167e1e3ad7c6..1143012951e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
 #include <linux/completion.h>
 #include <linux/personality.h>
 #include <linux/tty.h>
-#include <linux/mnt_namespace.h>
 #include <linux/iocontext.h>
 #include <linux/key.h>
 #include <linux/security.h>
@@ -46,8 +45,11 @@
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <trace/sched.h>
+#include <linux/perf_event.h>
+#include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -55,17 +57,8 @@
 #include <asm/mmu_context.h>
 #include "cred-internals.h"
 
-DEFINE_TRACE(sched_process_free);
-DEFINE_TRACE(sched_process_exit);
-DEFINE_TRACE(sched_process_wait);
-
 static void exit_mm(struct task_struct * tsk);
 
-static inline int task_detached(struct task_struct *p)
-{
-	return p->exit_signal == -1;
-}
-
 static void __unhash_process(struct task_struct *p)
 {
 	nr_threads--;
@@ -118,9 +111,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
-		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+		sig->utime = cputime_add(sig->utime, tsk->utime);
+		sig->stime = cputime_add(sig->stime, tsk->stime);
+		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -162,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
+#ifdef CONFIG_PERF_EVENTS
+	WARN_ON_ONCE(tsk->perf_event_ctxp);
+#endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }
@@ -178,6 +174,7 @@ repeat:
 	atomic_dec(&__task_cred(p)->user->processes);
 
 	proc_flush_task(p);
+
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
 	__exit_signal(p);
@@ -362,16 +359,12 @@ static void reparent_to_kthreadd(void)
 void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
-	pid_t nr = pid_nr(pid);
 
-	if (task_session(curr) != pid) {
+	if (task_session(curr) != pid)
 		change_pid(curr, PIDTYPE_SID, pid);
-		set_task_session(curr, nr);
-	}
-	if (task_pgrp(curr) != pid) {
+
+	if (task_pgrp(curr) != pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
-		set_task_pgrp(curr, nr);
-	}
 }
 
 static void set_special_pids(struct pid *pid)
@@ -382,9 +375,8 @@ static void set_special_pids(struct pid *pid)
 }
 
 /*
- * Let kernel threads use this to say that they
- * allow a certain signal (since daemonize() will
- * have disabled all of them by default).
+ * Let kernel threads use this to say that they allow a certain signal.
+ * Must not be used if kthread was cloned with CLONE_SIGHAND.
 */
 int allow_signal(int sig)
 {
@@ -392,14 +384,14 @@ int allow_signal(int sig)
 		return -EINVAL;
 
 	spin_lock_irq(&current->sighand->siglock);
+	/* This is only needed for daemonize()'ed kthreads */
 	sigdelset(&current->blocked, sig);
-	if (!current->mm) {
-		/* Kernel threads handle their own signals.
-		   Let the signal code know it'll be handled, so
-		   that they don't get converted to SIGKILL or
-		   just silently dropped */
+	/*
+	 * Kernel threads handle their own signals. Let the signal code
+	 * know it'll be handled, so that they don't get converted to
+	 * SIGKILL or just silently dropped.
+	 */
 	current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
-	}
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	return 0;
@@ -429,7 +421,6 @@ EXPORT_SYMBOL(disallow_signal);
 void daemonize(const char *name, ...)
 {
 	va_list args;
-	struct fs_struct *fs;
 	sigset_t blocked;
 
 	va_start(args, name);
@@ -462,11 +453,7 @@ void daemonize(const char *name, ...)
 
 	/* Become as one with the init task */
 
-	exit_fs(current);	/* current->fs->count--; */
-	fs = init_task.fs;
-	current->fs = fs;
-	atomic_inc(&fs->count);
-
+	daemonize_fs_struct();
 	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
@@ -565,30 +552,6 @@ void exit_files(struct task_struct *tsk)
 	}
 }
 
-void put_fs_struct(struct fs_struct *fs)
-{
-	/* No need to hold fs->lock if we are killing it */
-	if (atomic_dec_and_test(&fs->count)) {
-		path_put(&fs->root);
-		path_put(&fs->pwd);
-		kmem_cache_free(fs_cachep, fs);
-	}
-}
-
-void exit_fs(struct task_struct *tsk)
-{
-	struct fs_struct * fs = tsk->fs;
-
-	if (fs) {
-		task_lock(tsk);
-		tsk->fs = NULL;
-		task_unlock(tsk);
-		put_fs_struct(fs);
-	}
-}
-
-EXPORT_SYMBOL_GPL(exit_fs);
-
 #ifdef CONFIG_MM_OWNER
 /*
  * Task p is exiting and it owned mm, lets find a new owner for it
@@ -627,7 +590,7 @@ retry:
 	/*
 	 * Search in the siblings
 	 */
-	list_for_each_entry(c, &p->parent->children, sibling) {
+	list_for_each_entry(c, &p->real_parent->children, sibling) {
 		if (c->mm == mm)
 			goto assign_new_owner;
 	}
@@ -732,119 +695,6 @@ static void exit_mm(struct task_struct * tsk)
 }
 
 /*
- * Return nonzero if @parent's children should reap themselves.
- *
- * Called with write_lock_irq(&tasklist_lock) held.
- */
-static int ignoring_children(struct task_struct *parent)
-{
-	int ret;
-	struct sighand_struct *psig = parent->sighand;
-	unsigned long flags;
-	spin_lock_irqsave(&psig->siglock, flags);
-	ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
-	       (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
-	spin_unlock_irqrestore(&psig->siglock, flags);
-	return ret;
-}
-
-/*
- * Detach all tasks we were using ptrace on.
- * Any that need to be release_task'd are put on the @dead list.
- *
- * Called with write_lock(&tasklist_lock) held.
- */
-static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
-{
-	struct task_struct *p, *n;
-	int ign = -1;
-
-	list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
-		__ptrace_unlink(p);
-
-		if (p->exit_state != EXIT_ZOMBIE)
-			continue;
-
-		/*
-		 * If it's a zombie, our attachedness prevented normal
-		 * parent notification or self-reaping.  Do notification
-		 * now if it would have happened earlier.  If it should
-		 * reap itself, add it to the @dead list.  We can't call
-		 * release_task() here because we already hold tasklist_lock.
-		 *
-		 * If it's our own child, there is no notification to do.
-		 * But if our normal children self-reap, then this child
-		 * was prevented by ptrace and we must reap it now.
-		 */
-		if (!task_detached(p) && thread_group_empty(p)) {
-			if (!same_thread_group(p->real_parent, parent))
-				do_notify_parent(p, p->exit_signal);
-			else {
-				if (ign < 0)
-					ign = ignoring_children(parent);
-				if (ign)
-					p->exit_signal = -1;
-			}
-		}
-
-		if (task_detached(p)) {
-			/*
-			 * Mark it as in the process of being reaped.
-			 */
-			p->exit_state = EXIT_DEAD;
-			list_add(&p->ptrace_entry, dead);
-		}
-	}
-}
-
-/*
- * Finish up exit-time ptrace cleanup.
- *
- * Called without locks.
- */
-static void ptrace_exit_finish(struct task_struct *parent,
-			       struct list_head *dead)
-{
-	struct task_struct *p, *n;
-
-	BUG_ON(!list_empty(&parent->ptraced));
-
-	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
-		list_del_init(&p->ptrace_entry);
-		release_task(p);
-	}
-}
-
-static void reparent_thread(struct task_struct *p, struct task_struct *father)
-{
-	if (p->pdeath_signal)
-		/* We already hold the tasklist_lock here. */
-		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
-
-	list_move_tail(&p->sibling, &p->real_parent->children);
-
-	/* If this is a threaded reparent there is no need to
-	 * notify anyone anything has happened.
-	 */
-	if (same_thread_group(p->real_parent, father))
-		return;
-
-	/* We don't want people slaying init. */
-	if (!task_detached(p))
-		p->exit_signal = SIGCHLD;
-
-	/* If we'd notified the old parent about this child's death,
-	 * also notify the new parent.
-	 */
-	if (!ptrace_reparented(p) &&
-	    p->exit_state == EXIT_ZOMBIE &&
-	    !task_detached(p) && thread_group_empty(p))
-		do_notify_parent(p, p->exit_signal);
-
-	kill_orphaned_pgrp(p, father);
-}
-
-/*
  * When we die, we re-parent all our children.
  * Try to give them to another thread in our thread
  * group, and if no such member exists, give it to
@@ -883,31 +733,68 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 	return pid_ns->child_reaper;
 }
 
+/*
+ * Any that need to be release_task'd are put on the @dead list.
+ */
+static void reparent_thread(struct task_struct *father, struct task_struct *p,
+				struct list_head *dead)
+{
+	if (p->pdeath_signal)
+		group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+
+	list_move_tail(&p->sibling, &p->real_parent->children);
+
+	if (task_detached(p))
+		return;
+	/*
+	 * If this is a threaded reparent there is no need to
+	 * notify anyone anything has happened.
+	 */
+	if (same_thread_group(p->real_parent, father))
+		return;
+
+	/* We don't want people slaying init. */
+	p->exit_signal = SIGCHLD;
+
+	/* If it has exited notify the new parent about this child's death. */
+	if (!task_ptrace(p) &&
+	    p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
+		do_notify_parent(p, p->exit_signal);
+		if (task_detached(p)) {
+			p->exit_state = EXIT_DEAD;
+			list_move_tail(&p->sibling, dead);
+		}
+	}
+
+	kill_orphaned_pgrp(p, father);
+}
+
 static void forget_original_parent(struct task_struct *father)
 {
 	struct task_struct *p, *n, *reaper;
-	LIST_HEAD(ptrace_dead);
+	LIST_HEAD(dead_children);
+
+	exit_ptrace(father);
 
 	write_lock_irq(&tasklist_lock);
 	reaper = find_new_reaper(father);
-	/*
-	 * First clean up ptrace if we were using it.
-	 */
-	ptrace_exit(father, &ptrace_dead);
 
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		p->real_parent = reaper;
 		if (p->parent == father) {
-			BUG_ON(p->ptrace);
+			BUG_ON(task_ptrace(p));
 			p->parent = p->real_parent;
 		}
-		reparent_thread(p, father);
+		reparent_thread(father, p, &dead_children);
 	}
-
 	write_unlock_irq(&tasklist_lock);
+
 	BUG_ON(!list_empty(&father->children));
 
-	ptrace_exit_finish(father, &ptrace_dead);
+	list_for_each_entry_safe(p, n, &dead_children, sibling) {
+		list_del_init(&p->sibling);
+		release_task(p);
+	}
 }
 
 /*
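The hunk above is the heart of the reparenting rework: while tasklist_lock is held, forget_original_parent() only moves dead children onto the private dead_children list, and the release_task() calls run after the lock is dropped, since release_task() must itself take tasklist_lock. A minimal userspace sketch of that two-phase pattern follows; all names are illustrative, nothing here is kernel API.

#include <pthread.h>
#include <stdio.h>

struct child {
	int pid;
	struct child *next;
};

static pthread_mutex_t tasklist_lock = PTHREAD_MUTEX_INITIALIZER;

/* Phase 1: under the lock, only unlink the dead children onto *dead. */
static void reap_under_lock(struct child **children, struct child **dead)
{
	pthread_mutex_lock(&tasklist_lock);
	while (*children) {
		struct child *c = *children;
		*children = c->next;
		c->next = *dead;	/* like list_move_tail(&p->sibling, dead) */
		*dead = c;
	}
	pthread_mutex_unlock(&tasklist_lock);
}

/* Phase 2: the heavy per-child cleanup, lock already dropped. */
static void release_all(struct child *dead)
{
	for (struct child *c = dead; c; c = c->next)
		printf("release_task(%d)\n", c->pid);	/* stand-in for release_task() */
}

int main(void)
{
	struct child c2 = { 2, NULL }, c1 = { 1, &c2 };
	struct child *children = &c1, *dead = NULL;

	reap_under_lock(&children, &dead);
	release_all(dead);
	return 0;
}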
@@ -950,8 +837,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	 */
 	if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) &&
 	    (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
-	     tsk->self_exec_id != tsk->parent_exec_id) &&
-	    !capable(CAP_KILL))
+	     tsk->self_exec_id != tsk->parent_exec_id))
 		tsk->exit_signal = SIGCHLD;
 
 	signal = tracehook_notify_death(tsk, &cookie, group_dead);
@@ -1016,6 +902,8 @@ NORET_TYPE void do_exit(long code)
 
 	tracehook_report_exit(&code);
 
+	validate_creds_for_do_exit(tsk);
+
 	/*
 	 * We're taking recursive faults here in do_exit. Safest is to just
 	 * leave this task alone and wait for reboot.
@@ -1037,6 +925,8 @@ NORET_TYPE void do_exit(long code)
 		schedule();
 	}
 
+	exit_irq_thread();
+
 	exit_signals(tsk);	/* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
@@ -1056,6 +946,8 @@ NORET_TYPE void do_exit(long code)
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
+		if (tsk->mm)
+			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
 	}
 	acct_collect(code, group_dead);
 	if (group_dead)
@@ -1083,22 +975,25 @@ NORET_TYPE void do_exit(long code)
 		disassociate_ctty(1);
 
 	module_put(task_thread_info(tsk)->exec_domain->module);
-	if (tsk->binfmt)
-		module_put(tsk->binfmt->module);
 
 	proc_exit_connector(tsk);
+
+	/*
+	 * FIXME: do that only when needed, using sched_exit tracepoint
+	 */
+	flush_ptrace_hw_breakpoint(tsk);
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_event_exit_task(tsk);
+
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
 #ifdef CONFIG_FUTEX
-	/*
-	 * This must happen late, after the PID is not
-	 * hashed anymore:
-	 */
-	if (unlikely(!list_empty(&tsk->pi_state_list)))
-		exit_pi_state_list(tsk);
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
 #endif
@@ -1114,12 +1009,15 @@ NORET_TYPE void do_exit(long code)
 	tsk->flags |= PF_EXITPIDONE;
 
 	if (tsk->io_context)
-		exit_io_context();
+		exit_io_context(tsk);
 
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	validate_creds_for_do_exit(tsk);
+
 	preempt_disable();
+	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
 	schedule();
@@ -1189,62 +1087,72 @@ SYSCALL_DEFINE1(exit_group, int, error_code)
 	return 0;
 }
 
-static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+struct wait_opts {
+	enum pid_type		wo_type;
+	int			wo_flags;
+	struct pid		*wo_pid;
+
+	struct siginfo __user	*wo_info;
+	int __user		*wo_stat;
+	struct rusage __user	*wo_rusage;
+
+	wait_queue_t		child_wait;
+	int			notask_error;
+};
+
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
-	struct pid *pid = NULL;
-	if (type == PIDTYPE_PID)
-		pid = task->pids[type].pid;
-	else if (type < PIDTYPE_MAX)
-		pid = task->group_leader->pids[type].pid;
-	return pid;
+	if (type != PIDTYPE_PID)
+		task = task->group_leader;
+	return task->pids[type].pid;
 }
 
-static int eligible_child(enum pid_type type, struct pid *pid, int options,
-			  struct task_struct *p)
+static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
-	int err;
-
-	if (type < PIDTYPE_MAX) {
-		if (task_pid_type(p, type) != pid)
-			return 0;
-	}
+	return wo->wo_type == PIDTYPE_MAX ||
+		task_pid_type(p, wo->wo_type) == wo->wo_pid;
+}
 
+static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+{
+	if (!eligible_pid(wo, p))
+		return 0;
 	/* Wait for all children (clone and not) if __WALL is set;
 	 * otherwise, wait for clone children *only* if __WCLONE is
 	 * set; otherwise, wait for non-clone children *only*.  (Note:
 	 * A "clone" child here is one that reports to its parent
 	 * using a signal other than SIGCHLD.) */
-	if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
-	    && !(options & __WALL))
+	if (((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
+	    && !(wo->wo_flags & __WALL))
 		return 0;
 
-	err = security_task_wait(p);
-	if (err)
-		return err;
-
 	return 1;
 }
 
-static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
-			       int why, int status,
-			       struct siginfo __user *infop,
-			       struct rusage __user *rusagep)
+static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
+			       pid_t pid, uid_t uid, int why, int status)
 {
-	int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
+	struct siginfo __user *infop;
+	int retval = wo->wo_rusage
+		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 
 	put_task_struct(p);
-	if (!retval)
-		retval = put_user(SIGCHLD, &infop->si_signo);
-	if (!retval)
-		retval = put_user(0, &infop->si_errno);
-	if (!retval)
-		retval = put_user((short)why, &infop->si_code);
-	if (!retval)
-		retval = put_user(pid, &infop->si_pid);
-	if (!retval)
-		retval = put_user(uid, &infop->si_uid);
-	if (!retval)
-		retval = put_user(status, &infop->si_status);
+	infop = wo->wo_info;
+	if (infop) {
+		if (!retval)
+			retval = put_user(SIGCHLD, &infop->si_signo);
+		if (!retval)
+			retval = put_user(0, &infop->si_errno);
+		if (!retval)
+			retval = put_user((short)why, &infop->si_code);
+		if (!retval)
+			retval = put_user(pid, &infop->si_pid);
+		if (!retval)
+			retval = put_user(uid, &infop->si_uid);
+		if (!retval)
+			retval = put_user(status, &infop->si_status);
+	}
 	if (!retval)
 		retval = pid;
 	return retval;
@@ -1256,19 +1164,18 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_zombie(struct task_struct *p, int options,
-			    struct siginfo __user *infop,
-			    int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 {
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
 	uid_t uid = __task_cred(p)->uid;
+	struct siginfo __user *infop;
 
-	if (!likely(options & WEXITED))
+	if (!likely(wo->wo_flags & WEXITED))
 		return 0;
 
-	if (unlikely(options & WNOWAIT)) {
+	if (unlikely(wo->wo_flags & WNOWAIT)) {
 		int exit_code = p->exit_code;
 		int why, status;
 
@@ -1281,8 +1188,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
 			status = exit_code & 0x7f;
 		}
-		return wait_noreap_copyout(p, pid, uid, why,
-					   status, infop, ru);
+		return wait_noreap_copyout(wo, p, pid, uid, why, status);
 	}
 
 	/*
@@ -1296,11 +1202,15 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	}
 
 	traced = ptrace_reparented(p);
-
-	if (likely(!traced)) {
+	/*
+	 * It can be ptraced but not reparented, check
+	 * !task_detached() to filter out sub-threads.
+	 */
+	if (likely(!traced) && likely(!task_detached(p))) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
-		struct task_cputime cputime;
+		unsigned long maxrss;
+		cputime_t tgutime, tgstime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1313,25 +1223,25 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * p->signal fields, because they are only touched by
 		 * __exit_signal, which runs with tasklist_lock
 		 * write-locked anyway, and so is excluded here.  We do
-		 * need to protect the access to p->parent->signal fields,
+		 * need to protect the access to parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
 		 *
-		 * We use thread_group_cputime() to get times for the thread
+		 * We use thread_group_times() to get times for the thread
 		 * group, which consolidates times for all threads in the
 		 * group including the group leader.
 		 */
-		thread_group_cputime(p, &cputime);
-		spin_lock_irq(&p->parent->sighand->siglock);
-		psig = p->parent->signal;
+		thread_group_times(p, &tgutime, &tgstime);
+		spin_lock_irq(&p->real_parent->sighand->siglock);
+		psig = p->real_parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
-				    cputime_add(cputime.utime,
+				    cputime_add(tgutime,
 						sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-				    cputime_add(cputime.stime,
+				    cputime_add(tgstime,
 						sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
@@ -1352,9 +1262,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
+		maxrss = max(sig->maxrss, sig->cmaxrss);
+		if (psig->cmaxrss < maxrss)
+			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
-		spin_unlock_irq(&p->parent->sighand->siglock);
+		spin_unlock_irq(&p->real_parent->sighand->siglock);
 	}
 
 	/*
@@ -1363,11 +1276,14 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	 */
 	read_unlock(&tasklist_lock);
 
-	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+	retval = wo->wo_rusage
+		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 		? p->signal->group_exit_code : p->exit_code;
-	if (!retval && stat_addr)
-		retval = put_user(status, stat_addr);
+	if (!retval && wo->wo_stat)
+		retval = put_user(status, wo->wo_stat);
+
+	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
@@ -1417,42 +1333,51 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	return retval;
 }
 
+static int *task_stopped_code(struct task_struct *p, bool ptrace)
+{
+	if (ptrace) {
+		if (task_is_stopped_or_traced(p))
+			return &p->exit_code;
+	} else {
+		if (p->signal->flags & SIGNAL_STOP_STOPPED)
+			return &p->signal->group_exit_code;
+	}
+	return NULL;
+}
+
 /*
  * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
  * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
 */
-static int wait_task_stopped(int ptrace, struct task_struct *p,
-			     int options, struct siginfo __user *infop,
-			     int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_stopped(struct wait_opts *wo,
+			     int ptrace, struct task_struct *p)
 {
-	int retval, exit_code, why;
+	struct siginfo __user *infop;
+	int retval, exit_code, *p_code, why;
 	uid_t uid = 0; /* unneeded, required by compiler */
 	pid_t pid;
 
-	if (!(options & WUNTRACED))
+	/*
+	 * Traditionally we see ptrace'd stopped tasks regardless of options.
+	 */
+	if (!ptrace && !(wo->wo_flags & WUNTRACED))
 		return 0;
 
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 
-	if (unlikely(!task_is_stopped_or_traced(p)))
-		goto unlock_sig;
-
-	if (!ptrace && p->signal->group_stop_count > 0)
-		/*
-		 * A group stop is in progress and this is the group leader.
-		 * We won't report until all threads have stopped.
-		 */
+	p_code = task_stopped_code(p, ptrace);
+	if (unlikely(!p_code))
 		goto unlock_sig;
 
-	exit_code = p->exit_code;
+	exit_code = *p_code;
 	if (!exit_code)
 		goto unlock_sig;
 
-	if (!unlikely(options & WNOWAIT))
-		p->exit_code = 0;
+	if (!unlikely(wo->wo_flags & WNOWAIT))
+		*p_code = 0;
 
 	/* don't need the RCU readlock here as we're holding a spinlock */
 	uid = __task_cred(p)->uid;
@@ -1473,14 +1398,15 @@ unlock_sig:
 	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(&tasklist_lock);
 
-	if (unlikely(options & WNOWAIT))
-		return wait_noreap_copyout(p, pid, uid,
-					   why, exit_code,
-					   infop, ru);
+	if (unlikely(wo->wo_flags & WNOWAIT))
+		return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
+
+	retval = wo->wo_rusage
+		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
+	if (!retval && wo->wo_stat)
+		retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat);
 
-	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
-	if (!retval && stat_addr)
-		retval = put_user((exit_code << 8) | 0x7f, stat_addr);
+	infop = wo->wo_info;
 	if (!retval && infop)
 		retval = put_user(SIGCHLD, &infop->si_signo);
 	if (!retval && infop)
@@ -1507,15 +1433,13 @@ unlock_sig:
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_continued(struct task_struct *p, int options,
-			       struct siginfo __user *infop,
-			       int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 {
 	int retval;
 	pid_t pid;
 	uid_t uid;
 
-	if (!unlikely(options & WCONTINUED))
+	if (!unlikely(wo->wo_flags & WCONTINUED))
 		return 0;
 
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
@@ -1527,7 +1451,7 @@ static int wait_task_continued(struct task_struct *p, int options,
 		spin_unlock_irq(&p->sighand->siglock);
 		return 0;
 	}
-	if (!unlikely(options & WNOWAIT))
+	if (!unlikely(wo->wo_flags & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	uid = __task_cred(p)->uid;
 	spin_unlock_irq(&p->sighand->siglock);
@@ -1536,17 +1460,17 @@ static int wait_task_continued(struct task_struct *p, int options,
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
-	if (!infop) {
-		retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
+	if (!wo->wo_info) {
+		retval = wo->wo_rusage
+			? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 		put_task_struct(p);
-		if (!retval && stat_addr)
-			retval = put_user(0xffff, stat_addr);
+		if (!retval && wo->wo_stat)
+			retval = put_user(0xffff, wo->wo_stat);
 		if (!retval)
 			retval = pid;
 	} else {
-		retval = wait_noreap_copyout(p, pid, uid,
-					     CLD_CONTINUED, SIGCONT,
-					     infop, ru);
+		retval = wait_noreap_copyout(wo, p, pid, uid,
+					     CLD_CONTINUED, SIGCONT);
 		BUG_ON(retval == 0);
 	}
 
@@ -1556,22 +1480,20 @@ static int wait_task_continued(struct task_struct *p, int options,
 /*
  * Consider @p for a wait by @parent.
  *
- * -ECHILD should be in *@notask_error before the first call.
+ * -ECHILD should be in ->notask_error before the first call.
  * Returns nonzero for a final return, when we have unlocked tasklist_lock.
  * Returns zero if the search for a child should continue;
- * then *@notask_error is 0 if @p is an eligible child,
+ * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
 */
-static int wait_consider_task(struct task_struct *parent, int ptrace,
-			      struct task_struct *p, int *notask_error,
-			      enum pid_type type, struct pid *pid, int options,
-			      struct siginfo __user *infop,
-			      int __user *stat_addr, struct rusage __user *ru)
+static int wait_consider_task(struct wait_opts *wo, int ptrace,
+			      struct task_struct *p)
 {
-	int ret = eligible_child(type, pid, options, p);
+	int ret = eligible_child(wo, p);
 	if (!ret)
 		return ret;
 
+	ret = security_task_wait(p);
 	if (unlikely(ret < 0)) {
 		/*
 		 * If we have not yet seen any eligible child,
@@ -1580,16 +1502,17 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
 		 * to look for security policy problems, rather
 		 * than for mysterious wait bugs.
 		 */
-		if (*notask_error)
-			*notask_error = ret;
+		if (wo->notask_error)
+			wo->notask_error = ret;
+		return 0;
 	}
 
-	if (likely(!ptrace) && unlikely(p->ptrace)) {
+	if (likely(!ptrace) && unlikely(task_ptrace(p))) {
 		/*
 		 * This child is hidden by ptrace.
 		 * We aren't allowed to see it now, but eventually we will.
 		 */
-		*notask_error = 0;
+		wo->notask_error = 0;
 		return 0;
 	}
 
@@ -1600,34 +1523,30 @@ static int wait_consider_task(struct task_struct *parent, int ptrace,
 	 * We don't reap group leaders with subthreads.
 	 */
 	if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-		return wait_task_zombie(p, options, infop, stat_addr, ru);
+		return wait_task_zombie(wo, p);
 
 	/*
 	 * It's stopped or running now, so it might
 	 * later continue, exit, or stop again.
 	 */
-	*notask_error = 0;
+	wo->notask_error = 0;
 
-	if (task_is_stopped_or_traced(p))
-		return wait_task_stopped(ptrace, p, options,
-					 infop, stat_addr, ru);
+	if (task_stopped_code(p, ptrace))
+		return wait_task_stopped(wo, ptrace, p);
 
-	return wait_task_continued(p, options, infop, stat_addr, ru);
+	return wait_task_continued(wo, p);
 }
 
 /*
  * Do the work of do_wait() for one thread in the group, @tsk.
  *
- * -ECHILD should be in *@notask_error before the first call.
+ * -ECHILD should be in ->notask_error before the first call.
  * Returns nonzero for a final return, when we have unlocked tasklist_lock.
  * Returns zero if the search for a child should continue; then
- * *@notask_error is 0 if there were any eligible children,
+ * ->notask_error is 0 if there were any eligible children,
  * or another error from security_task_wait(), or still -ECHILD.
 */
-static int do_wait_thread(struct task_struct *tsk, int *notask_error,
-			  enum pid_type type, struct pid *pid, int options,
-			  struct siginfo __user *infop, int __user *stat_addr,
-			  struct rusage __user *ru)
+static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *p;
 
@@ -1636,9 +1555,7 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
 		 * Do not consider detached threads.
 		 */
 		if (!task_detached(p)) {
-			int ret = wait_consider_task(tsk, 0, p, notask_error,
-						     type, pid, options,
-						     infop, stat_addr, ru);
+			int ret = wait_consider_task(wo, 0, p);
 			if (ret)
 				return ret;
 		}
@@ -1647,22 +1564,12 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error,
 	return 0;
 }
 
-static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
-			  enum pid_type type, struct pid *pid, int options,
-			  struct siginfo __user *infop, int __user *stat_addr,
-			  struct rusage __user *ru)
+static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
 {
 	struct task_struct *p;
 
-	/*
-	 * Traditionally we see ptrace'd stopped tasks regardless of options.
-	 */
-	options |= WUNTRACED;
-
 	list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
-		int ret = wait_consider_task(tsk, 1, p, notask_error,
-					     type, pid, options,
-					     infop, stat_addr, ru);
+		int ret = wait_consider_task(wo, 1, p);
 		if (ret)
 			return ret;
 	}
@@ -1670,93 +1577,86 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
 	return 0;
 }
 
-static long do_wait(enum pid_type type, struct pid *pid, int options,
-		    struct siginfo __user *infop, int __user *stat_addr,
-		    struct rusage __user *ru)
+static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+				int sync, void *key)
+{
+	struct wait_opts *wo = container_of(wait, struct wait_opts,
+						child_wait);
+	struct task_struct *p = key;
+
+	if (!eligible_pid(wo, p))
+		return 0;
+
+	if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
+		return 0;
+
+	return default_wake_function(wait, mode, sync, key);
+}
+
+void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
+{
+	__wake_up_sync_key(&parent->signal->wait_chldexit,
+				TASK_INTERRUPTIBLE, 1, p);
+}
+
+static long do_wait(struct wait_opts *wo)
 {
-	DECLARE_WAITQUEUE(wait, current);
 	struct task_struct *tsk;
 	int retval;
 
-	trace_sched_process_wait(pid);
+	trace_sched_process_wait(wo->wo_pid);
 
-	add_wait_queue(&current->signal->wait_chldexit,&wait);
+	init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+	wo->child_wait.private = current;
+	add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 repeat:
 	/*
 	 * If there is nothing that can match our critiera just get out.
-	 * We will clear @retval to zero if we see any child that might later
-	 * match our criteria, even if we are not able to reap it yet.
+	 * We will clear ->notask_error to zero if we see any child that
+	 * might later match our criteria, even if we are not able to reap
+	 * it yet.
 	 */
-	retval = -ECHILD;
-	if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
-		goto end;
+	wo->notask_error = -ECHILD;
+	if ((wo->wo_type < PIDTYPE_MAX) &&
+	   (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
+		goto notask;
 
-	current->state = TASK_INTERRUPTIBLE;
+	set_current_state(TASK_INTERRUPTIBLE);
 	read_lock(&tasklist_lock);
 	tsk = current;
 	do {
-		int tsk_result = do_wait_thread(tsk, &retval,
-						type, pid, options,
-						infop, stat_addr, ru);
-		if (!tsk_result)
-			tsk_result = ptrace_do_wait(tsk, &retval,
-						    type, pid, options,
-						    infop, stat_addr, ru);
-		if (tsk_result) {
-			/*
-			 * tasklist_lock is unlocked and we have a final result.
-			 */
-			retval = tsk_result;
+		retval = do_wait_thread(wo, tsk);
+		if (retval)
 			goto end;
-		}
 
-		if (options & __WNOTHREAD)
+		retval = ptrace_do_wait(wo, tsk);
+		if (retval)
+			goto end;
+
+		if (wo->wo_flags & __WNOTHREAD)
 			break;
-		tsk = next_thread(tsk);
-		BUG_ON(tsk->signal != current->signal);
-	} while (tsk != current);
+	} while_each_thread(current, tsk);
 	read_unlock(&tasklist_lock);
 
-	if (!retval && !(options & WNOHANG)) {
+notask:
+	retval = wo->notask_error;
+	if (!retval && !(wo->wo_flags & WNOHANG)) {
 		retval = -ERESTARTSYS;
 		if (!signal_pending(current)) {
 			schedule();
 			goto repeat;
 		}
 	}
-
 end:
-	current->state = TASK_RUNNING;
-	remove_wait_queue(&current->signal->wait_chldexit,&wait);
-	if (infop) {
-		if (retval > 0)
-			retval = 0;
-		else {
-			/*
-			 * For a WNOHANG return, clear out all the fields
-			 * we would set so the user can easily tell the
-			 * difference.
-			 */
-			if (!retval)
-				retval = put_user(0, &infop->si_signo);
-			if (!retval)
-				retval = put_user(0, &infop->si_errno);
-			if (!retval)
-				retval = put_user(0, &infop->si_code);
-			if (!retval)
-				retval = put_user(0, &infop->si_pid);
-			if (!retval)
-				retval = put_user(0, &infop->si_uid);
-			if (!retval)
-				retval = put_user(0, &infop->si_status);
-		}
-	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 	return retval;
 }
 
 SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		infop, int, options, struct rusage __user *, ru)
 {
+	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
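The child_wait_callback()/do_wait() pairing above replaces the old DECLARE_WAITQUEUE() entry with a function-style wait-queue entry: an exiting child is passed as the wake key via __wake_up_parent(), and each waiter's callback checks eligible_pid() (and __WNOTHREAD) before actually waking, so a child exit no longer has to wake every sleeping wait() in the parent's thread group. A minimal userspace sketch of that filtered-wakeup idea follows; the names are illustrative only, and it deliberately simplifies the kernel's wait-queue machinery.

#include <stdbool.h>
#include <stdio.h>

struct waiter {
	int wanted_pid;		/* analogous to wait_opts.wo_pid; -1 means "any" */
	bool woken;
	/* analogous to wait_queue_t.func, i.e. child_wait_callback() */
	bool (*func)(struct waiter *w, int key_pid);
};

/* Wake this waiter only if its criteria match the exiting child. */
static bool child_wait_filter(struct waiter *w, int key_pid)
{
	if (w->wanted_pid != -1 && w->wanted_pid != key_pid)
		return false;	/* stay asleep, like returning 0 above */
	w->woken = true;	/* like calling default_wake_function() */
	return true;
}

/* Analogous to __wake_up_sync_key(..., p): the child is the wake key. */
static void wake_up_key(struct waiter *q, int n, int key_pid)
{
	for (int i = 0; i < n; i++)
		q[i].func(&q[i], key_pid);
}

int main(void)
{
	struct waiter q[] = {
		{ .wanted_pid = 100, .func = child_wait_filter },	/* waitpid(100) */
		{ .wanted_pid = -1,  .func = child_wait_filter },	/* waitpid(-1)  */
		{ .wanted_pid = 200, .func = child_wait_filter },	/* waitpid(200) */
	};

	wake_up_key(q, 3, 100);	/* child 100 exits */
	for (int i = 0; i < 3; i++)
		printf("waiter %d: %s\n", i, q[i].woken ? "woken" : "still waiting");
	return 0;
}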
@@ -1786,7 +1686,37 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 
 	if (type < PIDTYPE_MAX)
 		pid = find_get_pid(upid);
-	ret = do_wait(type, pid, options, infop, NULL, ru);
+
+	wo.wo_type	= type;
+	wo.wo_pid	= pid;
+	wo.wo_flags	= options;
+	wo.wo_info	= infop;
+	wo.wo_stat	= NULL;
+	wo.wo_rusage	= ru;
+	ret = do_wait(&wo);
+
+	if (ret > 0) {
+		ret = 0;
+	} else if (infop) {
+		/*
+		 * For a WNOHANG return, clear out all the fields
+		 * we would set so the user can easily tell the
+		 * difference.
+		 */
+		if (!ret)
+			ret = put_user(0, &infop->si_signo);
+		if (!ret)
+			ret = put_user(0, &infop->si_errno);
+		if (!ret)
+			ret = put_user(0, &infop->si_code);
+		if (!ret)
+			ret = put_user(0, &infop->si_pid);
+		if (!ret)
+			ret = put_user(0, &infop->si_uid);
+		if (!ret)
+			ret = put_user(0, &infop->si_status);
+	}
+
 	put_pid(pid);
 
 	/* avoid REGPARM breakage on x86: */
@@ -1797,6 +1727,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 		int, options, struct rusage __user *, ru)
 {
+	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
 	long ret;
@@ -1812,13 +1743,19 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
 		pid = find_get_pid(-upid);
 	} else if (upid == 0) {
 		type = PIDTYPE_PGID;
-		pid = get_pid(task_pgrp(current));
+		pid = get_task_pid(current, PIDTYPE_PGID);
 	} else /* upid > 0 */ {
 		type = PIDTYPE_PID;
 		pid = find_get_pid(upid);
 	}
 
-	ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru);
+	wo.wo_type	= type;
+	wo.wo_pid	= pid;
+	wo.wo_flags	= options | WEXITED;
+	wo.wo_info	= NULL;
+	wo.wo_stat	= stat_addr;
+	wo.wo_rusage	= ru;
+	ret = do_wait(&wo);
 	put_pid(pid);
 
 	/* avoid REGPARM breakage on x86: */
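For reference, the WNOHANG behaviour that the waitid hunk above keeps intact is visible from userspace: when no child is ready to reap, the syscall succeeds but clears the siginfo fields so the caller can tell the difference. A short demo, assuming a Linux system with waitid(2):

#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();
	if (child < 0)
		return 1;
	if (child == 0) {
		sleep(1);	/* child stays alive past the first waitid() */
		_exit(42);
	}

	siginfo_t info = { 0 };
	/* First poll: the child has not exited yet, so si_pid stays 0. */
	if (waitid(P_PID, child, &info, WEXITED | WNOHANG) == 0)
		printf("WNOHANG poll: si_pid=%d (0 means nothing to reap)\n",
		       (int)info.si_pid);

	/* Blocking wait: now si_pid/si_status describe the reaped child. */
	if (waitid(P_PID, child, &info, WEXITED) == 0)
		printf("reaped: si_pid=%d si_status=%d\n",
		       (int)info.si_pid, info.si_status);
	return 0;
}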