Diffstat (limited to 'kernel/exit.c')

 -rw-r--r--  kernel/exit.c  185
 1 file changed, 72 insertions, 113 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index ad933bb29ec7..85a83c831856 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -46,6 +46,7 @@
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -111,27 +112,16 @@ static void __exit_signal(struct task_struct *tsk)
		 * We won't ever get here for the group leader, since it
		 * will have been the last reference on the signal_struct.
		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
+		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
		sig->min_flt += tsk->min_flt;
		sig->maj_flt += tsk->maj_flt;
		sig->nvcsw += tsk->nvcsw;
		sig->nivcsw += tsk->nivcsw;
		sig->inblock += task_io_get_inblock(tsk);
		sig->oublock += task_io_get_oublock(tsk);
-#ifdef CONFIG_TASK_XACCT
-		sig->rchar += tsk->rchar;
-		sig->wchar += tsk->wchar;
-		sig->syscr += tsk->syscr;
-		sig->syscw += tsk->syscw;
-#endif /* CONFIG_TASK_XACCT */
-#ifdef CONFIG_TASK_IO_ACCOUNTING
-		sig->ioac.read_bytes += tsk->ioac.read_bytes;
-		sig->ioac.write_bytes += tsk->ioac.write_bytes;
-		sig->ioac.cancelled_write_bytes +=
-				tsk->ioac.cancelled_write_bytes;
-#endif /* CONFIG_TASK_IO_ACCOUNTING */
+		task_io_accounting_add(&sig->ioac, &tsk->ioac);
		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
		sig = NULL; /* Marker for below. */
	}
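
The two #ifdef blocks above collapse into one task_io_accounting_add() call because the CONFIG_TASK_XACCT character-I/O counters (rchar, wchar, syscr, syscw) and the CONFIG_TASK_IO_ACCOUNTING block-I/O counters now live side by side in struct task_io_accounting. A minimal sketch of the helper, assuming it simply folds the fields the deleted lines used to sum (the in-tree version sits in include/linux/task_io_accounting_ops.h and may be factored differently):

/*
 * Sketch only: add one task's I/O counters into an accumulator.
 * Field set taken from the code deleted above.
 */
static inline void task_io_accounting_add(struct task_io_accounting *dst,
					  struct task_io_accounting *src)
{
#ifdef CONFIG_TASK_XACCT
	dst->rchar += src->rchar;	/* bytes read */
	dst->wchar += src->wchar;	/* bytes written */
	dst->syscr += src->syscr;	/* read syscalls */
	dst->syscw += src->syscw;	/* write syscalls */
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
	dst->read_bytes += src->read_bytes;
	dst->write_bytes += src->write_bytes;
	dst->cancelled_write_bytes += src->cancelled_write_bytes;
#endif
}

The cputime lines change too: the raw tsk->utime/stime/gtime reads become task_utime()/task_stime()/task_gtime(), so the totals folded into the signal struct match the adjusted values a live task reports, rather than the unscaled raw counters.
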
@@ -162,27 +152,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	put_task_struct(container_of(rhp, struct task_struct, rcu));
 }
 
-/*
- * Do final ptrace-related cleanup of a zombie being reaped.
- *
- * Called with write_lock(&tasklist_lock) held.
- */
-static void ptrace_release_task(struct task_struct *p)
-{
-	BUG_ON(!list_empty(&p->ptraced));
-	ptrace_unlink(p);
-	BUG_ON(!list_empty(&p->ptrace_entry));
-}
 
 void release_task(struct task_struct * p)
 {
 	struct task_struct *leader;
 	int zap_leader;
 repeat:
+	tracehook_prepare_release_task(p);
 	atomic_dec(&p->user->processes);
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
-	ptrace_release_task(p);
+	tracehook_finish_release_task(p);
 	__exit_signal(p);
 
 	/*
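
The ptrace cleanup does not disappear here; it moves behind the new tracehook layer. Assuming the shape these hooks have in <linux/tracehook.h> in this series, the two calls bracket the reaping roughly like this (a sketch, not the verbatim header):

/* Runs before the reaping work starts, outside tasklist_lock. */
static inline void tracehook_prepare_release_task(struct task_struct *task)
{
	/* empty for plain ptrace; a hook point for other tracing engines */
}

/* Runs under write_lock_irq(&tasklist_lock), like the old helper did. */
static inline void tracehook_finish_release_task(struct task_struct *task)
{
	/* the old ptrace_release_task() body: sanity BUG_ONs + ptrace_unlink() */
	ptrace_release_task(task);
}
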
@@ -204,6 +184,13 @@ repeat:
 		 * that case.
 		 */
 		zap_leader = task_detached(leader);
+
+		/*
+		 * This maintains the invariant that release_task()
+		 * only runs on a task in EXIT_DEAD, just for sanity.
+		 */
+		if (zap_leader)
+			leader->exit_state = EXIT_DEAD;
 	}
 
 	write_unlock_irq(&tasklist_lock);
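
task_detached() above is shorthand for "nobody will ever wait() on this task". For reference, the predicate amounts to (a sketch matching how it is used in this file):

/* Sketch: a detached task self-reaps; exit_signal == -1 marks it. */
static inline int task_detached(struct task_struct *p)
{
	return p->exit_signal == -1;
}

Forcing the leader into EXIT_DEAD before the recursive release_task() call is what makes the stated invariant hold: release_task() only ever operates on EXIT_DEAD tasks.
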
@@ -567,8 +554,6 @@ void put_fs_struct(struct fs_struct *fs)
 	if (atomic_dec_and_test(&fs->count)) {
 		path_put(&fs->root);
 		path_put(&fs->pwd);
-		if (fs->altroot.dentry)
-			path_put(&fs->altroot);
 		kmem_cache_free(fs_cachep, fs);
 	}
 }
@@ -598,8 +583,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
 	 * If there are other users of the mm and the owner (us) is exiting
 	 * we need to find a new owner to take on the responsibility.
 	 */
-	if (!mm)
-		return 0;
 	if (atomic_read(&mm->mm_users) <= 1)
 		return 0;
 	if (mm->owner != p)
@@ -642,6 +625,16 @@ retry:
 	} while_each_thread(g, c);
 
 	read_unlock(&tasklist_lock);
+	/*
+	 * We found no owner yet mm_users > 1: this implies that we are
+	 * most likely racing with swapoff (try_to_unuse()) or /proc or
+	 * ptrace or page migration (get_task_mm()). Mark owner as NULL,
+	 * so that subsystems can understand the callback and take action.
+	 */
+	down_write(&mm->mmap_sem);
+	cgroup_mm_owner_callbacks(mm->owner, NULL);
+	mm->owner = NULL;
+	up_write(&mm->mmap_sem);
 	return;
 
 assign_new_owner:
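
After this hunk, mm->owner may legitimately be NULL while mm_users > 1, so anything that walks from an mm back to a task must tolerate that. A hypothetical consumer pattern (illustrative only; the function name is invented here, and the locking mirrors the writer side above):

/*
 * Illustrative sketch: safely fetch the owning task of an mm now that
 * the exit path may publish mm->owner == NULL under mmap_sem.
 */
static struct task_struct *sketch_get_mm_owner(struct mm_struct *mm)
{
	struct task_struct *owner = NULL;

	down_read(&mm->mmap_sem);	/* the writer updates owner under mmap_sem */
	if (mm->owner) {
		owner = mm->owner;
		get_task_struct(owner);	/* pin it before dropping the lock */
	}
	up_read(&mm->mmap_sem);
	return owner;			/* caller: put_task_struct() when done */
}
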
@@ -846,26 +839,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
  * the child reaper process (ie "init") in our pid
  * space.
  */
+static struct task_struct *find_new_reaper(struct task_struct *father)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+	struct task_struct *thread;
+
+	thread = father;
+	while_each_thread(father, thread) {
+		if (thread->flags & PF_EXITING)
+			continue;
+		if (unlikely(pid_ns->child_reaper == father))
+			pid_ns->child_reaper = thread;
+		return thread;
+	}
+
+	if (unlikely(pid_ns->child_reaper == father)) {
+		write_unlock_irq(&tasklist_lock);
+		if (unlikely(pid_ns == &init_pid_ns))
+			panic("Attempted to kill init!");
+
+		zap_pid_ns_processes(pid_ns);
+		write_lock_irq(&tasklist_lock);
+		/*
+		 * We can not clear ->child_reaper or leave it alone.
+		 * There may by stealth EXIT_DEAD tasks on ->children,
+		 * forget_original_parent() must move them somewhere.
+		 */
+		pid_ns->child_reaper = init_pid_ns.child_reaper;
+	}
+
+	return pid_ns->child_reaper;
+}
+
 static void forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *n, *reaper = father;
+	struct task_struct *p, *n, *reaper;
 	LIST_HEAD(ptrace_dead);
 
 	write_lock_irq(&tasklist_lock);
-
+	reaper = find_new_reaper(father);
 	/*
 	 * First clean up ptrace if we were using it.
 	 */
 	ptrace_exit(father, &ptrace_dead);
 
-	do {
-		reaper = next_thread(reaper);
-		if (reaper == father) {
-			reaper = task_child_reaper(father);
-			break;
-		}
-	} while (reaper->flags & PF_EXITING);
-
 	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		p->real_parent = reaper;
 		if (p->parent == father) {
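
find_new_reaper() leans on while_each_thread(), which walks the circular thread list and visits every thread in father's group except father itself; that is why thread is seeded with father before the loop. The iterator is essentially (as defined in <linux/sched.h>):

/* Advance t around the circular thread list until it wraps back to g. */
#define while_each_thread(g, t) \
	while ((t = next_thread(t)) != g)

So the function returns the first live (non-PF_EXITING) sibling, promoting it to pid_ns->child_reaper when father held that role; only when no live sibling exists does the namespace-zapping fallback run. This replaces both the old inline do/while search and the separate exit_child_reaper() path removed further down.
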
@@ -887,7 +904,8 @@ static void forget_original_parent(struct task_struct *father)
  */
 static void exit_notify(struct task_struct *tsk, int group_dead)
 {
-	int state;
+	int signal;
+	void *cookie;
 
 	/*
 	 * This does two things:
@@ -924,33 +942,24 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	    !capable(CAP_KILL))
 		tsk->exit_signal = SIGCHLD;
 
-	/* If something other than our normal parent is ptracing us, then
-	 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
-	 * only has special meaning to our real parent.
-	 */
-	if (!task_detached(tsk) && thread_group_empty(tsk)) {
-		int signal = ptrace_reparented(tsk) ?
-				SIGCHLD : tsk->exit_signal;
-		do_notify_parent(tsk, signal);
-	} else if (tsk->ptrace) {
-		do_notify_parent(tsk, SIGCHLD);
-	}
+	signal = tracehook_notify_death(tsk, &cookie, group_dead);
+	if (signal >= 0)
+		signal = do_notify_parent(tsk, signal);
 
-	state = EXIT_ZOMBIE;
-	if (task_detached(tsk) && likely(!tsk->ptrace))
-		state = EXIT_DEAD;
-	tsk->exit_state = state;
+	tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
 
 	/* mt-exec, de_thread() is waiting for us */
 	if (thread_group_leader(tsk) &&
-	    tsk->signal->notify_count < 0 &&
-	    tsk->signal->group_exit_task)
+	    tsk->signal->group_exit_task &&
+	    tsk->signal->notify_count < 0)
 		wake_up_process(tsk->signal->group_exit_task);
 
 	write_unlock_irq(&tasklist_lock);
 
+	tracehook_report_death(tsk, signal, cookie, group_dead);
+
 	/* If the process is dead, release it - nobody will wait for it */
-	if (state == EXIT_DEAD)
+	if (signal == DEATH_REAP)
 		release_task(tsk);
 }
 
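
The deleted branches are not lost: tracehook_notify_death() centralizes the same decision. Reconstructing it from the code removed above (a sketch; the in-tree hook body may differ in detail), a return value >= 0 is the signal to send the parent, while the negative DEATH_REAP / DEATH_DELAYED_GROUP_LEADER codes mean no notification:

/*
 * Sketch reconstructed from the branches deleted above, not the
 * verbatim <linux/tracehook.h> implementation.
 */
static inline int tracehook_notify_death(struct task_struct *task,
					 void **death_cookie, int group_dead)
{
	*death_cookie = NULL;
	if (task_detached(task))
		return task->ptrace ? SIGCHLD : DEATH_REAP;
	if (thread_group_empty(task))
		return ptrace_reparented(task) ? SIGCHLD : task->exit_signal;
	return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER;
}

Note that do_notify_parent() now returns a signal code as well, so a parent that ignores SIGCHLD can convert the death to DEATH_REAP on the spot, and the task is then released immediately below.
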
@@ -982,39 +991,6 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-static inline void exit_child_reaper(struct task_struct *tsk)
-{
-	if (likely(tsk->group_leader != task_child_reaper(tsk)))
-		return;
-
-	if (tsk->nsproxy->pid_ns == &init_pid_ns)
-		panic("Attempted to kill init!");
-
-	/*
-	 * @tsk is the last thread in the 'cgroup-init' and is exiting.
-	 * Terminate all remaining processes in the namespace and reap them
-	 * before exiting @tsk.
-	 *
-	 * Note that @tsk (last thread of cgroup-init) may not necessarily
-	 * be the child-reaper (i.e main thread of cgroup-init) of the
-	 * namespace i.e the child_reaper may have already exited.
-	 *
-	 * Even after a child_reaper exits, we let it inherit orphaned children,
-	 * because, pid_ns->child_reaper remains valid as long as there is
-	 * at least one living sub-thread in the cgroup init.
-
-	 * This living sub-thread of the cgroup-init will be notified when
-	 * a child inherited by the 'child-reaper' exits (do_notify_parent()
-	 * uses __group_send_sig_info()). Further, when reaping child processes,
-	 * do_wait() iterates over children of all living sub threads.
-
-	 * i.e even though 'child_reaper' thread is listed as the parent of the
-	 * orphaned children, any living sub-thread in the cgroup-init can
-	 * perform the role of the child_reaper.
-	 */
-	zap_pid_ns_processes(tsk->nsproxy->pid_ns);
-}
-
 NORET_TYPE void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -1029,10 +1005,7 @@ NORET_TYPE void do_exit(long code)
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 
-	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
-		current->ptrace_message = code;
-		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
-	}
+	tracehook_report_exit(&code);
 
 	/*
 	 * We're taking recursive faults here in do_exit. Safest is to just
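
tracehook_report_exit() folds the deleted PT_TRACE_EXIT test behind one call. A sketch equivalent to the removed lines (the real header may route this through a generic ptrace-event helper; taking the exit code by reference lets a tracer rewrite it):

/* Sketch: equivalent to the lines deleted above. */
static inline void tracehook_report_exit(long *exit_code)
{
	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
		current->ptrace_message = *exit_code;
		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
	}
}
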
@@ -1077,7 +1050,6 @@ NORET_TYPE void do_exit(long code)
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
-		exit_child_reaper(tsk);
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 	}
@@ -1378,21 +1350,8 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
-#ifdef CONFIG_TASK_XACCT
-		psig->rchar += p->rchar + sig->rchar;
-		psig->wchar += p->wchar + sig->wchar;
-		psig->syscr += p->syscr + sig->syscr;
-		psig->syscw += p->syscw + sig->syscw;
-#endif /* CONFIG_TASK_XACCT */
-#ifdef CONFIG_TASK_IO_ACCOUNTING
-		psig->ioac.read_bytes +=
-			p->ioac.read_bytes + sig->ioac.read_bytes;
-		psig->ioac.write_bytes +=
-			p->ioac.write_bytes + sig->ioac.write_bytes;
-		psig->ioac.cancelled_write_bytes +=
-			p->ioac.cancelled_write_bytes +
-			sig->ioac.cancelled_write_bytes;
-#endif /* CONFIG_TASK_IO_ACCOUNTING */
+		task_io_accounting_add(&psig->ioac, &p->ioac);
+		task_io_accounting_add(&psig->ioac, &sig->ioac);
 		spin_unlock_irq(&p->parent->sighand->siglock);
 	}
 