Diffstat (limited to 'kernel/exit.c')
 -rw-r--r--   kernel/exit.c   124
 1 file changed, 61 insertions(+), 63 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 38ec40630149..059b38cae384 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,9 +112,7 @@ static void __exit_signal(struct task_struct *tsk)
		 * We won't ever get here for the group leader, since it
		 * will have been the last reference on the signal_struct.
		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
		sig->min_flt += tsk->min_flt;
		sig->maj_flt += tsk->maj_flt;
		sig->nvcsw += tsk->nvcsw;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
		sig->inblock += task_io_get_inblock(tsk);
		sig->oublock += task_io_get_oublock(tsk);
		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
		sig = NULL; /* Marker for below. */
	}

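Reviewer note: the two hunks above stop folding per-thread utime/stime (and sum_exec_runtime) into the shared signal_struct at each thread's exit. With this series, group times are instead summed on demand by thread_group_cputime(), as the wait_task_zombie() hunks below spell out. A minimal user-space sketch of that consolidation, with illustrative names only:

/*
 * Reviewer sketch, NOT kernel code: sum per-thread times over the
 * whole group on demand instead of accumulating at each thread exit.
 * All names below are illustrative.
 */
#include <stdio.h>

typedef unsigned long long cputime_t;

struct task_cputime_sketch {
        cputime_t utime;        /* user time */
        cputime_t stime;        /* system time */
};

struct thread_sketch {
        cputime_t utime;
        cputime_t stime;
};

/* One pass over the group replaces the old per-exit accumulation. */
static void group_cputime(const struct thread_sketch *t, int n,
                          struct task_cputime_sketch *sum)
{
        sum->utime = 0;
        sum->stime = 0;
        for (int i = 0; i < n; i++) {
                sum->utime += t[i].utime;
                sum->stime += t[i].stime;
        }
}

int main(void)
{
        struct thread_sketch group[3] = { {10, 2}, {7, 1}, {3, 4} };
        struct task_cputime_sketch total;

        group_cputime(group, 3, &total);
        printf("utime=%llu stime=%llu\n", total.utime, total.stime);
        return 0;
}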
@@ -583,8 +580,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
	 * If there are other users of the mm and the owner (us) is exiting
	 * we need to find a new owner to take on the responsibility.
	 */
-	if (!mm)
-		return 0;
	if (atomic_read(&mm->mm_users) <= 1)
		return 0;
	if (mm->owner != p)
@@ -627,29 +622,38 @@ retry:
	} while_each_thread(g, c);

	read_unlock(&tasklist_lock);
+	/*
+	 * We found no owner yet mm_users > 1: this implies that we are
+	 * most likely racing with swapoff (try_to_unuse()) or /proc or
+	 * ptrace or page migration (get_task_mm()). Mark owner as NULL,
+	 * so that subsystems can understand the callback and take action.
+	 */
+	down_write(&mm->mmap_sem);
+	cgroup_mm_owner_callbacks(mm->owner, NULL);
+	mm->owner = NULL;
+	up_write(&mm->mmap_sem);
	return;

assign_new_owner:
	BUG_ON(c == p);
	get_task_struct(c);
+	read_unlock(&tasklist_lock);
+	down_write(&mm->mmap_sem);
	/*
	 * The task_lock protects c->mm from changing.
	 * We always want mm->owner->mm == mm
	 */
	task_lock(c);
-	/*
-	 * Delay read_unlock() till we have the task_lock()
-	 * to ensure that c does not slip away underneath us
-	 */
-	read_unlock(&tasklist_lock);
	if (c->mm != mm) {
		task_unlock(c);
+		up_write(&mm->mmap_sem);
		put_task_struct(c);
		goto retry;
	}
	cgroup_mm_owner_callbacks(mm->owner, c);
	mm->owner = c;
	task_unlock(c);
+	up_write(&mm->mmap_sem);
	put_task_struct(c);
}
#endif /* CONFIG_MM_OWNER */
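Reviewer note: the locking protocol changes here. tasklist_lock is now dropped before mmap_sem is taken for writing, and mmap_sem is held across the mm->owner update (including the new NULL-owner fallback), with a full retry if the candidate's mm changed in the unlocked window. A user-space sketch of that drop-outer-lock, take-inner-lock, re-validate-and-retry pattern, with illustrative names:

/*
 * Reviewer sketch, NOT kernel code: the retry pattern the hunk above
 * adopts for tasklist_lock vs. mmap_sem. Illustrative names only.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t outer = PTHREAD_RWLOCK_INITIALIZER; /* plays tasklist_lock */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;   /* plays mmap_sem */

static int owner;
static int candidate = 2;       /* picked under the outer lock */
static int candidate_valid = 1; /* plays the c->mm == mm re-check */

static void assign_new_owner(void)
{
retry:
        pthread_rwlock_rdlock(&outer);
        int c = candidate;
        /* Drop the outer lock BEFORE taking the inner one, keeping a
         * single lock order and avoiding lock inversion. */
        pthread_rwlock_unlock(&outer);

        pthread_mutex_lock(&inner);
        if (!candidate_valid) {
                /* Candidate changed in the unlocked window: back out
                 * completely and retry, as the kernel code does. */
                pthread_mutex_unlock(&inner);
                goto retry;
        }
        owner = c; /* the update is made while holding the inner lock */
        pthread_mutex_unlock(&inner);
}

int main(void)
{
        assign_new_owner();
        printf("owner=%d\n", owner);
        return 0;
}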
@@ -831,26 +835,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
 * the child reaper process (ie "init") in our pid
 * space.
 */
+static struct task_struct *find_new_reaper(struct task_struct *father)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+	struct task_struct *thread;
+
+	thread = father;
+	while_each_thread(father, thread) {
+		if (thread->flags & PF_EXITING)
+			continue;
+		if (unlikely(pid_ns->child_reaper == father))
+			pid_ns->child_reaper = thread;
+		return thread;
+	}
+
+	if (unlikely(pid_ns->child_reaper == father)) {
+		write_unlock_irq(&tasklist_lock);
+		if (unlikely(pid_ns == &init_pid_ns))
+			panic("Attempted to kill init!");
+
+		zap_pid_ns_processes(pid_ns);
+		write_lock_irq(&tasklist_lock);
+		/*
+		 * We cannot clear ->child_reaper or leave it alone.
+		 * There may be stealth EXIT_DEAD tasks on ->children,
+		 * forget_original_parent() must move them somewhere.
+		 */
+		pid_ns->child_reaper = init_pid_ns.child_reaper;
+	}
+
+	return pid_ns->child_reaper;
+}
+
 static void forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *n, *reaper = father;
+	struct task_struct *p, *n, *reaper;
	LIST_HEAD(ptrace_dead);

	write_lock_irq(&tasklist_lock);
-
+	reaper = find_new_reaper(father);
	/*
	 * First clean up ptrace if we were using it.
	 */
	ptrace_exit(father, &ptrace_dead);

-	do {
-		reaper = next_thread(reaper);
-		if (reaper == father) {
-			reaper = task_child_reaper(father);
-			break;
-		}
-	} while (reaper->flags & PF_EXITING);
-
	list_for_each_entry_safe(p, n, &father->children, sibling) {
		p->real_parent = reaper;
		if (p->parent == father) {
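Reviewer note: find_new_reaper() centralizes what forget_original_parent() used to open-code: walk the father's thread group (while_each_thread() iterates the circular thread list), skip threads already marked PF_EXITING, and hand over the pid namespace's child_reaper role if the father held it; only when no live thread remains does it fall back to the namespace reaper, zapping the namespace first when the father was its reaper. A compact sketch of the selection walk, with illustrative names:

/*
 * Reviewer sketch, NOT kernel code: the selection walk over a
 * circular thread-group list (the shape while_each_thread() covers).
 */
#include <stdio.h>

#define PF_EXITING 0x00000004 /* value matches the kernel flag */

struct task_sketch {
        unsigned int flags;
        struct task_sketch *next; /* circular thread-group list */
};

/*
 * Return the first live sibling after 'father', or NULL when every
 * remaining thread is exiting (the caller then falls back to the
 * namespace's child_reaper, zapping the namespace if needed).
 */
static struct task_sketch *pick_reaper(struct task_sketch *father)
{
        for (struct task_sketch *t = father->next; t != father; t = t->next)
                if (!(t->flags & PF_EXITING))
                        return t;
        return NULL;
}

int main(void)
{
        struct task_sketch a = { PF_EXITING, NULL };
        struct task_sketch b = { 0, NULL };
        struct task_sketch father = { 0, NULL };

        father.next = &a;       /* father -> a -> b -> father */
        a.next = &b;
        b.next = &father;

        printf("reaper is %s\n", pick_reaper(&father) == &b ? "b" : "other");
        return 0;
}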
@@ -918,8 +946,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)

	/* mt-exec, de_thread() is waiting for us */
	if (thread_group_leader(tsk) &&
-	    tsk->signal->notify_count < 0 &&
-	    tsk->signal->group_exit_task)
+	    tsk->signal->group_exit_task &&
+	    tsk->signal->notify_count < 0)
		wake_up_process(tsk->signal->group_exit_task);

	write_unlock_irq(&tasklist_lock);
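Reviewer note: this hunk only swaps the last two operands of the &&, so the overall result is unchanged, but with C's left-to-right short-circuit evaluation notify_count is now only examined once group_exit_task shows an exec is actually in progress. A trivial sketch of the guard-first idiom, with illustrative names:

/*
 * Reviewer sketch, NOT kernel code: guard-first ordering under
 * short-circuit && -- the count is only read when the pointer guard
 * says the value is meaningful.
 */
#include <stdio.h>
#include <stddef.h>

struct sig_sketch {
        int notify_count;
        void *group_exit_task;
};

static int should_wake(const struct sig_sketch *s)
{
        /* New order: presence check first, count second. */
        return s->group_exit_task != NULL && s->notify_count < 0;
}

int main(void)
{
        struct sig_sketch no_exec = { -1, NULL };
        int token;
        struct sig_sketch in_exec = { -1, &token };

        printf("%d %d\n", should_wake(&no_exec), should_wake(&in_exec));
        return 0;
}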
@@ -959,39 +987,6 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif

-static inline void exit_child_reaper(struct task_struct *tsk)
-{
-	if (likely(tsk->group_leader != task_child_reaper(tsk)))
-		return;
-
-	if (tsk->nsproxy->pid_ns == &init_pid_ns)
-		panic("Attempted to kill init!");
-
-	/*
-	 * @tsk is the last thread in the 'cgroup-init' and is exiting.
-	 * Terminate all remaining processes in the namespace and reap them
-	 * before exiting @tsk.
-	 *
-	 * Note that @tsk (last thread of cgroup-init) may not necessarily
-	 * be the child-reaper (i.e. main thread of cgroup-init) of the
-	 * namespace, i.e. the child_reaper may have already exited.
-	 *
-	 * Even after a child_reaper exits, we let it inherit orphaned
-	 * children, because pid_ns->child_reaper remains valid as long as
-	 * there is at least one living sub-thread in the cgroup init.
-	 *
-	 * This living sub-thread of the cgroup-init will be notified when
-	 * a child inherited by the 'child-reaper' exits (do_notify_parent()
-	 * uses __group_send_sig_info()). Further, when reaping child
-	 * processes, do_wait() iterates over children of all living
-	 * sub-threads.
-	 *
-	 * I.e. even though the 'child_reaper' thread is listed as the parent
-	 * of the orphaned children, any living sub-thread in the cgroup-init
-	 * can perform the role of the child_reaper.
-	 */
-	zap_pid_ns_processes(tsk->nsproxy->pid_ns);
-}
-
 NORET_TYPE void do_exit(long code)
 {
	struct task_struct *tsk = current;
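Reviewer note: the duty removed here does not vanish. zap_pid_ns_processes() is now called from find_new_reaper() (added earlier in this patch), so namespace teardown happens while reparenting in forget_original_parent() rather than from do_exit() via exit_child_reaper(); the corresponding do_exit() call site is removed in the next hunk.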
@@ -1051,7 +1046,6 @@ NORET_TYPE void do_exit(long code)
	}
	group_dead = atomic_dec_and_test(&tsk->signal->live);
	if (group_dead) {
-		exit_child_reaper(tsk);
		hrtimer_cancel(&tsk->signal->real_timer);
		exit_itimers(tsk->signal);
	}
@@ -1304,6 +1298,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
	if (likely(!traced)) {
		struct signal_struct *psig;
		struct signal_struct *sig;
+		struct task_cputime cputime;

		/*
		 * The resource counters for the group leader are in its
@@ -1319,20 +1314,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
		 * need to protect the access to p->parent->signal fields,
		 * as other threads in the parent group can be right
		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
		 */
		spin_lock_irq(&p->parent->sighand->siglock);
		psig = p->parent->signal;
		sig = p->signal;
+		thread_group_cputime(p, &cputime);
		psig->cutime =
			cputime_add(psig->cutime,
-				    cputime_add(p->utime,
-				    cputime_add(sig->utime,
-						sig->cutime)));
+				    cputime_add(cputime.utime,
+						sig->cutime));
		psig->cstime =
			cputime_add(psig->cstime,
-				    cputime_add(p->stime,
-				    cputime_add(sig->stime,
-						sig->cstime)));
+				    cputime_add(cputime.stime,
+						sig->cstime));
		psig->cgtime =
			cputime_add(psig->cgtime,
				    cputime_add(p->gtime,
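Reviewer note: with struct task_cputime in hand, the parent's cutime/cstime accumulation collapses from a three-way cputime_add() chain (p->utime + sig->utime + sig->cutime) to the group total from thread_group_cputime() plus the child's own cutime, i.e. times already collected from the child's reaped children. A sketch of the arithmetic, with illustrative values:

/*
 * Reviewer sketch, NOT kernel code: the accumulation after the change.
 */
#include <stdio.h>

typedef unsigned long long cputime_t;

static cputime_t cputime_add(cputime_t a, cputime_t b)
{
        return a + b;
}

int main(void)
{
        cputime_t group_utime = 40;  /* cputime.utime for the whole group */
        cputime_t child_cutime = 5;  /* sig->cutime: child's reaped children */
        cputime_t parent_cutime = 100;

        parent_cutime = cputime_add(parent_cutime,
                                    cputime_add(group_utime, child_cutime));
        printf("parent cutime = %llu\n", parent_cutime); /* 145 */
        return 0;
}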