Diffstat (limited to 'kernel/exit.c')
-rw-r--r--  kernel/exit.c  82
1 file changed, 46 insertions(+), 36 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index c8d0485578be..e69edc74aeeb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,18 +40,24 @@
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
-#include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/tracehook.h>
+#include <linux/init_task.h>
+#include <trace/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
+#include "cred-internals.h"
+
+DEFINE_TRACE(sched_process_free);
+DEFINE_TRACE(sched_process_exit);
+DEFINE_TRACE(sched_process_wait);
 
 static void exit_mm(struct task_struct * tsk);
 
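The three DEFINE_TRACE() lines instantiate the tracepoints declared in <trace/sched.h>; later hunks fire them via trace_sched_process_free/exit/wait(). As a hedged sketch of how such hooks are consumed (the probe function and module boilerplate below are illustrative, not part of this patch), a module can attach a probe with the register_trace_* helpers that DECLARE_TRACE() generates:

/*
 * Sketch only: a module attaching a probe to the sched_process_exit
 * tracepoint this patch adds.  The probe body and module boilerplate
 * are hypothetical; the register/unregister helpers are the ones
 * generated by DECLARE_TRACE() in <trace/sched.h>.
 */
#include <linux/module.h>
#include <linux/sched.h>
#include <trace/sched.h>

static void probe_exit(struct task_struct *p)
{
	/* runs each time do_exit() fires trace_sched_process_exit() */
	printk(KERN_INFO "task %d (%s) exiting\n", p->pid, p->comm);
}

static int __init exit_probe_init(void)
{
	return register_trace_sched_process_exit(probe_exit);
}

static void __exit exit_probe_cleanup(void)
{
	unregister_trace_sched_process_exit(probe_exit);
	/* wait for in-flight probe calls before the module text goes away */
	tracepoint_synchronize_unregister();
}

module_init(exit_probe_init);
module_exit(exit_probe_cleanup);
MODULE_LICENSE("GPL");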
@@ -112,8 +118,6 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -122,7 +126,6 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -143,13 +146,21 @@ static void __exit_signal(struct task_struct *tsk)
 	if (sig) {
 		flush_sigqueue(&sig->shared_pending);
 		taskstats_tgid_free(sig);
+		/*
+		 * Make sure ->signal can't go away under rq->lock,
+		 * see account_group_exec_runtime().
+		 */
+		task_rq_unlock_wait(tsk);
 		__cleanup_signal(sig);
 	}
 }
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
 {
-	put_task_struct(container_of(rhp, struct task_struct, rcu));
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	trace_sched_process_free(tsk);
+	put_task_struct(tsk);
 }
 
 
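delayed_put_task_struct() is the RCU callback that release_task() queues with call_rcu(); naming the task in a local variable lets the new sched_process_free tracepoint fire just before the final reference drop, the last point at which the task_struct is guaranteed to exist. A minimal sketch of the same call_rcu()/container_of() deferred-free pattern, using a hypothetical struct widget rather than task_struct:

/*
 * Minimal sketch of the deferred-free pattern used by
 * delayed_put_task_struct().  "struct widget" and its helpers are
 * hypothetical stand-ins for task_struct.
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct widget {
	int id;
	struct rcu_head rcu;		/* embedded callback handle */
};

static void widget_free_rcu(struct rcu_head *rhp)
{
	/* recover the enclosing object from the embedded rcu_head */
	struct widget *w = container_of(rhp, struct widget, rcu);

	kfree(w);
}

static void widget_release(struct widget *w)
{
	/*
	 * RCU readers may still be traversing to w; defer the free
	 * until all pre-existing read-side critical sections finish.
	 */
	call_rcu(&w->rcu, widget_free_rcu);
}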
@@ -159,7 +170,10 @@ void release_task(struct task_struct * p)
 	int zap_leader;
 repeat:
 	tracehook_prepare_release_task(p);
-	atomic_dec(&p->user->processes);
+	/* don't need to get the RCU readlock here - the process is dead and
+	 * can't be modifying its own credentials */
+	atomic_dec(&__task_cred(p)->user->processes);
+
 	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
 	tracehook_finish_release_task(p);
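This hunk is part of the credentials split: the user_struct is now reached through the RCU-managed task credentials, so p->user becomes __task_cred(p)->user. The lockless __task_cred() form is safe here for the reason the comment gives, a dead task can no longer commit new credentials; in the general case the access must sit inside an RCU read-side section, roughly like this hedged sketch (read_task_uid() is an illustrative name, not kernel API):

/*
 * Sketch of the general-case access pattern: __task_cred() is only
 * valid inside an RCU read-side critical section unless the task's
 * credentials are otherwise pinned.
 */
#include <linux/cred.h>
#include <linux/sched.h>

static uid_t read_task_uid(struct task_struct *p)
{
	uid_t uid;

	rcu_read_lock();
	uid = __task_cred(p)->uid;
	rcu_read_unlock();

	return uid;
}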
@@ -334,12 +348,12 @@ static void reparent_to_kthreadd(void)
 	/* cpus_allowed? */
 	/* rt_priority? */
 	/* signals? */
-	security_task_reparent_to_init(current);
 	memcpy(current->signal->rlim, init_task.signal->rlim,
 	       sizeof(current->signal->rlim));
-	atomic_inc(&(INIT_USER->__count));
+
+	atomic_inc(&init_cred.usage);
+	commit_creds(&init_cred);
 	write_unlock_irq(&tasklist_lock);
-	switch_uid(INIT_USER);
 }
 
 void __set_special_pids(struct pid *pid)
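reparent_to_kthreadd() now adopts the static init_cred wholesale, bumping its usage count by hand because commit_creds() consumes a reference. Ordinary code changes credentials with the prepare/commit pair instead, along these lines (a sketch; the fsuid tweak shown is an arbitrary example, not what this function does):

/*
 * Sketch of the usual prepare_creds()/commit_creds() sequence under
 * the new credentials API.  drop_to_nobody() and its parameter are
 * hypothetical.
 */
#include <linux/cred.h>
#include <linux/errno.h>

static int drop_to_nobody(uid_t nobody)
{
	struct cred *new;

	new = prepare_creds();		/* private copy of current's creds */
	if (!new)
		return -ENOMEM;
	new->fsuid = nobody;		/* modify the copy, not the live set */
	return commit_creds(new);	/* consumes the reference; installs it */
}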
@@ -640,24 +654,23 @@ retry:
 assign_new_owner:
 	BUG_ON(c == p);
 	get_task_struct(c);
+	read_unlock(&tasklist_lock);
+	down_write(&mm->mmap_sem);
 	/*
 	 * The task_lock protects c->mm from changing.
 	 * We always want mm->owner->mm == mm
 	 */
 	task_lock(c);
-	/*
-	 * Delay read_unlock() till we have the task_lock()
-	 * to ensure that c does not slip away underneath us
-	 */
-	read_unlock(&tasklist_lock);
 	if (c->mm != mm) {
 		task_unlock(c);
+		up_write(&mm->mmap_sem);
 		put_task_struct(c);
 		goto retry;
 	}
 	cgroup_mm_owner_callbacks(mm->owner, c);
 	mm->owner = c;
 	task_unlock(c);
+	up_write(&mm->mmap_sem);
 	put_task_struct(c);
 }
 #endif /* CONFIG_MM_OWNER */
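The reshuffle in the assign_new_owner path changes the lock ordering: tasklist_lock is dropped as soon as the candidate is pinned with get_task_struct(), mm->mmap_sem is taken before task_lock(), and c->mm is revalidated once both locks are held, retrying if the candidate released the mm in the window. A compact sketch of that take-heavier-lock-then-revalidate shape, with hypothetical types standing in for mm_struct and task_struct:

/*
 * Sketch of the lock-ordering shape this hunk establishes: take the
 * sleeping rwsem before the per-owner spinlock, revalidate the
 * relationship, and back out in reverse order on a lost race.  (The
 * real code re-selects a candidate under tasklist_lock before each
 * retry; this sketch just loops.)
 */
#include <linux/rwsem.h>
#include <linux/spinlock.h>

struct owner;

struct resource {
	struct rw_semaphore sem;	/* plays the role of mm->mmap_sem */
	struct owner *owner;
};

struct owner {
	spinlock_t lock;		/* plays the role of task_lock() */
	struct resource *res;
};

static void adopt_owner(struct resource *res, struct owner *c)
{
retry:
	down_write(&res->sem);		/* sleeping lock first... */
	spin_lock(&c->lock);		/* ...spinlock second */
	if (c->res != res) {
		/* candidate dropped the resource while we slept */
		spin_unlock(&c->lock);
		up_write(&res->sem);
		goto retry;
	}
	res->owner = c;
	spin_unlock(&c->lock);
	up_write(&res->sem);
}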
@@ -1021,8 +1034,6 @@ NORET_TYPE void do_exit(long code)
 		 * task into the wait for ever nirwana as well.
 		 */
 		tsk->flags |= PF_EXITPIDONE;
-		if (tsk->io_context)
-			exit_io_context();
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
@@ -1051,14 +1062,6 @@ NORET_TYPE void do_exit(long code)
 		exit_itimers(tsk->signal);
 	}
 	acct_collect(code, group_dead);
-#ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list))
-		exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list))
-		compat_exit_robust_list(tsk);
-#endif
-#endif
 	if (group_dead)
 		tty_audit_exit();
 	if (unlikely(tsk->audit_context))
@@ -1071,13 +1074,14 @@ NORET_TYPE void do_exit(long code)
 
 	if (group_dead)
 		acct_process();
+	trace_sched_process_exit(tsk);
+
 	exit_sem(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
 	cgroup_exit(tsk, 1);
-	exit_keys(tsk);
 
 	if (group_dead && tsk->signal->leader)
 		disassociate_ctty(1);
@@ -1122,7 +1126,6 @@ NORET_TYPE void do_exit(long code)
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
-
 	schedule();
 	BUG();
 	/* Avoid "noreturn function does return". */
@@ -1262,12 +1265,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
+	uid_t uid = __task_cred(p)->uid;
 
 	if (!likely(options & WEXITED))
 		return 0;
 
 	if (unlikely(options & WNOWAIT)) {
-		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
 
@@ -1299,6 +1302,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (likely(!traced)) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
+		struct task_cputime cputime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1314,20 +1318,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * need to protect the access to p->parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
+		thread_group_cputime(p, &cputime);
 		spin_lock_irq(&p->parent->sighand->siglock);
 		psig = p->parent->signal;
 		sig = p->signal;
 		psig->cutime =
 			cputime_add(psig->cutime,
-				    cputime_add(p->utime,
-				    cputime_add(sig->utime,
-						sig->cutime)));
+				    cputime_add(cputime.utime,
+						sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-				    cputime_add(p->stime,
-				    cputime_add(sig->stime,
-						sig->cstime)));
+				    cputime_add(cputime.stime,
+						sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 				    cputime_add(p->gtime,
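The collapse from a triple-nested cputime_add() to a double one follows from the __exit_signal() hunks above: per-thread utime/stime are no longer folded into signal_struct as each thread dies, so the reaper asks thread_group_cputime() for a consolidated total that already includes the group leader's own time. The bookkeeping, reduced to plain integers in a hedged sketch (all names illustrative):

/*
 * Plain-integer illustration of why the nesting shrinks: the group
 * total returned by thread_group_cputime() already folds in the
 * leader's time, which the old code added as a separate p->utime term.
 */
struct group_times {
	unsigned long utime;	/* all live threads + already-dead threads */
};

/* pre-patch: leader + dead-thread sum + the child's own reaped children */
static unsigned long charge_old(unsigned long psig_cutime,
				unsigned long p_utime,
				unsigned long sig_utime,
				unsigned long sig_cutime)
{
	return psig_cutime + p_utime + sig_utime + sig_cutime;
}

/* post-patch: consolidated group total + the child's own reaped children */
static unsigned long charge_new(unsigned long psig_cutime,
				const struct group_times *gt,
				unsigned long sig_cutime)
{
	return psig_cutime + gt->utime + sig_cutime;
}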
@@ -1384,7 +1391,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (!retval && infop)
 		retval = put_user(pid, &infop->si_pid);
 	if (!retval && infop)
-		retval = put_user(p->uid, &infop->si_uid);
+		retval = put_user(uid, &infop->si_uid);
 	if (!retval)
 		retval = pid;
 
@@ -1449,7 +1456,8 @@ static int wait_task_stopped(int ptrace, struct task_struct *p,
 	if (!unlikely(options & WNOWAIT))
 		p->exit_code = 0;
 
-	uid = p->uid;
+	/* don't need the RCU readlock here as we're holding a spinlock */
+	uid = __task_cred(p)->uid;
 unlock_sig:
 	spin_unlock_irq(&p->sighand->siglock);
 	if (!exit_code)
@@ -1523,10 +1531,10 @@ static int wait_task_continued(struct task_struct *p, int options,
 	}
 	if (!unlikely(options & WNOWAIT))
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
+	uid = __task_cred(p)->uid;
 	spin_unlock_irq(&p->sighand->siglock);
 
 	pid = task_pid_vnr(p);
-	uid = p->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
@@ -1672,6 +1680,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
 	struct task_struct *tsk;
 	int retval;
 
+	trace_sched_process_wait(pid);
+
 	add_wait_queue(&current->signal->wait_chldexit,&wait);
 repeat:
 	/*