diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 85 |
1 files changed, 66 insertions, 19 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index bfee931ee3fb..4c20fff8c13a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/ftrace.h> | 49 | #include <linux/ftrace.h> |
50 | #include <linux/profile.h> | 50 | #include <linux/profile.h> |
51 | #include <linux/rmap.h> | 51 | #include <linux/rmap.h> |
52 | #include <linux/ksm.h> | ||
52 | #include <linux/acct.h> | 53 | #include <linux/acct.h> |
53 | #include <linux/tsacct_kern.h> | 54 | #include <linux/tsacct_kern.h> |
54 | #include <linux/cn_proc.h> | 55 | #include <linux/cn_proc.h> |
@@ -61,7 +62,8 @@ | |||
61 | #include <linux/blkdev.h> | 62 | #include <linux/blkdev.h> |
62 | #include <linux/fs_struct.h> | 63 | #include <linux/fs_struct.h> |
63 | #include <linux/magic.h> | 64 | #include <linux/magic.h> |
64 | #include <linux/perf_counter.h> | 65 | #include <linux/perf_event.h> |
66 | #include <linux/posix-timers.h> | ||
65 | 67 | ||
66 | #include <asm/pgtable.h> | 68 | #include <asm/pgtable.h> |
67 | #include <asm/pgalloc.h> | 69 | #include <asm/pgalloc.h> |
@@ -136,9 +138,17 @@ struct kmem_cache *vm_area_cachep; | |||
136 | /* SLAB cache for mm_struct structures (tsk->mm) */ | 138 | /* SLAB cache for mm_struct structures (tsk->mm) */ |
137 | static struct kmem_cache *mm_cachep; | 139 | static struct kmem_cache *mm_cachep; |
138 | 140 | ||
141 | static void account_kernel_stack(struct thread_info *ti, int account) | ||
142 | { | ||
143 | struct zone *zone = page_zone(virt_to_page(ti)); | ||
144 | |||
145 | mod_zone_page_state(zone, NR_KERNEL_STACK, account); | ||
146 | } | ||
147 | |||
139 | void free_task(struct task_struct *tsk) | 148 | void free_task(struct task_struct *tsk) |
140 | { | 149 | { |
141 | prop_local_destroy_single(&tsk->dirties); | 150 | prop_local_destroy_single(&tsk->dirties); |
151 | account_kernel_stack(tsk->stack, -1); | ||
142 | free_thread_info(tsk->stack); | 152 | free_thread_info(tsk->stack); |
143 | rt_mutex_debug_task_free(tsk); | 153 | rt_mutex_debug_task_free(tsk); |
144 | ftrace_graph_exit_task(tsk); | 154 | ftrace_graph_exit_task(tsk); |
@@ -253,6 +263,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
253 | tsk->btrace_seq = 0; | 263 | tsk->btrace_seq = 0; |
254 | #endif | 264 | #endif |
255 | tsk->splice_pipe = NULL; | 265 | tsk->splice_pipe = NULL; |
266 | |||
267 | account_kernel_stack(ti, 1); | ||
268 | |||
256 | return tsk; | 269 | return tsk; |
257 | 270 | ||
258 | out: | 271 | out: |
@@ -288,6 +301,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
288 | rb_link = &mm->mm_rb.rb_node; | 301 | rb_link = &mm->mm_rb.rb_node; |
289 | rb_parent = NULL; | 302 | rb_parent = NULL; |
290 | pprev = &mm->mmap; | 303 | pprev = &mm->mmap; |
304 | retval = ksm_fork(mm, oldmm); | ||
305 | if (retval) | ||
306 | goto out; | ||
291 | 307 | ||
292 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { | 308 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { |
293 | struct file *file; | 309 | struct file *file; |
@@ -418,22 +434,30 @@ __setup("coredump_filter=", coredump_filter_setup); | |||
418 | 434 | ||
419 | #include <linux/init_task.h> | 435 | #include <linux/init_task.h> |
420 | 436 | ||
437 | static void mm_init_aio(struct mm_struct *mm) | ||
438 | { | ||
439 | #ifdef CONFIG_AIO | ||
440 | spin_lock_init(&mm->ioctx_lock); | ||
441 | INIT_HLIST_HEAD(&mm->ioctx_list); | ||
442 | #endif | ||
443 | } | ||
444 | |||
421 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 445 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
422 | { | 446 | { |
423 | atomic_set(&mm->mm_users, 1); | 447 | atomic_set(&mm->mm_users, 1); |
424 | atomic_set(&mm->mm_count, 1); | 448 | atomic_set(&mm->mm_count, 1); |
425 | init_rwsem(&mm->mmap_sem); | 449 | init_rwsem(&mm->mmap_sem); |
426 | INIT_LIST_HEAD(&mm->mmlist); | 450 | INIT_LIST_HEAD(&mm->mmlist); |
427 | mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; | 451 | mm->flags = (current->mm) ? |
452 | (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; | ||
428 | mm->core_state = NULL; | 453 | mm->core_state = NULL; |
429 | mm->nr_ptes = 0; | 454 | mm->nr_ptes = 0; |
430 | set_mm_counter(mm, file_rss, 0); | 455 | set_mm_counter(mm, file_rss, 0); |
431 | set_mm_counter(mm, anon_rss, 0); | 456 | set_mm_counter(mm, anon_rss, 0); |
432 | spin_lock_init(&mm->page_table_lock); | 457 | spin_lock_init(&mm->page_table_lock); |
433 | spin_lock_init(&mm->ioctx_lock); | ||
434 | INIT_HLIST_HEAD(&mm->ioctx_list); | ||
435 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 458 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
436 | mm->cached_hole_size = ~0UL; | 459 | mm->cached_hole_size = ~0UL; |
460 | mm_init_aio(mm); | ||
437 | mm_init_owner(mm, p); | 461 | mm_init_owner(mm, p); |
438 | 462 | ||
439 | if (likely(!mm_alloc_pgd(mm))) { | 463 | if (likely(!mm_alloc_pgd(mm))) { |
@@ -485,6 +509,7 @@ void mmput(struct mm_struct *mm) | |||
485 | 509 | ||
486 | if (atomic_dec_and_test(&mm->mm_users)) { | 510 | if (atomic_dec_and_test(&mm->mm_users)) { |
487 | exit_aio(mm); | 511 | exit_aio(mm); |
512 | ksm_exit(mm); | ||
488 | exit_mmap(mm); | 513 | exit_mmap(mm); |
489 | set_mm_exe_file(mm, NULL); | 514 | set_mm_exe_file(mm, NULL); |
490 | if (!list_empty(&mm->mmlist)) { | 515 | if (!list_empty(&mm->mmlist)) { |
@@ -493,6 +518,8 @@ void mmput(struct mm_struct *mm) | |||
493 | spin_unlock(&mmlist_lock); | 518 | spin_unlock(&mmlist_lock); |
494 | } | 519 | } |
495 | put_swap_token(mm); | 520 | put_swap_token(mm); |
521 | if (mm->binfmt) | ||
522 | module_put(mm->binfmt->module); | ||
496 | mmdrop(mm); | 523 | mmdrop(mm); |
497 | } | 524 | } |
498 | } | 525 | } |
@@ -543,12 +570,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
543 | 570 | ||
544 | /* Get rid of any futexes when releasing the mm */ | 571 | /* Get rid of any futexes when releasing the mm */ |
545 | #ifdef CONFIG_FUTEX | 572 | #ifdef CONFIG_FUTEX |
546 | if (unlikely(tsk->robust_list)) | 573 | if (unlikely(tsk->robust_list)) { |
547 | exit_robust_list(tsk); | 574 | exit_robust_list(tsk); |
575 | tsk->robust_list = NULL; | ||
576 | } | ||
548 | #ifdef CONFIG_COMPAT | 577 | #ifdef CONFIG_COMPAT |
549 | if (unlikely(tsk->compat_robust_list)) | 578 | if (unlikely(tsk->compat_robust_list)) { |
550 | compat_exit_robust_list(tsk); | 579 | compat_exit_robust_list(tsk); |
580 | tsk->compat_robust_list = NULL; | ||
581 | } | ||
551 | #endif | 582 | #endif |
583 | if (unlikely(!list_empty(&tsk->pi_state_list))) | ||
584 | exit_pi_state_list(tsk); | ||
552 | #endif | 585 | #endif |
553 | 586 | ||
554 | /* Get rid of any cached register state */ | 587 | /* Get rid of any cached register state */ |
@@ -618,9 +651,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
618 | mm->hiwater_rss = get_mm_rss(mm); | 651 | mm->hiwater_rss = get_mm_rss(mm); |
619 | mm->hiwater_vm = mm->total_vm; | 652 | mm->hiwater_vm = mm->total_vm; |
620 | 653 | ||
654 | if (mm->binfmt && !try_module_get(mm->binfmt->module)) | ||
655 | goto free_pt; | ||
656 | |||
621 | return mm; | 657 | return mm; |
622 | 658 | ||
623 | free_pt: | 659 | free_pt: |
660 | /* don't put binfmt in mmput, we haven't got module yet */ | ||
661 | mm->binfmt = NULL; | ||
624 | mmput(mm); | 662 | mmput(mm); |
625 | 663 | ||
626 | fail_nomem: | 664 | fail_nomem: |
@@ -788,10 +826,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) | |||
788 | thread_group_cputime_init(sig); | 826 | thread_group_cputime_init(sig); |
789 | 827 | ||
790 | /* Expiration times and increments. */ | 828 | /* Expiration times and increments. */ |
791 | sig->it_virt_expires = cputime_zero; | 829 | sig->it[CPUCLOCK_PROF].expires = cputime_zero; |
792 | sig->it_virt_incr = cputime_zero; | 830 | sig->it[CPUCLOCK_PROF].incr = cputime_zero; |
793 | sig->it_prof_expires = cputime_zero; | 831 | sig->it[CPUCLOCK_VIRT].expires = cputime_zero; |
794 | sig->it_prof_incr = cputime_zero; | 832 | sig->it[CPUCLOCK_VIRT].incr = cputime_zero; |
795 | 833 | ||
796 | /* Cached expiration times. */ | 834 | /* Cached expiration times. */ |
797 | sig->cputime_expires.prof_exp = cputime_zero; | 835 | sig->cputime_expires.prof_exp = cputime_zero; |
@@ -849,6 +887,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
849 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 887 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
850 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 888 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
851 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 889 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
890 | sig->maxrss = sig->cmaxrss = 0; | ||
852 | task_io_accounting_init(&sig->ioac); | 891 | task_io_accounting_init(&sig->ioac); |
853 | sig->sum_sched_runtime = 0; | 892 | sig->sum_sched_runtime = 0; |
854 | taskstats_tgid_init(sig); | 893 | taskstats_tgid_init(sig); |
@@ -863,6 +902,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
863 | 902 | ||
864 | tty_audit_fork(sig); | 903 | tty_audit_fork(sig); |
865 | 904 | ||
905 | sig->oom_adj = current->signal->oom_adj; | ||
906 | |||
866 | return 0; | 907 | return 0; |
867 | } | 908 | } |
868 | 909 | ||
@@ -958,6 +999,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
958 | if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) | 999 | if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) |
959 | return ERR_PTR(-EINVAL); | 1000 | return ERR_PTR(-EINVAL); |
960 | 1001 | ||
1002 | /* | ||
1003 | * Siblings of global init remain as zombies on exit since they are | ||
1004 | * not reaped by their parent (swapper). To solve this and to avoid | ||
1005 | * multi-rooted process trees, prevent global and container-inits | ||
1006 | * from creating siblings. | ||
1007 | */ | ||
1008 | if ((clone_flags & CLONE_PARENT) && | ||
1009 | current->signal->flags & SIGNAL_UNKILLABLE) | ||
1010 | return ERR_PTR(-EINVAL); | ||
1011 | |||
961 | retval = security_task_create(clone_flags); | 1012 | retval = security_task_create(clone_flags); |
962 | if (retval) | 1013 | if (retval) |
963 | goto fork_out; | 1014 | goto fork_out; |
@@ -999,9 +1050,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
999 | if (!try_module_get(task_thread_info(p)->exec_domain->module)) | 1050 | if (!try_module_get(task_thread_info(p)->exec_domain->module)) |
1000 | goto bad_fork_cleanup_count; | 1051 | goto bad_fork_cleanup_count; |
1001 | 1052 | ||
1002 | if (p->binfmt && !try_module_get(p->binfmt->module)) | ||
1003 | goto bad_fork_cleanup_put_domain; | ||
1004 | |||
1005 | p->did_exec = 0; | 1053 | p->did_exec = 0; |
1006 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ | 1054 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ |
1007 | copy_flags(clone_flags, p); | 1055 | copy_flags(clone_flags, p); |
@@ -1075,10 +1123,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1075 | 1123 | ||
1076 | p->bts = NULL; | 1124 | p->bts = NULL; |
1077 | 1125 | ||
1126 | p->stack_start = stack_start; | ||
1127 | |||
1078 | /* Perform scheduler related setup. Assign this task to a CPU. */ | 1128 | /* Perform scheduler related setup. Assign this task to a CPU. */ |
1079 | sched_fork(p, clone_flags); | 1129 | sched_fork(p, clone_flags); |
1080 | 1130 | ||
1081 | retval = perf_counter_init_task(p); | 1131 | retval = perf_event_init_task(p); |
1082 | if (retval) | 1132 | if (retval) |
1083 | goto bad_fork_cleanup_policy; | 1133 | goto bad_fork_cleanup_policy; |
1084 | 1134 | ||
@@ -1253,7 +1303,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1253 | write_unlock_irq(&tasklist_lock); | 1303 | write_unlock_irq(&tasklist_lock); |
1254 | proc_fork_connector(p); | 1304 | proc_fork_connector(p); |
1255 | cgroup_post_fork(p); | 1305 | cgroup_post_fork(p); |
1256 | perf_counter_fork(p); | 1306 | perf_event_fork(p); |
1257 | return p; | 1307 | return p; |
1258 | 1308 | ||
1259 | bad_fork_free_pid: | 1309 | bad_fork_free_pid: |
@@ -1280,16 +1330,13 @@ bad_fork_cleanup_semundo: | |||
1280 | bad_fork_cleanup_audit: | 1330 | bad_fork_cleanup_audit: |
1281 | audit_free(p); | 1331 | audit_free(p); |
1282 | bad_fork_cleanup_policy: | 1332 | bad_fork_cleanup_policy: |
1283 | perf_counter_free_task(p); | 1333 | perf_event_free_task(p); |
1284 | #ifdef CONFIG_NUMA | 1334 | #ifdef CONFIG_NUMA |
1285 | mpol_put(p->mempolicy); | 1335 | mpol_put(p->mempolicy); |
1286 | bad_fork_cleanup_cgroup: | 1336 | bad_fork_cleanup_cgroup: |
1287 | #endif | 1337 | #endif |
1288 | cgroup_exit(p, cgroup_callbacks_done); | 1338 | cgroup_exit(p, cgroup_callbacks_done); |
1289 | delayacct_tsk_free(p); | 1339 | delayacct_tsk_free(p); |
1290 | if (p->binfmt) | ||
1291 | module_put(p->binfmt->module); | ||
1292 | bad_fork_cleanup_put_domain: | ||
1293 | module_put(task_thread_info(p)->exec_domain->module); | 1340 | module_put(task_thread_info(p)->exec_domain->module); |
1294 | bad_fork_cleanup_count: | 1341 | bad_fork_cleanup_count: |
1295 | atomic_dec(&p->cred->user->processes); | 1342 | atomic_dec(&p->cred->user->processes); |