diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 75 |
1 files changed, 58 insertions, 17 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index bfee931ee3fb..266c6af6ef1b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/ftrace.h> | 49 | #include <linux/ftrace.h> |
50 | #include <linux/profile.h> | 50 | #include <linux/profile.h> |
51 | #include <linux/rmap.h> | 51 | #include <linux/rmap.h> |
52 | #include <linux/ksm.h> | ||
52 | #include <linux/acct.h> | 53 | #include <linux/acct.h> |
53 | #include <linux/tsacct_kern.h> | 54 | #include <linux/tsacct_kern.h> |
54 | #include <linux/cn_proc.h> | 55 | #include <linux/cn_proc.h> |
@@ -61,7 +62,8 @@ | |||
61 | #include <linux/blkdev.h> | 62 | #include <linux/blkdev.h> |
62 | #include <linux/fs_struct.h> | 63 | #include <linux/fs_struct.h> |
63 | #include <linux/magic.h> | 64 | #include <linux/magic.h> |
64 | #include <linux/perf_counter.h> | 65 | #include <linux/perf_event.h> |
66 | #include <linux/posix-timers.h> | ||
65 | 67 | ||
66 | #include <asm/pgtable.h> | 68 | #include <asm/pgtable.h> |
67 | #include <asm/pgalloc.h> | 69 | #include <asm/pgalloc.h> |
@@ -136,9 +138,17 @@ struct kmem_cache *vm_area_cachep; | |||
136 | /* SLAB cache for mm_struct structures (tsk->mm) */ | 138 | /* SLAB cache for mm_struct structures (tsk->mm) */ |
137 | static struct kmem_cache *mm_cachep; | 139 | static struct kmem_cache *mm_cachep; |
138 | 140 | ||
141 | static void account_kernel_stack(struct thread_info *ti, int account) | ||
142 | { | ||
143 | struct zone *zone = page_zone(virt_to_page(ti)); | ||
144 | |||
145 | mod_zone_page_state(zone, NR_KERNEL_STACK, account); | ||
146 | } | ||
147 | |||
139 | void free_task(struct task_struct *tsk) | 148 | void free_task(struct task_struct *tsk) |
140 | { | 149 | { |
141 | prop_local_destroy_single(&tsk->dirties); | 150 | prop_local_destroy_single(&tsk->dirties); |
151 | account_kernel_stack(tsk->stack, -1); | ||
142 | free_thread_info(tsk->stack); | 152 | free_thread_info(tsk->stack); |
143 | rt_mutex_debug_task_free(tsk); | 153 | rt_mutex_debug_task_free(tsk); |
144 | ftrace_graph_exit_task(tsk); | 154 | ftrace_graph_exit_task(tsk); |
@@ -253,6 +263,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
253 | tsk->btrace_seq = 0; | 263 | tsk->btrace_seq = 0; |
254 | #endif | 264 | #endif |
255 | tsk->splice_pipe = NULL; | 265 | tsk->splice_pipe = NULL; |
266 | |||
267 | account_kernel_stack(ti, 1); | ||
268 | |||
256 | return tsk; | 269 | return tsk; |
257 | 270 | ||
258 | out: | 271 | out: |
@@ -288,6 +301,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
288 | rb_link = &mm->mm_rb.rb_node; | 301 | rb_link = &mm->mm_rb.rb_node; |
289 | rb_parent = NULL; | 302 | rb_parent = NULL; |
290 | pprev = &mm->mmap; | 303 | pprev = &mm->mmap; |
304 | retval = ksm_fork(mm, oldmm); | ||
305 | if (retval) | ||
306 | goto out; | ||
291 | 307 | ||
292 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { | 308 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { |
293 | struct file *file; | 309 | struct file *file; |
@@ -418,22 +434,30 @@ __setup("coredump_filter=", coredump_filter_setup); | |||
418 | 434 | ||
419 | #include <linux/init_task.h> | 435 | #include <linux/init_task.h> |
420 | 436 | ||
437 | static void mm_init_aio(struct mm_struct *mm) | ||
438 | { | ||
439 | #ifdef CONFIG_AIO | ||
440 | spin_lock_init(&mm->ioctx_lock); | ||
441 | INIT_HLIST_HEAD(&mm->ioctx_list); | ||
442 | #endif | ||
443 | } | ||
444 | |||
421 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 445 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
422 | { | 446 | { |
423 | atomic_set(&mm->mm_users, 1); | 447 | atomic_set(&mm->mm_users, 1); |
424 | atomic_set(&mm->mm_count, 1); | 448 | atomic_set(&mm->mm_count, 1); |
425 | init_rwsem(&mm->mmap_sem); | 449 | init_rwsem(&mm->mmap_sem); |
426 | INIT_LIST_HEAD(&mm->mmlist); | 450 | INIT_LIST_HEAD(&mm->mmlist); |
427 | mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; | 451 | mm->flags = (current->mm) ? |
452 | (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; | ||
428 | mm->core_state = NULL; | 453 | mm->core_state = NULL; |
429 | mm->nr_ptes = 0; | 454 | mm->nr_ptes = 0; |
430 | set_mm_counter(mm, file_rss, 0); | 455 | set_mm_counter(mm, file_rss, 0); |
431 | set_mm_counter(mm, anon_rss, 0); | 456 | set_mm_counter(mm, anon_rss, 0); |
432 | spin_lock_init(&mm->page_table_lock); | 457 | spin_lock_init(&mm->page_table_lock); |
433 | spin_lock_init(&mm->ioctx_lock); | ||
434 | INIT_HLIST_HEAD(&mm->ioctx_list); | ||
435 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 458 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
436 | mm->cached_hole_size = ~0UL; | 459 | mm->cached_hole_size = ~0UL; |
460 | mm_init_aio(mm); | ||
437 | mm_init_owner(mm, p); | 461 | mm_init_owner(mm, p); |
438 | 462 | ||
439 | if (likely(!mm_alloc_pgd(mm))) { | 463 | if (likely(!mm_alloc_pgd(mm))) { |
@@ -485,6 +509,7 @@ void mmput(struct mm_struct *mm) | |||
485 | 509 | ||
486 | if (atomic_dec_and_test(&mm->mm_users)) { | 510 | if (atomic_dec_and_test(&mm->mm_users)) { |
487 | exit_aio(mm); | 511 | exit_aio(mm); |
512 | ksm_exit(mm); | ||
488 | exit_mmap(mm); | 513 | exit_mmap(mm); |
489 | set_mm_exe_file(mm, NULL); | 514 | set_mm_exe_file(mm, NULL); |
490 | if (!list_empty(&mm->mmlist)) { | 515 | if (!list_empty(&mm->mmlist)) { |
@@ -493,6 +518,8 @@ void mmput(struct mm_struct *mm) | |||
493 | spin_unlock(&mmlist_lock); | 518 | spin_unlock(&mmlist_lock); |
494 | } | 519 | } |
495 | put_swap_token(mm); | 520 | put_swap_token(mm); |
521 | if (mm->binfmt) | ||
522 | module_put(mm->binfmt->module); | ||
496 | mmdrop(mm); | 523 | mmdrop(mm); |
497 | } | 524 | } |
498 | } | 525 | } |
@@ -618,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
618 | mm->hiwater_rss = get_mm_rss(mm); | 645 | mm->hiwater_rss = get_mm_rss(mm); |
619 | mm->hiwater_vm = mm->total_vm; | 646 | mm->hiwater_vm = mm->total_vm; |
620 | 647 | ||
648 | if (mm->binfmt && !try_module_get(mm->binfmt->module)) | ||
649 | goto free_pt; | ||
650 | |||
621 | return mm; | 651 | return mm; |
622 | 652 | ||
623 | free_pt: | 653 | free_pt: |
654 | /* don't put binfmt in mmput, we haven't got module yet */ | ||
655 | mm->binfmt = NULL; | ||
624 | mmput(mm); | 656 | mmput(mm); |
625 | 657 | ||
626 | fail_nomem: | 658 | fail_nomem: |
@@ -788,10 +820,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) | |||
788 | thread_group_cputime_init(sig); | 820 | thread_group_cputime_init(sig); |
789 | 821 | ||
790 | /* Expiration times and increments. */ | 822 | /* Expiration times and increments. */ |
791 | sig->it_virt_expires = cputime_zero; | 823 | sig->it[CPUCLOCK_PROF].expires = cputime_zero; |
792 | sig->it_virt_incr = cputime_zero; | 824 | sig->it[CPUCLOCK_PROF].incr = cputime_zero; |
793 | sig->it_prof_expires = cputime_zero; | 825 | sig->it[CPUCLOCK_VIRT].expires = cputime_zero; |
794 | sig->it_prof_incr = cputime_zero; | 826 | sig->it[CPUCLOCK_VIRT].incr = cputime_zero; |
795 | 827 | ||
796 | /* Cached expiration times. */ | 828 | /* Cached expiration times. */ |
797 | sig->cputime_expires.prof_exp = cputime_zero; | 829 | sig->cputime_expires.prof_exp = cputime_zero; |
@@ -849,6 +881,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
849 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 881 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
850 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 882 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
851 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 883 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
884 | sig->maxrss = sig->cmaxrss = 0; | ||
852 | task_io_accounting_init(&sig->ioac); | 885 | task_io_accounting_init(&sig->ioac); |
853 | sig->sum_sched_runtime = 0; | 886 | sig->sum_sched_runtime = 0; |
854 | taskstats_tgid_init(sig); | 887 | taskstats_tgid_init(sig); |
@@ -863,6 +896,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
863 | 896 | ||
864 | tty_audit_fork(sig); | 897 | tty_audit_fork(sig); |
865 | 898 | ||
899 | sig->oom_adj = current->signal->oom_adj; | ||
900 | |||
866 | return 0; | 901 | return 0; |
867 | } | 902 | } |
868 | 903 | ||
@@ -958,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
958 | if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) | 993 | if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) |
959 | return ERR_PTR(-EINVAL); | 994 | return ERR_PTR(-EINVAL); |
960 | 995 | ||
996 | /* | ||
997 | * Siblings of global init remain as zombies on exit since they are | ||
998 | * not reaped by their parent (swapper). To solve this and to avoid | ||
999 | * multi-rooted process trees, prevent global and container-inits | ||
1000 | * from creating siblings. | ||
1001 | */ | ||
1002 | if ((clone_flags & CLONE_PARENT) && | ||
1003 | current->signal->flags & SIGNAL_UNKILLABLE) | ||
1004 | return ERR_PTR(-EINVAL); | ||
1005 | |||
961 | retval = security_task_create(clone_flags); | 1006 | retval = security_task_create(clone_flags); |
962 | if (retval) | 1007 | if (retval) |
963 | goto fork_out; | 1008 | goto fork_out; |
@@ -999,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
999 | if (!try_module_get(task_thread_info(p)->exec_domain->module)) | 1044 | if (!try_module_get(task_thread_info(p)->exec_domain->module)) |
1000 | goto bad_fork_cleanup_count; | 1045 | goto bad_fork_cleanup_count; |
1001 | 1046 | ||
1002 | if (p->binfmt && !try_module_get(p->binfmt->module)) | ||
1003 | goto bad_fork_cleanup_put_domain; | ||
1004 | |||
1005 | p->did_exec = 0; | 1047 | p->did_exec = 0; |
1006 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ | 1048 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ |
1007 | copy_flags(clone_flags, p); | 1049 | copy_flags(clone_flags, p); |
@@ -1075,10 +1117,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1075 | 1117 | ||
1076 | p->bts = NULL; | 1118 | p->bts = NULL; |
1077 | 1119 | ||
1120 | p->stack_start = stack_start; | ||
1121 | |||
1078 | /* Perform scheduler related setup. Assign this task to a CPU. */ | 1122 | /* Perform scheduler related setup. Assign this task to a CPU. */ |
1079 | sched_fork(p, clone_flags); | 1123 | sched_fork(p, clone_flags); |
1080 | 1124 | ||
1081 | retval = perf_counter_init_task(p); | 1125 | retval = perf_event_init_task(p); |
1082 | if (retval) | 1126 | if (retval) |
1083 | goto bad_fork_cleanup_policy; | 1127 | goto bad_fork_cleanup_policy; |
1084 | 1128 | ||
@@ -1253,7 +1297,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1253 | write_unlock_irq(&tasklist_lock); | 1297 | write_unlock_irq(&tasklist_lock); |
1254 | proc_fork_connector(p); | 1298 | proc_fork_connector(p); |
1255 | cgroup_post_fork(p); | 1299 | cgroup_post_fork(p); |
1256 | perf_counter_fork(p); | 1300 | perf_event_fork(p); |
1257 | return p; | 1301 | return p; |
1258 | 1302 | ||
1259 | bad_fork_free_pid: | 1303 | bad_fork_free_pid: |
@@ -1280,16 +1324,13 @@ bad_fork_cleanup_semundo: | |||
1280 | bad_fork_cleanup_audit: | 1324 | bad_fork_cleanup_audit: |
1281 | audit_free(p); | 1325 | audit_free(p); |
1282 | bad_fork_cleanup_policy: | 1326 | bad_fork_cleanup_policy: |
1283 | perf_counter_free_task(p); | 1327 | perf_event_free_task(p); |
1284 | #ifdef CONFIG_NUMA | 1328 | #ifdef CONFIG_NUMA |
1285 | mpol_put(p->mempolicy); | 1329 | mpol_put(p->mempolicy); |
1286 | bad_fork_cleanup_cgroup: | 1330 | bad_fork_cleanup_cgroup: |
1287 | #endif | 1331 | #endif |
1288 | cgroup_exit(p, cgroup_callbacks_done); | 1332 | cgroup_exit(p, cgroup_callbacks_done); |
1289 | delayacct_tsk_free(p); | 1333 | delayacct_tsk_free(p); |
1290 | if (p->binfmt) | ||
1291 | module_put(p->binfmt->module); | ||
1292 | bad_fork_cleanup_put_domain: | ||
1293 | module_put(task_thread_info(p)->exec_domain->module); | 1334 | module_put(task_thread_info(p)->exec_domain->module); |
1294 | bad_fork_cleanup_count: | 1335 | bad_fork_cleanup_count: |
1295 | atomic_dec(&p->cred->user->processes); | 1336 | atomic_dec(&p->cred->user->processes); |