about summary refs log tree commit diff stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c | 85
1 file changed, 66 insertions(+), 19 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index bfee931ee3fb..4c20fff8c13a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
49#include <linux/ftrace.h> 49#include <linux/ftrace.h>
50#include <linux/profile.h> 50#include <linux/profile.h>
51#include <linux/rmap.h> 51#include <linux/rmap.h>
52#include <linux/ksm.h>
52#include <linux/acct.h> 53#include <linux/acct.h>
53#include <linux/tsacct_kern.h> 54#include <linux/tsacct_kern.h>
54#include <linux/cn_proc.h> 55#include <linux/cn_proc.h>
@@ -61,7 +62,8 @@
61#include <linux/blkdev.h> 62#include <linux/blkdev.h>
62#include <linux/fs_struct.h> 63#include <linux/fs_struct.h>
63#include <linux/magic.h> 64#include <linux/magic.h>
64#include <linux/perf_counter.h> 65#include <linux/perf_event.h>
66#include <linux/posix-timers.h>
65 67
66#include <asm/pgtable.h> 68#include <asm/pgtable.h>
67#include <asm/pgalloc.h> 69#include <asm/pgalloc.h>
@@ -136,9 +138,17 @@ struct kmem_cache *vm_area_cachep;
136/* SLAB cache for mm_struct structures (tsk->mm) */ 138/* SLAB cache for mm_struct structures (tsk->mm) */
137static struct kmem_cache *mm_cachep; 139static struct kmem_cache *mm_cachep;
138 140
141static void account_kernel_stack(struct thread_info *ti, int account)
142{
143 struct zone *zone = page_zone(virt_to_page(ti));
144
145 mod_zone_page_state(zone, NR_KERNEL_STACK, account);
146}
147
139void free_task(struct task_struct *tsk) 148void free_task(struct task_struct *tsk)
140{ 149{
141 prop_local_destroy_single(&tsk->dirties); 150 prop_local_destroy_single(&tsk->dirties);
151 account_kernel_stack(tsk->stack, -1);
142 free_thread_info(tsk->stack); 152 free_thread_info(tsk->stack);
143 rt_mutex_debug_task_free(tsk); 153 rt_mutex_debug_task_free(tsk);
144 ftrace_graph_exit_task(tsk); 154 ftrace_graph_exit_task(tsk);
@@ -253,6 +263,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
253 tsk->btrace_seq = 0; 263 tsk->btrace_seq = 0;
254#endif 264#endif
255 tsk->splice_pipe = NULL; 265 tsk->splice_pipe = NULL;
266
267 account_kernel_stack(ti, 1);
268
256 return tsk; 269 return tsk;
257 270
258out: 271out:
@@ -288,6 +301,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
288 rb_link = &mm->mm_rb.rb_node; 301 rb_link = &mm->mm_rb.rb_node;
289 rb_parent = NULL; 302 rb_parent = NULL;
290 pprev = &mm->mmap; 303 pprev = &mm->mmap;
304 retval = ksm_fork(mm, oldmm);
305 if (retval)
306 goto out;
291 307
292 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 308 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
293 struct file *file; 309 struct file *file;
@@ -418,22 +434,30 @@ __setup("coredump_filter=", coredump_filter_setup);
418 434
419#include <linux/init_task.h> 435#include <linux/init_task.h>
420 436
437static void mm_init_aio(struct mm_struct *mm)
438{
439#ifdef CONFIG_AIO
440 spin_lock_init(&mm->ioctx_lock);
441 INIT_HLIST_HEAD(&mm->ioctx_list);
442#endif
443}
444
421static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) 445static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
422{ 446{
423 atomic_set(&mm->mm_users, 1); 447 atomic_set(&mm->mm_users, 1);
424 atomic_set(&mm->mm_count, 1); 448 atomic_set(&mm->mm_count, 1);
425 init_rwsem(&mm->mmap_sem); 449 init_rwsem(&mm->mmap_sem);
426 INIT_LIST_HEAD(&mm->mmlist); 450 INIT_LIST_HEAD(&mm->mmlist);
427 mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; 451 mm->flags = (current->mm) ?
452 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
428 mm->core_state = NULL; 453 mm->core_state = NULL;
429 mm->nr_ptes = 0; 454 mm->nr_ptes = 0;
430 set_mm_counter(mm, file_rss, 0); 455 set_mm_counter(mm, file_rss, 0);
431 set_mm_counter(mm, anon_rss, 0); 456 set_mm_counter(mm, anon_rss, 0);
432 spin_lock_init(&mm->page_table_lock); 457 spin_lock_init(&mm->page_table_lock);
433 spin_lock_init(&mm->ioctx_lock);
434 INIT_HLIST_HEAD(&mm->ioctx_list);
435 mm->free_area_cache = TASK_UNMAPPED_BASE; 458 mm->free_area_cache = TASK_UNMAPPED_BASE;
436 mm->cached_hole_size = ~0UL; 459 mm->cached_hole_size = ~0UL;
460 mm_init_aio(mm);
437 mm_init_owner(mm, p); 461 mm_init_owner(mm, p);
438 462
439 if (likely(!mm_alloc_pgd(mm))) { 463 if (likely(!mm_alloc_pgd(mm))) {
@@ -485,6 +509,7 @@ void mmput(struct mm_struct *mm)
485 509
486 if (atomic_dec_and_test(&mm->mm_users)) { 510 if (atomic_dec_and_test(&mm->mm_users)) {
487 exit_aio(mm); 511 exit_aio(mm);
512 ksm_exit(mm);
488 exit_mmap(mm); 513 exit_mmap(mm);
489 set_mm_exe_file(mm, NULL); 514 set_mm_exe_file(mm, NULL);
490 if (!list_empty(&mm->mmlist)) { 515 if (!list_empty(&mm->mmlist)) {
@@ -493,6 +518,8 @@ void mmput(struct mm_struct *mm)
493 spin_unlock(&mmlist_lock); 518 spin_unlock(&mmlist_lock);
494 } 519 }
495 put_swap_token(mm); 520 put_swap_token(mm);
521 if (mm->binfmt)
522 module_put(mm->binfmt->module);
496 mmdrop(mm); 523 mmdrop(mm);
497 } 524 }
498} 525}
@@ -543,12 +570,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
543 570
544 /* Get rid of any futexes when releasing the mm */ 571 /* Get rid of any futexes when releasing the mm */
545#ifdef CONFIG_FUTEX 572#ifdef CONFIG_FUTEX
546 if (unlikely(tsk->robust_list)) 573 if (unlikely(tsk->robust_list)) {
547 exit_robust_list(tsk); 574 exit_robust_list(tsk);
575 tsk->robust_list = NULL;
576 }
548#ifdef CONFIG_COMPAT 577#ifdef CONFIG_COMPAT
549 if (unlikely(tsk->compat_robust_list)) 578 if (unlikely(tsk->compat_robust_list)) {
550 compat_exit_robust_list(tsk); 579 compat_exit_robust_list(tsk);
580 tsk->compat_robust_list = NULL;
581 }
551#endif 582#endif
583 if (unlikely(!list_empty(&tsk->pi_state_list)))
584 exit_pi_state_list(tsk);
552#endif 585#endif
553 586
554 /* Get rid of any cached register state */ 587 /* Get rid of any cached register state */
@@ -618,9 +651,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
618 mm->hiwater_rss = get_mm_rss(mm); 651 mm->hiwater_rss = get_mm_rss(mm);
619 mm->hiwater_vm = mm->total_vm; 652 mm->hiwater_vm = mm->total_vm;
620 653
654 if (mm->binfmt && !try_module_get(mm->binfmt->module))
655 goto free_pt;
656
621 return mm; 657 return mm;
622 658
623free_pt: 659free_pt:
660 /* don't put binfmt in mmput, we haven't got module yet */
661 mm->binfmt = NULL;
624 mmput(mm); 662 mmput(mm);
625 663
626fail_nomem: 664fail_nomem:
@@ -788,10 +826,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
788 thread_group_cputime_init(sig); 826 thread_group_cputime_init(sig);
789 827
790 /* Expiration times and increments. */ 828 /* Expiration times and increments. */
791 sig->it_virt_expires = cputime_zero; 829 sig->it[CPUCLOCK_PROF].expires = cputime_zero;
792 sig->it_virt_incr = cputime_zero; 830 sig->it[CPUCLOCK_PROF].incr = cputime_zero;
793 sig->it_prof_expires = cputime_zero; 831 sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
794 sig->it_prof_incr = cputime_zero; 832 sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
795 833
796 /* Cached expiration times. */ 834 /* Cached expiration times. */
797 sig->cputime_expires.prof_exp = cputime_zero; 835 sig->cputime_expires.prof_exp = cputime_zero;
@@ -849,6 +887,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
849 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 887 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
850 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 888 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
851 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 889 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
890 sig->maxrss = sig->cmaxrss = 0;
852 task_io_accounting_init(&sig->ioac); 891 task_io_accounting_init(&sig->ioac);
853 sig->sum_sched_runtime = 0; 892 sig->sum_sched_runtime = 0;
854 taskstats_tgid_init(sig); 893 taskstats_tgid_init(sig);
@@ -863,6 +902,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
863 902
864 tty_audit_fork(sig); 903 tty_audit_fork(sig);
865 904
905 sig->oom_adj = current->signal->oom_adj;
906
866 return 0; 907 return 0;
867} 908}
868 909
@@ -958,6 +999,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
958 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 999 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
959 return ERR_PTR(-EINVAL); 1000 return ERR_PTR(-EINVAL);
960 1001
1002 /*
1003 * Siblings of global init remain as zombies on exit since they are
1004 * not reaped by their parent (swapper). To solve this and to avoid
1005 * multi-rooted process trees, prevent global and container-inits
1006 * from creating siblings.
1007 */
1008 if ((clone_flags & CLONE_PARENT) &&
1009 current->signal->flags & SIGNAL_UNKILLABLE)
1010 return ERR_PTR(-EINVAL);
1011
961 retval = security_task_create(clone_flags); 1012 retval = security_task_create(clone_flags);
962 if (retval) 1013 if (retval)
963 goto fork_out; 1014 goto fork_out;
@@ -999,9 +1050,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
999 if (!try_module_get(task_thread_info(p)->exec_domain->module)) 1050 if (!try_module_get(task_thread_info(p)->exec_domain->module))
1000 goto bad_fork_cleanup_count; 1051 goto bad_fork_cleanup_count;
1001 1052
1002 if (p->binfmt && !try_module_get(p->binfmt->module))
1003 goto bad_fork_cleanup_put_domain;
1004
1005 p->did_exec = 0; 1053 p->did_exec = 0;
1006 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1054 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1007 copy_flags(clone_flags, p); 1055 copy_flags(clone_flags, p);
@@ -1075,10 +1123,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1075 1123
1076 p->bts = NULL; 1124 p->bts = NULL;
1077 1125
1126 p->stack_start = stack_start;
1127
1078 /* Perform scheduler related setup. Assign this task to a CPU. */ 1128 /* Perform scheduler related setup. Assign this task to a CPU. */
1079 sched_fork(p, clone_flags); 1129 sched_fork(p, clone_flags);
1080 1130
1081 retval = perf_counter_init_task(p); 1131 retval = perf_event_init_task(p);
1082 if (retval) 1132 if (retval)
1083 goto bad_fork_cleanup_policy; 1133 goto bad_fork_cleanup_policy;
1084 1134
@@ -1253,7 +1303,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1253 write_unlock_irq(&tasklist_lock); 1303 write_unlock_irq(&tasklist_lock);
1254 proc_fork_connector(p); 1304 proc_fork_connector(p);
1255 cgroup_post_fork(p); 1305 cgroup_post_fork(p);
1256 perf_counter_fork(p); 1306 perf_event_fork(p);
1257 return p; 1307 return p;
1258 1308
1259bad_fork_free_pid: 1309bad_fork_free_pid:
@@ -1280,16 +1330,13 @@ bad_fork_cleanup_semundo:
1280bad_fork_cleanup_audit: 1330bad_fork_cleanup_audit:
1281 audit_free(p); 1331 audit_free(p);
1282bad_fork_cleanup_policy: 1332bad_fork_cleanup_policy:
1283 perf_counter_free_task(p); 1333 perf_event_free_task(p);
1284#ifdef CONFIG_NUMA 1334#ifdef CONFIG_NUMA
1285 mpol_put(p->mempolicy); 1335 mpol_put(p->mempolicy);
1286bad_fork_cleanup_cgroup: 1336bad_fork_cleanup_cgroup:
1287#endif 1337#endif
1288 cgroup_exit(p, cgroup_callbacks_done); 1338 cgroup_exit(p, cgroup_callbacks_done);
1289 delayacct_tsk_free(p); 1339 delayacct_tsk_free(p);
1290 if (p->binfmt)
1291 module_put(p->binfmt->module);
1292bad_fork_cleanup_put_domain:
1293 module_put(task_thread_info(p)->exec_domain->module); 1340 module_put(task_thread_info(p)->exec_domain->module);
1294bad_fork_cleanup_count: 1341bad_fork_cleanup_count:
1295 atomic_dec(&p->cred->user->processes); 1342 atomic_dec(&p->cred->user->processes);