about summary refs log tree commit diff stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 108
1 files changed, 79 insertions, 29 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index e6c04d462ab2..1415dc4598ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
49#include <linux/ftrace.h> 49#include <linux/ftrace.h>
50#include <linux/profile.h> 50#include <linux/profile.h>
51#include <linux/rmap.h> 51#include <linux/rmap.h>
52#include <linux/ksm.h>
52#include <linux/acct.h> 53#include <linux/acct.h>
53#include <linux/tsacct_kern.h> 54#include <linux/tsacct_kern.h>
54#include <linux/cn_proc.h> 55#include <linux/cn_proc.h>
@@ -61,7 +62,9 @@
61#include <linux/blkdev.h> 62#include <linux/blkdev.h>
62#include <linux/fs_struct.h> 63#include <linux/fs_struct.h>
63#include <linux/magic.h> 64#include <linux/magic.h>
64#include <linux/perf_counter.h> 65#include <linux/perf_event.h>
66#include <linux/posix-timers.h>
67#include <linux/user-return-notifier.h>
65 68
66#include <asm/pgtable.h> 69#include <asm/pgtable.h>
67#include <asm/pgalloc.h> 70#include <asm/pgalloc.h>
@@ -89,7 +92,7 @@ int nr_processes(void)
89 int cpu; 92 int cpu;
90 int total = 0; 93 int total = 0;
91 94
92 for_each_online_cpu(cpu) 95 for_each_possible_cpu(cpu)
93 total += per_cpu(process_counts, cpu); 96 total += per_cpu(process_counts, cpu);
94 97
95 return total; 98 return total;
@@ -136,9 +139,17 @@ struct kmem_cache *vm_area_cachep;
136/* SLAB cache for mm_struct structures (tsk->mm) */ 139/* SLAB cache for mm_struct structures (tsk->mm) */
137static struct kmem_cache *mm_cachep; 140static struct kmem_cache *mm_cachep;
138 141
142static void account_kernel_stack(struct thread_info *ti, int account)
143{
144 struct zone *zone = page_zone(virt_to_page(ti));
145
146 mod_zone_page_state(zone, NR_KERNEL_STACK, account);
147}
148
139void free_task(struct task_struct *tsk) 149void free_task(struct task_struct *tsk)
140{ 150{
141 prop_local_destroy_single(&tsk->dirties); 151 prop_local_destroy_single(&tsk->dirties);
152 account_kernel_stack(tsk->stack, -1);
142 free_thread_info(tsk->stack); 153 free_thread_info(tsk->stack);
143 rt_mutex_debug_task_free(tsk); 154 rt_mutex_debug_task_free(tsk);
144 ftrace_graph_exit_task(tsk); 155 ftrace_graph_exit_task(tsk);
@@ -152,8 +163,7 @@ void __put_task_struct(struct task_struct *tsk)
152 WARN_ON(atomic_read(&tsk->usage)); 163 WARN_ON(atomic_read(&tsk->usage));
153 WARN_ON(tsk == current); 164 WARN_ON(tsk == current);
154 165
155 put_cred(tsk->real_cred); 166 exit_creds(tsk);
156 put_cred(tsk->cred);
157 delayacct_tsk_free(tsk); 167 delayacct_tsk_free(tsk);
158 168
159 if (!profile_handoff_task(tsk)) 169 if (!profile_handoff_task(tsk))
@@ -240,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
240 goto out; 250 goto out;
241 251
242 setup_thread_stack(tsk, orig); 252 setup_thread_stack(tsk, orig);
253 clear_user_return_notifier(tsk);
243 stackend = end_of_stack(tsk); 254 stackend = end_of_stack(tsk);
244 *stackend = STACK_END_MAGIC; /* for overflow detection */ 255 *stackend = STACK_END_MAGIC; /* for overflow detection */
245 256
@@ -254,6 +265,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
254 tsk->btrace_seq = 0; 265 tsk->btrace_seq = 0;
255#endif 266#endif
256 tsk->splice_pipe = NULL; 267 tsk->splice_pipe = NULL;
268
269 account_kernel_stack(ti, 1);
270
257 return tsk; 271 return tsk;
258 272
259out: 273out:
@@ -289,6 +303,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
289 rb_link = &mm->mm_rb.rb_node; 303 rb_link = &mm->mm_rb.rb_node;
290 rb_parent = NULL; 304 rb_parent = NULL;
291 pprev = &mm->mmap; 305 pprev = &mm->mmap;
306 retval = ksm_fork(mm, oldmm);
307 if (retval)
308 goto out;
292 309
293 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 310 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
294 struct file *file; 311 struct file *file;
@@ -419,22 +436,30 @@ __setup("coredump_filter=", coredump_filter_setup);
419 436
420#include <linux/init_task.h> 437#include <linux/init_task.h>
421 438
439static void mm_init_aio(struct mm_struct *mm)
440{
441#ifdef CONFIG_AIO
442 spin_lock_init(&mm->ioctx_lock);
443 INIT_HLIST_HEAD(&mm->ioctx_list);
444#endif
445}
446
422static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) 447static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
423{ 448{
424 atomic_set(&mm->mm_users, 1); 449 atomic_set(&mm->mm_users, 1);
425 atomic_set(&mm->mm_count, 1); 450 atomic_set(&mm->mm_count, 1);
426 init_rwsem(&mm->mmap_sem); 451 init_rwsem(&mm->mmap_sem);
427 INIT_LIST_HEAD(&mm->mmlist); 452 INIT_LIST_HEAD(&mm->mmlist);
428 mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; 453 mm->flags = (current->mm) ?
454 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
429 mm->core_state = NULL; 455 mm->core_state = NULL;
430 mm->nr_ptes = 0; 456 mm->nr_ptes = 0;
431 set_mm_counter(mm, file_rss, 0); 457 set_mm_counter(mm, file_rss, 0);
432 set_mm_counter(mm, anon_rss, 0); 458 set_mm_counter(mm, anon_rss, 0);
433 spin_lock_init(&mm->page_table_lock); 459 spin_lock_init(&mm->page_table_lock);
434 spin_lock_init(&mm->ioctx_lock);
435 INIT_HLIST_HEAD(&mm->ioctx_list);
436 mm->free_area_cache = TASK_UNMAPPED_BASE; 460 mm->free_area_cache = TASK_UNMAPPED_BASE;
437 mm->cached_hole_size = ~0UL; 461 mm->cached_hole_size = ~0UL;
462 mm_init_aio(mm);
438 mm_init_owner(mm, p); 463 mm_init_owner(mm, p);
439 464
440 if (likely(!mm_alloc_pgd(mm))) { 465 if (likely(!mm_alloc_pgd(mm))) {
@@ -486,6 +511,7 @@ void mmput(struct mm_struct *mm)
486 511
487 if (atomic_dec_and_test(&mm->mm_users)) { 512 if (atomic_dec_and_test(&mm->mm_users)) {
488 exit_aio(mm); 513 exit_aio(mm);
514 ksm_exit(mm);
489 exit_mmap(mm); 515 exit_mmap(mm);
490 set_mm_exe_file(mm, NULL); 516 set_mm_exe_file(mm, NULL);
491 if (!list_empty(&mm->mmlist)) { 517 if (!list_empty(&mm->mmlist)) {
@@ -494,6 +520,8 @@ void mmput(struct mm_struct *mm)
494 spin_unlock(&mmlist_lock); 520 spin_unlock(&mmlist_lock);
495 } 521 }
496 put_swap_token(mm); 522 put_swap_token(mm);
523 if (mm->binfmt)
524 module_put(mm->binfmt->module);
497 mmdrop(mm); 525 mmdrop(mm);
498 } 526 }
499} 527}
@@ -544,12 +572,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
544 572
545 /* Get rid of any futexes when releasing the mm */ 573 /* Get rid of any futexes when releasing the mm */
546#ifdef CONFIG_FUTEX 574#ifdef CONFIG_FUTEX
547 if (unlikely(tsk->robust_list)) 575 if (unlikely(tsk->robust_list)) {
548 exit_robust_list(tsk); 576 exit_robust_list(tsk);
577 tsk->robust_list = NULL;
578 }
549#ifdef CONFIG_COMPAT 579#ifdef CONFIG_COMPAT
550 if (unlikely(tsk->compat_robust_list)) 580 if (unlikely(tsk->compat_robust_list)) {
551 compat_exit_robust_list(tsk); 581 compat_exit_robust_list(tsk);
582 tsk->compat_robust_list = NULL;
583 }
552#endif 584#endif
585 if (unlikely(!list_empty(&tsk->pi_state_list)))
586 exit_pi_state_list(tsk);
553#endif 587#endif
554 588
555 /* Get rid of any cached register state */ 589 /* Get rid of any cached register state */
@@ -619,9 +653,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
619 mm->hiwater_rss = get_mm_rss(mm); 653 mm->hiwater_rss = get_mm_rss(mm);
620 mm->hiwater_vm = mm->total_vm; 654 mm->hiwater_vm = mm->total_vm;
621 655
656 if (mm->binfmt && !try_module_get(mm->binfmt->module))
657 goto free_pt;
658
622 return mm; 659 return mm;
623 660
624free_pt: 661free_pt:
662 /* don't put binfmt in mmput, we haven't got module yet */
663 mm->binfmt = NULL;
625 mmput(mm); 664 mmput(mm);
626 665
627fail_nomem: 666fail_nomem:
@@ -789,10 +828,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
789 thread_group_cputime_init(sig); 828 thread_group_cputime_init(sig);
790 829
791 /* Expiration times and increments. */ 830 /* Expiration times and increments. */
792 sig->it_virt_expires = cputime_zero; 831 sig->it[CPUCLOCK_PROF].expires = cputime_zero;
793 sig->it_virt_incr = cputime_zero; 832 sig->it[CPUCLOCK_PROF].incr = cputime_zero;
794 sig->it_prof_expires = cputime_zero; 833 sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
795 sig->it_prof_incr = cputime_zero; 834 sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
796 835
797 /* Cached expiration times. */ 836 /* Cached expiration times. */
798 sig->cputime_expires.prof_exp = cputime_zero; 837 sig->cputime_expires.prof_exp = cputime_zero;
@@ -847,9 +886,13 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
847 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; 886 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
848 sig->gtime = cputime_zero; 887 sig->gtime = cputime_zero;
849 sig->cgtime = cputime_zero; 888 sig->cgtime = cputime_zero;
889#ifndef CONFIG_VIRT_CPU_ACCOUNTING
890 sig->prev_utime = sig->prev_stime = cputime_zero;
891#endif
850 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 892 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
851 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 893 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
852 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 894 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
895 sig->maxrss = sig->cmaxrss = 0;
853 task_io_accounting_init(&sig->ioac); 896 task_io_accounting_init(&sig->ioac);
854 sig->sum_sched_runtime = 0; 897 sig->sum_sched_runtime = 0;
855 taskstats_tgid_init(sig); 898 taskstats_tgid_init(sig);
@@ -864,6 +907,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
864 907
865 tty_audit_fork(sig); 908 tty_audit_fork(sig);
866 909
910 sig->oom_adj = current->signal->oom_adj;
911
867 return 0; 912 return 0;
868} 913}
869 914
@@ -959,6 +1004,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
959 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 1004 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
960 return ERR_PTR(-EINVAL); 1005 return ERR_PTR(-EINVAL);
961 1006
1007 /*
1008 * Siblings of global init remain as zombies on exit since they are
1009 * not reaped by their parent (swapper). To solve this and to avoid
1010 * multi-rooted process trees, prevent global and container-inits
1011 * from creating siblings.
1012 */
1013 if ((clone_flags & CLONE_PARENT) &&
1014 current->signal->flags & SIGNAL_UNKILLABLE)
1015 return ERR_PTR(-EINVAL);
1016
962 retval = security_task_create(clone_flags); 1017 retval = security_task_create(clone_flags);
963 if (retval) 1018 if (retval)
964 goto fork_out; 1019 goto fork_out;
@@ -1000,18 +1055,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1000 if (!try_module_get(task_thread_info(p)->exec_domain->module)) 1055 if (!try_module_get(task_thread_info(p)->exec_domain->module))
1001 goto bad_fork_cleanup_count; 1056 goto bad_fork_cleanup_count;
1002 1057
1003 if (p->binfmt && !try_module_get(p->binfmt->module))
1004 goto bad_fork_cleanup_put_domain;
1005
1006 p->did_exec = 0; 1058 p->did_exec = 0;
1007 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1059 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1008 copy_flags(clone_flags, p); 1060 copy_flags(clone_flags, p);
1009 INIT_LIST_HEAD(&p->children); 1061 INIT_LIST_HEAD(&p->children);
1010 INIT_LIST_HEAD(&p->sibling); 1062 INIT_LIST_HEAD(&p->sibling);
1011#ifdef CONFIG_PREEMPT_RCU 1063 rcu_copy_process(p);
1012 p->rcu_read_lock_nesting = 0;
1013 p->rcu_flipctr_idx = 0;
1014#endif /* #ifdef CONFIG_PREEMPT_RCU */
1015 p->vfork_done = NULL; 1064 p->vfork_done = NULL;
1016 spin_lock_init(&p->alloc_lock); 1065 spin_lock_init(&p->alloc_lock);
1017 1066
@@ -1022,8 +1071,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1022 p->gtime = cputime_zero; 1071 p->gtime = cputime_zero;
1023 p->utimescaled = cputime_zero; 1072 p->utimescaled = cputime_zero;
1024 p->stimescaled = cputime_zero; 1073 p->stimescaled = cputime_zero;
1074#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1025 p->prev_utime = cputime_zero; 1075 p->prev_utime = cputime_zero;
1026 p->prev_stime = cputime_zero; 1076 p->prev_stime = cputime_zero;
1077#endif
1027 1078
1028 p->default_timer_slack_ns = current->timer_slack_ns; 1079 p->default_timer_slack_ns = current->timer_slack_ns;
1029 1080
@@ -1079,10 +1130,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1079 1130
1080 p->bts = NULL; 1131 p->bts = NULL;
1081 1132
1133 p->stack_start = stack_start;
1134
1082 /* Perform scheduler related setup. Assign this task to a CPU. */ 1135 /* Perform scheduler related setup. Assign this task to a CPU. */
1083 sched_fork(p, clone_flags); 1136 sched_fork(p, clone_flags);
1084 1137
1085 retval = perf_counter_init_task(p); 1138 retval = perf_event_init_task(p);
1086 if (retval) 1139 if (retval)
1087 goto bad_fork_cleanup_policy; 1140 goto bad_fork_cleanup_policy;
1088 1141
@@ -1257,14 +1310,15 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1257 write_unlock_irq(&tasklist_lock); 1310 write_unlock_irq(&tasklist_lock);
1258 proc_fork_connector(p); 1311 proc_fork_connector(p);
1259 cgroup_post_fork(p); 1312 cgroup_post_fork(p);
1260 perf_counter_fork(p); 1313 perf_event_fork(p);
1261 return p; 1314 return p;
1262 1315
1263bad_fork_free_pid: 1316bad_fork_free_pid:
1264 if (pid != &init_struct_pid) 1317 if (pid != &init_struct_pid)
1265 free_pid(pid); 1318 free_pid(pid);
1266bad_fork_cleanup_io: 1319bad_fork_cleanup_io:
1267 put_io_context(p->io_context); 1320 if (p->io_context)
1321 exit_io_context(p);
1268bad_fork_cleanup_namespaces: 1322bad_fork_cleanup_namespaces:
1269 exit_task_namespaces(p); 1323 exit_task_namespaces(p);
1270bad_fork_cleanup_mm: 1324bad_fork_cleanup_mm:
@@ -1284,21 +1338,17 @@ bad_fork_cleanup_semundo:
1284bad_fork_cleanup_audit: 1338bad_fork_cleanup_audit:
1285 audit_free(p); 1339 audit_free(p);
1286bad_fork_cleanup_policy: 1340bad_fork_cleanup_policy:
1287 perf_counter_free_task(p); 1341 perf_event_free_task(p);
1288#ifdef CONFIG_NUMA 1342#ifdef CONFIG_NUMA
1289 mpol_put(p->mempolicy); 1343 mpol_put(p->mempolicy);
1290bad_fork_cleanup_cgroup: 1344bad_fork_cleanup_cgroup:
1291#endif 1345#endif
1292 cgroup_exit(p, cgroup_callbacks_done); 1346 cgroup_exit(p, cgroup_callbacks_done);
1293 delayacct_tsk_free(p); 1347 delayacct_tsk_free(p);
1294 if (p->binfmt)
1295 module_put(p->binfmt->module);
1296bad_fork_cleanup_put_domain:
1297 module_put(task_thread_info(p)->exec_domain->module); 1348 module_put(task_thread_info(p)->exec_domain->module);
1298bad_fork_cleanup_count: 1349bad_fork_cleanup_count:
1299 atomic_dec(&p->cred->user->processes); 1350 atomic_dec(&p->cred->user->processes);
1300 put_cred(p->real_cred); 1351 exit_creds(p);
1301 put_cred(p->cred);
1302bad_fork_free: 1352bad_fork_free:
1303 free_task(p); 1353 free_task(p);
1304fork_out: 1354fork_out: