Diffstat (limited to 'kernel/fork.c')
 -rw-r--r--  kernel/fork.c  108
 1 file changed, 79 insertions, 29 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index e6c04d462ab2..1415dc4598ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
 #include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
+#include <linux/ksm.h>
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
@@ -61,7 +62,9 @@
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
 #include <linux/magic.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
+#include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -89,7 +92,7 @@ int nr_processes(void)
	int cpu;
	int total = 0;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
		total += per_cpu(process_counts, cpu);
 
	return total;
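
The hunk above switches nr_processes() from summing per-CPU process_counts over online CPUs to summing over all possible CPUs: counts accumulated on a CPU are not folded back anywhere when that CPU goes offline, so an online-only sum can undercount. The user-space sketch below is illustrative only; the array, mask and function names are invented for this example and are not kernel API.

#include <stdbool.h>
#include <stdio.h>

#define NR_POSSIBLE_CPUS 4

/* Per-CPU counters and an "online" mask, standing in for per_cpu(process_counts). */
static long process_counts[NR_POSSIBLE_CPUS] = { 3, 5, 2, 0 };
static bool cpu_online[NR_POSSIBLE_CPUS]     = { true, true, true, true };

static long sum_online(void)
{
	long total = 0;
	for (int cpu = 0; cpu < NR_POSSIBLE_CPUS; cpu++)
		if (cpu_online[cpu])
			total += process_counts[cpu];
	return total;
}

static long sum_possible(void)
{
	long total = 0;
	for (int cpu = 0; cpu < NR_POSSIBLE_CPUS; cpu++)
		total += process_counts[cpu];
	return total;
}

int main(void)
{
	printf("online=%ld possible=%ld\n", sum_online(), sum_possible());

	/* CPU 1 goes offline; its counter is not migrated anywhere. */
	cpu_online[1] = false;

	/* The online-only sum now undercounts by 5; the possible-CPU sum does not. */
	printf("online=%ld possible=%ld\n", sum_online(), sum_possible());
	return 0;
}
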
@@ -136,9 +139,17 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+static void account_kernel_stack(struct thread_info *ti, int account)
+{
+	struct zone *zone = page_zone(virt_to_page(ti));
+
+	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
+}
+
 void free_task(struct task_struct *tsk)
 {
	prop_local_destroy_single(&tsk->dirties);
+	account_kernel_stack(tsk->stack, -1);
	free_thread_info(tsk->stack);
	rt_mutex_debug_task_free(tsk);
	ftrace_graph_exit_task(tsk);
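
account_kernel_stack() above charges each task's kernel stack to its memory zone via mod_zone_page_state(): +1 when the stack is set up in dup_task_struct() (later in this diff) and -1 here in free_task(), just before the thread_info is freed, so the NR_KERNEL_STACK statistic tracks the number of live stacks. Below is a minimal user-space sketch of the same paired accounting idiom; all names are invented for illustration.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Stands in for the per-zone NR_KERNEL_STACK counter. */
static long nr_stacks;

static void account_stack(int delta)
{
	nr_stacks += delta;
}

static void *alloc_stack(size_t size)
{
	void *stack = malloc(size);
	if (stack)
		account_stack(+1);	/* charge at allocation time */
	return stack;
}

static void free_stack(void *stack)
{
	if (!stack)
		return;
	account_stack(-1);		/* uncharge before freeing, mirroring free_task() */
	free(stack);
}

int main(void)
{
	void *a = alloc_stack(8192);
	void *b = alloc_stack(8192);

	printf("live stacks: %ld\n", nr_stacks);	/* 2 */
	free_stack(a);
	free_stack(b);
	assert(nr_stacks == 0);				/* accounting stays balanced */
	return 0;
}
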
@@ -152,8 +163,7 @@ void __put_task_struct(struct task_struct *tsk)
	WARN_ON(atomic_read(&tsk->usage));
	WARN_ON(tsk == current);
 
-	put_cred(tsk->real_cred);
-	put_cred(tsk->cred);
+	exit_creds(tsk);
	delayacct_tsk_free(tsk);
 
	if (!profile_handoff_task(tsk))
@@ -240,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
		goto out;
 
	setup_thread_stack(tsk, orig);
+	clear_user_return_notifier(tsk);
	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
@@ -254,6 +265,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
	tsk->btrace_seq = 0;
 #endif
	tsk->splice_pipe = NULL;
+
+	account_kernel_stack(ti, 1);
+
	return tsk;
 
 out:
@@ -289,6 +303,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
	rb_link = &mm->mm_rb.rb_node;
	rb_parent = NULL;
	pprev = &mm->mmap;
+	retval = ksm_fork(mm, oldmm);
+	if (retval)
+		goto out;
 
	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
		struct file *file;
@@ -419,22 +436,30 @@ __setup("coredump_filter=", coredump_filter_setup);
 
 #include <linux/init_task.h>
 
+static void mm_init_aio(struct mm_struct *mm)
+{
+#ifdef CONFIG_AIO
+	spin_lock_init(&mm->ioctx_lock);
+	INIT_HLIST_HEAD(&mm->ioctx_list);
+#endif
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
	atomic_set(&mm->mm_users, 1);
	atomic_set(&mm->mm_count, 1);
	init_rwsem(&mm->mmap_sem);
	INIT_LIST_HEAD(&mm->mmlist);
-	mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
+	mm->flags = (current->mm) ?
+		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
	mm->core_state = NULL;
	mm->nr_ptes = 0;
	set_mm_counter(mm, file_rss, 0);
	set_mm_counter(mm, anon_rss, 0);
	spin_lock_init(&mm->page_table_lock);
-	spin_lock_init(&mm->ioctx_lock);
-	INIT_HLIST_HEAD(&mm->ioctx_list);
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
+	mm_init_aio(mm);
	mm_init_owner(mm, p);
 
	if (likely(!mm_alloc_pgd(mm))) {
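
The ioctx_lock and ioctx_list members of mm_struct only exist when CONFIG_AIO is enabled, so their initialization moves out of mm_init() into the new mm_init_aio() helper, whose body compiles away otherwise and keeps mm_init() free of #ifdefs. The fragment below sketches that pattern with a made-up config symbol and struct; it is not the kernel code itself.

#include <stdio.h>

/* #define CONFIG_EXAMPLE_AIO 1   -- toggle the optional feature at build time */

struct example_mm {
	int users;
#ifdef CONFIG_EXAMPLE_AIO
	int ioctx_count;	/* only present when the feature is compiled in */
#endif
};

/* Helper owns all feature-specific setup; it is empty when the feature is off. */
static void example_mm_init_aio(struct example_mm *mm)
{
#ifdef CONFIG_EXAMPLE_AIO
	mm->ioctx_count = 0;
#else
	(void)mm;
#endif
}

static void example_mm_init(struct example_mm *mm)
{
	mm->users = 1;
	example_mm_init_aio(mm);	/* the caller stays free of #ifdefs */
}

int main(void)
{
	struct example_mm mm;

	example_mm_init(&mm);
	printf("users=%d\n", mm.users);
	return 0;
}
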
@@ -486,6 +511,7 @@ void mmput(struct mm_struct *mm)
 
	if (atomic_dec_and_test(&mm->mm_users)) {
		exit_aio(mm);
+		ksm_exit(mm);
		exit_mmap(mm);
		set_mm_exe_file(mm, NULL);
		if (!list_empty(&mm->mmlist)) {
@@ -494,6 +520,8 @@ void mmput(struct mm_struct *mm)
			spin_unlock(&mmlist_lock);
		}
		put_swap_token(mm);
+		if (mm->binfmt)
+			module_put(mm->binfmt->module);
		mmdrop(mm);
	}
 }
@@ -544,12 +572,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 
	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list))
+	if (unlikely(tsk->robust_list)) {
		exit_robust_list(tsk);
+		tsk->robust_list = NULL;
+	}
 #ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list))
+	if (unlikely(tsk->compat_robust_list)) {
		compat_exit_robust_list(tsk);
+		tsk->compat_robust_list = NULL;
+	}
 #endif
+	if (unlikely(!list_empty(&tsk->pi_state_list)))
+		exit_pi_state_list(tsk);
 #endif
 
	/* Get rid of any cached register state */
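
After walking the robust futex lists, the hunk above now clears tsk->robust_list and tsk->compat_robust_list and drains any leftover PI state, so a later pass through this path does not process the same lists again. The sketch below illustrates only that general "handle once, then clear the handle" idiom; the struct and function names are invented for the example.

#include <stdio.h>
#include <stdlib.h>

struct pending_list {
	int items;
};

struct example_task {
	struct pending_list *robust;	/* set while there is work to drain */
};

static void drain_list(struct pending_list *list)
{
	printf("draining %d items\n", list->items);
	list->items = 0;
}

/* Safe to call repeatedly: the pointer is cleared after the first drain. */
static void example_release(struct example_task *tsk)
{
	if (tsk->robust) {
		drain_list(tsk->robust);
		free(tsk->robust);
		tsk->robust = NULL;	/* a second call becomes a no-op */
	}
}

int main(void)
{
	struct example_task tsk = { .robust = malloc(sizeof(*tsk.robust)) };

	if (!tsk.robust)
		return 1;
	tsk.robust->items = 3;

	example_release(&tsk);	/* drains and clears */
	example_release(&tsk);	/* harmless no-op */
	return 0;
}
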
@@ -619,9 +653,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
	mm->hiwater_rss = get_mm_rss(mm);
	mm->hiwater_vm = mm->total_vm;
 
+	if (mm->binfmt && !try_module_get(mm->binfmt->module))
+		goto free_pt;
+
	return mm;
 
 free_pt:
+	/* don't put binfmt in mmput, we haven't got module yet */
+	mm->binfmt = NULL;
	mmput(mm);
 
 fail_nomem:
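
dup_mm() now takes its own reference on the binfmt module, matching the module_put() that mmput() gained earlier in this diff. On the failure path the reference was never acquired, so mm->binfmt is cleared before mmput() runs, keeping gets and puts balanced. The user-space sketch below shows that "clear the field before generic teardown" pattern; the refcount, struct and function names are illustrative, not kernel API.

#include <stdio.h>
#include <stdlib.h>

static int refcount;

/* Pretend module get/put; 'available' controls whether the get succeeds. */
static int try_get_ref(int available)
{
	if (!available)
		return 0;
	refcount++;
	return 1;
}

static void put_ref(void)
{
	refcount--;
}

struct example_mm {
	int holds_ref;	/* plays the role of mm->binfmt */
};

/* Shared teardown drops the reference only when one is recorded. */
static void example_mmput(struct example_mm *mm)
{
	if (mm->holds_ref)
		put_ref();
	free(mm);
}

static struct example_mm *example_dup_mm(int ref_available)
{
	struct example_mm *mm = calloc(1, sizeof(*mm));

	if (!mm)
		return NULL;
	mm->holds_ref = 1;	/* copied from the parent, like mm->binfmt */

	if (!try_get_ref(ref_available))
		goto free_mm;
	return mm;

free_mm:
	/* The get failed: clear the field so example_mmput() will not put. */
	mm->holds_ref = 0;
	example_mmput(mm);
	return NULL;
}

int main(void)
{
	struct example_mm *mm;

	mm = example_dup_mm(1);		/* success: reference taken */
	if (mm)
		example_mmput(mm);	/* reference dropped */

	example_dup_mm(0);		/* failure path leaves the count untouched */
	printf("refcount=%d\n", refcount);	/* 0: gets and puts stayed balanced */
	return 0;
}
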
@@ -789,10 +828,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
	thread_group_cputime_init(sig);
 
	/* Expiration times and increments. */
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
+	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
+	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
 
	/* Cached expiration times. */
	sig->cputime_expires.prof_exp = cputime_zero;
@@ -847,9 +886,13 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
	sig->gtime = cputime_zero;
	sig->cgtime = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	sig->prev_utime = sig->prev_stime = cputime_zero;
+#endif
	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+	sig->maxrss = sig->cmaxrss = 0;
	task_io_accounting_init(&sig->ioac);
	sig->sum_sched_runtime = 0;
	taskstats_tgid_init(sig);
@@ -864,6 +907,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
	tty_audit_fork(sig);
 
+	sig->oom_adj = current->signal->oom_adj;
+
	return 0;
 }
 
@@ -959,6 +1004,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);
 
+	/*
+	 * Siblings of global init remain as zombies on exit since they are
+	 * not reaped by their parent (swapper). To solve this and to avoid
+	 * multi-rooted process trees, prevent global and container-inits
+	 * from creating siblings.
+	 */
+	if ((clone_flags & CLONE_PARENT) &&
+				current->signal->flags & SIGNAL_UNKILLABLE)
+		return ERR_PTR(-EINVAL);
+
	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;
@@ -1000,18 +1055,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
	if (!try_module_get(task_thread_info(p)->exec_domain->module))
		goto bad_fork_cleanup_count;
 
-	if (p->binfmt && !try_module_get(p->binfmt->module))
-		goto bad_fork_cleanup_put_domain;
-
	p->did_exec = 0;
	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
	copy_flags(clone_flags, p);
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
-#ifdef CONFIG_PREEMPT_RCU
-	p->rcu_read_lock_nesting = 0;
-	p->rcu_flipctr_idx = 0;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
+	rcu_copy_process(p);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);
 
@@ -1022,8 +1071,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
	p->gtime = cputime_zero;
	p->utimescaled = cputime_zero;
	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
	p->prev_utime = cputime_zero;
	p->prev_stime = cputime_zero;
+#endif
 
	p->default_timer_slack_ns = current->timer_slack_ns;
 
@@ -1079,10 +1130,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
	p->bts = NULL;
 
+	p->stack_start = stack_start;
+
	/* Perform scheduler related setup. Assign this task to a CPU. */
	sched_fork(p, clone_flags);
 
-	retval = perf_counter_init_task(p);
+	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;
 
@@ -1257,14 +1310,15 @@ static struct task_struct *copy_process(unsigned long clone_flags,
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
-	perf_counter_fork(p);
+	perf_event_fork(p);
	return p;
 
 bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
 bad_fork_cleanup_io:
-	put_io_context(p->io_context);
+	if (p->io_context)
+		exit_io_context(p);
 bad_fork_cleanup_namespaces:
	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
@@ -1284,21 +1338,17 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
	audit_free(p);
 bad_fork_cleanup_policy:
-	perf_counter_free_task(p);
+	perf_event_free_task(p);
 #ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
	cgroup_exit(p, cgroup_callbacks_done);
	delayacct_tsk_free(p);
-	if (p->binfmt)
-		module_put(p->binfmt->module);
-bad_fork_cleanup_put_domain:
	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
	atomic_dec(&p->cred->user->processes);
-	put_cred(p->real_cred);
-	put_cred(p->cred);
+	exit_creds(p);
 bad_fork_free:
	free_task(p);
 fork_out:
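
copy_process() unwinds a partially built task through the chain of bad_fork_* labels above; this diff drops the bad_fork_cleanup_put_domain step because the binfmt reference now lives on the mm, and folds the two put_cred() calls into exit_creds(). The short example below shows the general goto-ladder idiom that chain relies on: acquire in order, and on failure jump to the label that releases everything acquired so far, in reverse order. The resources and names are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

struct widget {
	char *buf;
	FILE *log;
};

/*
 * Acquire resources in order; each failure jumps to the label that undoes
 * only what was already acquired, mirroring the bad_fork_* ladder.
 */
static struct widget *widget_create(const char *path)
{
	struct widget *w = calloc(1, sizeof(*w));

	if (!w)
		goto fail;

	w->buf = malloc(4096);
	if (!w->buf)
		goto free_widget;

	w->log = fopen(path, "w");
	if (!w->log)
		goto free_buf;

	return w;

free_buf:
	free(w->buf);
free_widget:
	free(w);
fail:
	return NULL;
}

static void widget_destroy(struct widget *w)
{
	fclose(w->log);
	free(w->buf);
	free(w);
}

int main(void)
{
	struct widget *w = widget_create("/tmp/widget.log");

	if (!w) {
		fprintf(stderr, "widget_create failed\n");
		return 1;
	}
	widget_destroy(w);
	return 0;
}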
