Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  277
1 file changed, 158 insertions, 119 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 47c15840a381..1415dc4598ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
-#include <linux/mnt_namespace.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
@@ -50,6 +49,7 @@
 #include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
+#include <linux/ksm.h>
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
@@ -60,8 +60,11 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
-#include <trace/sched.h>
+#include <linux/fs_struct.h>
 #include <linux/magic.h>
+#include <linux/perf_event.h>
+#include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -70,6 +73,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/sched.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -82,14 +87,12 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
-DEFINE_TRACE(sched_process_fork);
-
 int nr_processes(void)
 {
 	int cpu;
 	int total = 0;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		total += per_cpu(process_counts, cpu);
 
 	return total;
@@ -136,9 +139,17 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+static void account_kernel_stack(struct thread_info *ti, int account)
+{
+	struct zone *zone = page_zone(virt_to_page(ti));
+
+	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
+}
+
 void free_task(struct task_struct *tsk)
 {
 	prop_local_destroy_single(&tsk->dirties);
+	account_kernel_stack(tsk->stack, -1);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
@@ -152,8 +163,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
-	put_cred(tsk->real_cred);
-	put_cred(tsk->cred);
+	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 
 	if (!profile_handoff_task(tsk))
@@ -177,7 +187,7 @@ void __init fork_init(unsigned long mempages)
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
 #endif
 
 	/* do the arch specific task caches init */
@@ -240,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto out;
 
 	setup_thread_stack(tsk, orig);
+	clear_user_return_notifier(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
@@ -254,6 +265,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	tsk->btrace_seq = 0;
 #endif
 	tsk->splice_pipe = NULL;
+
+	account_kernel_stack(ti, 1);
+
 	return tsk;
 
 out:
@@ -289,6 +303,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	rb_link = &mm->mm_rb.rb_node;
 	rb_parent = NULL;
 	pprev = &mm->mmap;
+	retval = ksm_fork(mm, oldmm);
+	if (retval)
+		goto out;
 
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
@@ -419,22 +436,30 @@ __setup("coredump_filter=", coredump_filter_setup);
 
 #include <linux/init_task.h>
 
+static void mm_init_aio(struct mm_struct *mm)
+{
+#ifdef CONFIG_AIO
+	spin_lock_init(&mm->ioctx_lock);
+	INIT_HLIST_HEAD(&mm->ioctx_list);
+#endif
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
-	mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
+	mm->flags = (current->mm) ?
+		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
 	spin_lock_init(&mm->page_table_lock);
-	spin_lock_init(&mm->ioctx_lock);
-	INIT_HLIST_HEAD(&mm->ioctx_list);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 
 	if (likely(!mm_alloc_pgd(mm))) {
@@ -486,6 +511,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
+		ksm_exit(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
@@ -494,6 +520,8 @@ void mmput(struct mm_struct *mm)
 			spin_unlock(&mmlist_lock);
 		}
 		put_swap_token(mm);
+		if (mm->binfmt)
+			module_put(mm->binfmt->module);
 		mmdrop(mm);
 	}
 }
@@ -544,12 +572,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 
 	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list))
+	if (unlikely(tsk->robust_list)) {
 		exit_robust_list(tsk);
+		tsk->robust_list = NULL;
+	}
 #ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list))
+	if (unlikely(tsk->compat_robust_list)) {
 		compat_exit_robust_list(tsk);
+		tsk->compat_robust_list = NULL;
+	}
 #endif
+	if (unlikely(!list_empty(&tsk->pi_state_list)))
+		exit_pi_state_list(tsk);
 #endif
 
 	/* Get rid of any cached register state */
@@ -567,18 +601,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	 * the value intact in a core dump, and to save the unnecessary
 	 * trouble otherwise. Userland only wants this done for a sys_exit.
 	 */
-	if (tsk->clear_child_tid
-	    && !(tsk->flags & PF_SIGNALED)
-	    && atomic_read(&mm->mm_users) > 1) {
-		u32 __user * tidptr = tsk->clear_child_tid;
+	if (tsk->clear_child_tid) {
+		if (!(tsk->flags & PF_SIGNALED) &&
+		    atomic_read(&mm->mm_users) > 1) {
+			/*
+			 * We don't check the error code - if userspace has
+			 * not set up a proper pointer then tough luck.
+			 */
+			put_user(0, tsk->clear_child_tid);
+			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
+					1, NULL, NULL, 0);
+		}
 		tsk->clear_child_tid = NULL;
-
-		/*
-		 * We don't check the error code - if userspace has
-		 * not set up a proper pointer then tough luck.
-		 */
-		put_user(0, tidptr);
-		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
 	}
 }
 
@@ -619,9 +653,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	mm->hiwater_rss = get_mm_rss(mm);
 	mm->hiwater_vm = mm->total_vm;
 
+	if (mm->binfmt && !try_module_get(mm->binfmt->module))
+		goto free_pt;
+
 	return mm;
 
 free_pt:
+	/* don't put binfmt in mmput, we haven't got module yet */
+	mm->binfmt = NULL;
 	mmput(mm);
 
 fail_nomem:
@@ -644,6 +683,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -681,38 +723,21 @@ fail_nomem:
 	return retval;
 }
 
-static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
-{
-	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
-	/* We don't need to lock fs - think why ;-) */
-	if (fs) {
-		atomic_set(&fs->count, 1);
-		rwlock_init(&fs->lock);
-		fs->umask = old->umask;
-		read_lock(&old->lock);
-		fs->root = old->root;
-		path_get(&old->root);
-		fs->pwd = old->pwd;
-		path_get(&old->pwd);
-		read_unlock(&old->lock);
-	}
-	return fs;
-}
-
-struct fs_struct *copy_fs_struct(struct fs_struct *old)
-{
-	return __copy_fs_struct(old);
-}
-
-EXPORT_SYMBOL_GPL(copy_fs_struct);
-
 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
+	struct fs_struct *fs = current->fs;
 	if (clone_flags & CLONE_FS) {
-		atomic_inc(&current->fs->count);
+		/* tsk->fs is already what we want */
+		write_lock(&fs->lock);
+		if (fs->in_exec) {
+			write_unlock(&fs->lock);
+			return -EAGAIN;
+		}
+		fs->users++;
+		write_unlock(&fs->lock);
 		return 0;
 	}
-	tsk->fs = __copy_fs_struct(current->fs);
+	tsk->fs = copy_fs_struct(fs);
 	if (!tsk->fs)
 		return -ENOMEM;
 	return 0;
@@ -803,16 +828,22 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	thread_group_cputime_init(sig);
 
 	/* Expiration times and increments. */
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
+	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
+	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
 
 	/* Cached expiration times. */
 	sig->cputime_expires.prof_exp = cputime_zero;
 	sig->cputime_expires.virt_exp = cputime_zero;
 	sig->cputime_expires.sched_exp = 0;
 
+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp =
+			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+		sig->cputimer.running = 1;
+	}
+
 	/* The timer lists. */
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -823,16 +854,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 
-	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
-		atomic_inc(&current->signal->live);
+	if (clone_flags & CLONE_THREAD)
 		return 0;
-	}
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
-
-	if (sig)
-		posix_cpu_timers_init_group(sig);
 
+	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -841,6 +866,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	atomic_set(&sig->live, 1);
 	init_waitqueue_head(&sig->wait_chldexit);
 	sig->flags = 0;
+	if (clone_flags & CLONE_NEWPID)
+		sig->flags |= SIGNAL_UNKILLABLE;
 	sig->group_exit_code = 0;
 	sig->group_exit_task = NULL;
 	sig->group_stop_count = 0;
@@ -859,9 +886,13 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	sig->prev_utime = sig->prev_stime = cputime_zero;
+#endif
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+	sig->maxrss = sig->cmaxrss = 0;
 	task_io_accounting_init(&sig->ioac);
 	sig->sum_sched_runtime = 0;
 	taskstats_tgid_init(sig);
@@ -870,10 +901,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
 
+	sig->oom_adj = current->signal->oom_adj;
+
 	return 0;
 }
 
@@ -884,16 +919,6 @@ void __cleanup_signal(struct signal_struct *sig)
 	kmem_cache_free(signal_cachep, sig);
 }
 
-static void cleanup_signal(struct task_struct *tsk)
-{
-	struct signal_struct *sig = tsk->signal;
-
-	atomic_dec(&sig->live);
-
-	if (atomic_dec_and_test(&sig->count))
-		__cleanup_signal(sig);
-}
-
 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
@@ -979,6 +1004,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * Siblings of global init remain as zombies on exit since they are
+	 * not reaped by their parent (swapper). To solve this and to avoid
+	 * multi-rooted process trees, prevent global and container-inits
+	 * from creating siblings.
+	 */
+	if ((clone_flags & CLONE_PARENT) &&
+				current->signal->flags & SIGNAL_UNKILLABLE)
+		return ERR_PTR(-EINVAL);
+
 	retval = security_task_create(clone_flags);
 	if (retval)
 		goto fork_out;
@@ -988,6 +1023,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (!p)
 		goto fork_out;
 
+	ftrace_graph_init_task(p);
+
 	rt_mutex_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -1018,22 +1055,15 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (!try_module_get(task_thread_info(p)->exec_domain->module))
 		goto bad_fork_cleanup_count;
 
-	if (p->binfmt && !try_module_get(p->binfmt->module))
-		goto bad_fork_cleanup_put_domain;
-
 	p->did_exec = 0;
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
-#ifdef CONFIG_PREEMPT_RCU
-	p->rcu_read_lock_nesting = 0;
-	p->rcu_flipctr_idx = 0;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
+	rcu_copy_process(p);
 	p->vfork_done = NULL;
 	spin_lock_init(&p->alloc_lock);
 
-	clear_tsk_thread_flag(p, TIF_SIGPENDING);
 	init_sigpending(&p->pending);
 
 	p->utime = cputime_zero;
@@ -1041,16 +1071,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
+#endif
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	p->last_switch_count = 0;
-	p->last_switch_timestamp = 0;
-#endif
-
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
@@ -1100,12 +1127,18 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(current->ptrace))
-		ptrace_fork(p, clone_flags);
+
+	p->bts = NULL;
+
+	p->stack_start = stack_start;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
+	retval = perf_event_init_task(p);
+	if (retval)
+		goto bad_fork_cleanup_policy;
+
 	if ((retval = audit_alloc(p)))
 		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
@@ -1125,7 +1158,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_mm;
 	if ((retval = copy_io(clone_flags, p)))
 		goto bad_fork_cleanup_namespaces;
-	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
 		goto bad_fork_cleanup_io;
 
@@ -1142,8 +1175,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		}
 	}
 
-	ftrace_graph_init_task(p);
-
 	p->pid = pid_nr(pid);
 	p->tgid = p->pid;
 	if (clone_flags & CLONE_THREAD)
@@ -1152,7 +1183,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (current->nsproxy != p->nsproxy) {
 		retval = ns_cgroup_clone(p, pid);
 		if (retval)
-			goto bad_fork_free_graph;
+			goto bad_fork_free_pid;
 	}
 
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1244,10 +1275,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_free_graph;
+		goto bad_fork_free_pid;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
+		atomic_inc(&current->signal->count);
+		atomic_inc(&current->signal->live);
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 	}
@@ -1263,8 +1296,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->signal->leader_pid = pid;
 			tty_kref_put(p->signal->tty);
 			p->signal->tty = tty_kref_get(current->signal->tty);
-			set_task_pgrp(p, task_pgrp_nr(current));
-			set_task_session(p, task_session_nr(current));
 			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			attach_pid(p, PIDTYPE_SID, task_session(current));
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
@@ -1279,22 +1310,23 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	perf_event_fork(p);
 	return p;
 
-bad_fork_free_graph:
-	ftrace_graph_exit_task(p);
 bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_io:
-	put_io_context(p->io_context);
+	if (p->io_context)
+		exit_io_context(p);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm)
 		mmput(p->mm);
 bad_fork_cleanup_signal:
-	cleanup_signal(p);
+	if (!(clone_flags & CLONE_THREAD))
+		__cleanup_signal(p->signal);
 bad_fork_cleanup_sighand:
 	__cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
@@ -1306,20 +1338,17 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
+	perf_event_free_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
 	cgroup_exit(p, cgroup_callbacks_done);
 	delayacct_tsk_free(p);
-	if (p->binfmt)
-		module_put(p->binfmt->module);
-bad_fork_cleanup_put_domain:
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
-	put_cred(p->real_cred);
-	put_cred(p->cred);
+	exit_creds(p);
 bad_fork_free:
 	free_task(p);
 fork_out:
@@ -1422,7 +1451,7 @@ long do_fork(unsigned long clone_flags,
 	}
 
 	audit_finish_fork(p);
-	tracehook_report_clone(trace, regs, clone_flags, nr, p);
+	tracehook_report_clone(regs, clone_flags, nr, p);
 
 	/*
 	 * We set PF_STARTING at creation in case tracing wants to
@@ -1474,20 +1503,21 @@ void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
-			sighand_ctor);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+			SLAB_NOTRACK, sighand_ctor);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	files_cachep = kmem_cache_create("files_cache",
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
 	mmap_init();
 }
 
@@ -1543,12 +1573,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 {
 	struct fs_struct *fs = current->fs;
 
-	if ((unshare_flags & CLONE_FS) &&
-	    (fs && atomic_read(&fs->count) > 1)) {
-		*new_fsp = __copy_fs_struct(current->fs);
-		if (!*new_fsp)
-			return -ENOMEM;
-	}
+	if (!(unshare_flags & CLONE_FS) || !fs)
+		return 0;
+
+	/* don't need lock here; in the worst case we'll do useless copy */
+	if (fs->users == 1)
+		return 0;
+
+	*new_fsp = copy_fs_struct(fs);
+	if (!*new_fsp)
+		return -ENOMEM;
 
 	return 0;
 }
@@ -1664,8 +1698,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 
 	if (new_fs) {
 		fs = current->fs;
+		write_lock(&fs->lock);
 		current->fs = new_fs;
-		new_fs = fs;
+		if (--fs->users)
+			new_fs = NULL;
+		else
+			new_fs = fs;
+		write_unlock(&fs->lock);
 	}
 
 	if (new_mm) {
@@ -1704,7 +1743,7 @@ bad_unshare_cleanup_sigh:
 
 bad_unshare_cleanup_fs:
 	if (new_fs)
-		put_fs_struct(new_fs);
+		free_fs_struct(new_fs);
 
 bad_unshare_cleanup_thread:
 bad_unshare_out: