Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  277
1 files changed, 158 insertions, 119 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 47c15840a381..1415dc4598ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
-#include <linux/mnt_namespace.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
@@ -50,6 +49,7 @@
 #include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
+#include <linux/ksm.h>
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
@@ -60,8 +60,11 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
-#include <trace/sched.h>
+#include <linux/fs_struct.h>
 #include <linux/magic.h>
+#include <linux/perf_event.h>
+#include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -70,6 +73,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/sched.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -82,14 +87,12 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);	/* outer */
 
-DEFINE_TRACE(sched_process_fork);
-
 int nr_processes(void)
 {
 	int cpu;
 	int total = 0;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		total += per_cpu(process_counts, cpu);
 
 	return total;
@@ -136,9 +139,17 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+static void account_kernel_stack(struct thread_info *ti, int account)
+{
+	struct zone *zone = page_zone(virt_to_page(ti));
+
+	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
+}
+
 void free_task(struct task_struct *tsk)
 {
 	prop_local_destroy_single(&tsk->dirties);
+	account_kernel_stack(tsk->stack, -1);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
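
The new account_kernel_stack() above tracks kernel-stack pages per zone via NR_KERNEL_STACK; the same series surfaces this as the KernelStack line in /proc/meminfo. A minimal userspace sketch to read it back (assuming a kernel that carries this counter; the program itself is illustrative, not part of the patch):

#include <stdio.h>
#include <string.h>

/* Print the KernelStack line from /proc/meminfo, which is backed by
 * the NR_KERNEL_STACK counter updated in account_kernel_stack(). */
int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strncmp(line, "KernelStack:", 12) == 0)
			fputs(line, stdout);
	fclose(f);
	return 0;
}
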
@@ -152,8 +163,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
-	put_cred(tsk->real_cred);
-	put_cred(tsk->cred);
+	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 
 	if (!profile_handoff_task(tsk))
@@ -177,7 +187,7 @@ void __init fork_init(unsigned long mempages)
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
 #endif
 
 	/* do the arch specific task caches init */
@@ -240,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto out;
 
 	setup_thread_stack(tsk, orig);
+	clear_user_return_notifier(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
@@ -254,6 +265,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	tsk->btrace_seq = 0;
 #endif
 	tsk->splice_pipe = NULL;
+
+	account_kernel_stack(ti, 1);
+
 	return tsk;
 
 out:
@@ -289,6 +303,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	rb_link = &mm->mm_rb.rb_node;
 	rb_parent = NULL;
 	pprev = &mm->mmap;
+	retval = ksm_fork(mm, oldmm);
+	if (retval)
+		goto out;
 
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
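
ksm_fork() above copies the parent's KSM state into the new mm, so regions an application marked mergeable stay mergeable in the child. Userspace opts in with madvise(MADV_MERGEABLE); a small sketch, assuming CONFIG_KSM and a libc that exposes the flag (actual merging also needs ksmd enabled via /sys/kernel/mm/ksm/run):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* Map a region, mark it mergeable for KSM, then fork; the child's mm
 * inherits the mergeable state through ksm_fork(). */
int main(void)
{
	size_t len = 16 * 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	if (madvise(p, len, MADV_MERGEABLE) != 0) {
		perror("madvise(MADV_MERGEABLE)");	/* e.g. no CONFIG_KSM */
		return 1;
	}
	if (fork() == 0)
		_exit(0);	/* child: region is still mergeable here */
	return 0;
}
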
@@ -419,22 +436,30 @@ __setup("coredump_filter=", coredump_filter_setup);
 
 #include <linux/init_task.h>
 
+static void mm_init_aio(struct mm_struct *mm)
+{
+#ifdef CONFIG_AIO
+	spin_lock_init(&mm->ioctx_lock);
+	INIT_HLIST_HEAD(&mm->ioctx_list);
+#endif
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
-	mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
+	mm->flags = (current->mm) ?
+		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
 	spin_lock_init(&mm->page_table_lock);
-	spin_lock_init(&mm->ioctx_lock);
-	INIT_HLIST_HEAD(&mm->ioctx_list);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 
 	if (likely(!mm_alloc_pgd(mm))) {
@@ -486,6 +511,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
+		ksm_exit(mm);
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
@@ -494,6 +520,8 @@ void mmput(struct mm_struct *mm)
 			spin_unlock(&mmlist_lock);
 		}
 		put_swap_token(mm);
+		if (mm->binfmt)
+			module_put(mm->binfmt->module);
 		mmdrop(mm);
 	}
 }
@@ -544,12 +572,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 
 	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list))
+	if (unlikely(tsk->robust_list)) {
 		exit_robust_list(tsk);
+		tsk->robust_list = NULL;
+	}
 #ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list))
+	if (unlikely(tsk->compat_robust_list)) {
 		compat_exit_robust_list(tsk);
+		tsk->compat_robust_list = NULL;
+	}
 #endif
+	if (unlikely(!list_empty(&tsk->pi_state_list)))
+		exit_pi_state_list(tsk);
 #endif
 
 	/* Get rid of any cached register state */
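
The futex cleanup above becomes one-shot: each robust list is cleared after it is walked, and PI state is torn down explicitly. This exit-time walk is what lets a robust pthread mutex report a dead owner. A sketch of that user-visible contract (assumes glibc 2.12+ for the non-_np robust-mutex API; build with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

/* A process-shared robust mutex in shared memory: the child dies while
 * holding it, and the kernel's robust-list exit path (exit_robust_list())
 * marks it so the parent's lock returns EOWNERDEAD. */
int main(void)
{
	pthread_mutex_t *m = mmap(NULL, sizeof(*m), PROT_READ | PROT_WRITE,
				  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	pthread_mutexattr_t a;
	int err;

	if (m == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	pthread_mutexattr_init(&a);
	pthread_mutexattr_setpshared(&a, PTHREAD_PROCESS_SHARED);
	pthread_mutexattr_setrobust(&a, PTHREAD_MUTEX_ROBUST);
	pthread_mutex_init(m, &a);

	if (fork() == 0) {
		pthread_mutex_lock(m);
		_exit(0);		/* die with the mutex held */
	}
	wait(NULL);

	err = pthread_mutex_lock(m);
	if (err == EOWNERDEAD) {	/* previous owner died */
		printf("owner died, recovering\n");
		pthread_mutex_consistent(m);
	}
	pthread_mutex_unlock(m);
	return 0;
}
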
@@ -567,18 +601,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	 * the value intact in a core dump, and to save the unnecessary
 	 * trouble otherwise.  Userland only wants this done for a sys_exit.
 	 */
-	if (tsk->clear_child_tid
-	    && !(tsk->flags & PF_SIGNALED)
-	    && atomic_read(&mm->mm_users) > 1) {
-		u32 __user * tidptr = tsk->clear_child_tid;
+	if (tsk->clear_child_tid) {
+		if (!(tsk->flags & PF_SIGNALED) &&
+		    atomic_read(&mm->mm_users) > 1) {
+			/*
+			 * We don't check the error code - if userspace has
+			 * not set up a proper pointer then tough luck.
+			 */
+			put_user(0, tsk->clear_child_tid);
+			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
+					1, NULL, NULL, 0);
+		}
 		tsk->clear_child_tid = NULL;
-
-		/*
-		 * We don't check the error code - if userspace has
-		 * not set up a proper pointer then tough luck.
-		 */
-		put_user(0, tidptr);
-		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
 	}
 }
 
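The restructured block above is the kernel half of CLONE_CHILD_CLEARTID: when the child exits, the kernel zeroes the registered TID word and FUTEX_WAKEs any waiter, which is how pthread_join() waits for a thread to really be gone. A sketch of the raw protocol (assumes glibc's clone() wrapper and futex(2); stack size and names are arbitrary):

#define _GNU_SOURCE
#include <linux/futex.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

static volatile pid_t ctid = 1;	/* kernel zeroes this, then FUTEX_WAKEs it */

static int child_fn(void *arg)
{
	return 0;	/* plain exit fires the CLEARTID path in mm_release() */
}

int main(void)
{
	char *stack = malloc(64 * 1024);
	pid_t pid = clone(child_fn, stack + 64 * 1024,
			  CLONE_VM | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID |
			  SIGCHLD, NULL, NULL, NULL, (pid_t *)&ctid);

	if (pid < 0) {
		perror("clone");
		return 1;
	}
	/* Sleep until the kernel clears ctid - the put_user()/sys_futex()
	 * pair in the hunk above; this is essentially pthread_join(). */
	while (ctid != 0)
		syscall(SYS_futex, &ctid, FUTEX_WAIT, ctid, NULL, NULL, 0);
	waitpid(pid, NULL, 0);
	printf("child %d gone, ctid cleared by the kernel\n", (int)pid);
	return 0;
}
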
@@ -619,9 +653,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	mm->hiwater_rss = get_mm_rss(mm);
 	mm->hiwater_vm = mm->total_vm;
 
+	if (mm->binfmt && !try_module_get(mm->binfmt->module))
+		goto free_pt;
+
 	return mm;
 
 free_pt:
+	/* don't put binfmt in mmput, we haven't got module yet */
+	mm->binfmt = NULL;
 	mmput(mm);
 
 fail_nomem:
@@ -644,6 +683,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -681,38 +723,21 @@ fail_nomem:
 	return retval;
 }
 
-static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
-{
-	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
-	/* We don't need to lock fs - think why ;-) */
-	if (fs) {
-		atomic_set(&fs->count, 1);
-		rwlock_init(&fs->lock);
-		fs->umask = old->umask;
-		read_lock(&old->lock);
-		fs->root = old->root;
-		path_get(&old->root);
-		fs->pwd = old->pwd;
-		path_get(&old->pwd);
-		read_unlock(&old->lock);
-	}
-	return fs;
-}
-
-struct fs_struct *copy_fs_struct(struct fs_struct *old)
-{
-	return __copy_fs_struct(old);
-}
-
-EXPORT_SYMBOL_GPL(copy_fs_struct);
-
 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
+	struct fs_struct *fs = current->fs;
 	if (clone_flags & CLONE_FS) {
-		atomic_inc(&current->fs->count);
+		/* tsk->fs is already what we want */
+		write_lock(&fs->lock);
+		if (fs->in_exec) {
+			write_unlock(&fs->lock);
+			return -EAGAIN;
+		}
+		fs->users++;
+		write_unlock(&fs->lock);
 		return 0;
 	}
-	tsk->fs = __copy_fs_struct(current->fs);
+	tsk->fs = copy_fs_struct(fs);
 	if (!tsk->fs)
 		return -ENOMEM;
 	return 0;
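
copy_fs() above now takes a reference by bumping fs->users under fs->lock instead of an atomic count, and refuses CLONE_FS with -EAGAIN while the fs_struct is mid-exec. What is being shared is root, cwd and umask; a sketch of the user-visible effect (assumes glibc's clone() wrapper; the path is arbitrary):

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

/* With CLONE_FS the child shares the parent's fs_struct (the fs->users++
 * path above), so a chdir() in the child moves the parent's cwd too. */
static int child_fn(void *arg)
{
	return chdir("/tmp") != 0;
}

int main(void)
{
	char buf[4096];
	char *stack = malloc(64 * 1024);
	pid_t pid = clone(child_fn, stack + 64 * 1024, CLONE_FS | SIGCHLD, NULL);

	if (pid < 0) {
		perror("clone");
		return 1;
	}
	waitpid(pid, NULL, 0);
	if (getcwd(buf, sizeof(buf)))
		printf("parent cwd after child chdir: %s\n", buf);	/* /tmp */
	return 0;
}
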
@@ -803,16 +828,22 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	thread_group_cputime_init(sig);
 
 	/* Expiration times and increments. */
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
+	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
+	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
+	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
 
 	/* Cached expiration times. */
 	sig->cputime_expires.prof_exp = cputime_zero;
 	sig->cputime_expires.virt_exp = cputime_zero;
 	sig->cputime_expires.sched_exp = 0;
 
+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp =
+			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+		sig->cputimer.running = 1;
+	}
+
 	/* The timer lists. */
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
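
The added block above folds an RLIMIT_CPU soft limit into the group's cached CPU-timer expirations and marks the group cputimer running, so the limit is policed by the common posix-cpu-timers path. The user-visible contract is unchanged: exceeding the soft limit delivers SIGXCPU. A sketch (limits are arbitrary):

#include <signal.h>
#include <stdio.h>
#include <sys/resource.h>
#include <unistd.h>

static volatile sig_atomic_t hit;

static void on_xcpu(int sig)
{
	hit = 1;
}

/* Burn CPU past a 1-second RLIMIT_CPU soft limit and catch SIGXCPU,
 * the signal armed by the cputime_expires/cputimer setup above. */
int main(void)
{
	struct rlimit rl = { .rlim_cur = 1, .rlim_max = 2 };

	signal(SIGXCPU, on_xcpu);
	if (setrlimit(RLIMIT_CPU, &rl) != 0) {
		perror("setrlimit");
		return 1;
	}
	while (!hit)
		;		/* spin until a full CPU second is used */
	printf("got SIGXCPU after exceeding RLIMIT_CPU\n");
	return 0;
}
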
@@ -823,16 +854,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 
-	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
-		atomic_inc(&current->signal->live);
+	if (clone_flags & CLONE_THREAD)
 		return 0;
-	}
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
-
-	if (sig)
-		posix_cpu_timers_init_group(sig);
 
+	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -841,6 +866,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	atomic_set(&sig->live, 1);
 	init_waitqueue_head(&sig->wait_chldexit);
 	sig->flags = 0;
+	if (clone_flags & CLONE_NEWPID)
+		sig->flags |= SIGNAL_UNKILLABLE;
 	sig->group_exit_code = 0;
 	sig->group_exit_task = NULL;
 	sig->group_stop_count = 0;
@@ -859,9 +886,13 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+	sig->prev_utime = sig->prev_stime = cputime_zero;
+#endif
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+	sig->maxrss = sig->cmaxrss = 0;
 	task_io_accounting_init(&sig->ioac);
 	sig->sum_sched_runtime = 0;
 	taskstats_tgid_init(sig);
@@ -870,10 +901,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
 
+	sig->oom_adj = current->signal->oom_adj;
+
 	return 0;
 }
 
@@ -884,16 +919,6 @@ void __cleanup_signal(struct signal_struct *sig)
 	kmem_cache_free(signal_cachep, sig);
 }
 
-static void cleanup_signal(struct task_struct *tsk)
-{
-	struct signal_struct *sig = tsk->signal;
-
-	atomic_dec(&sig->live);
-
-	if (atomic_dec_and_test(&sig->count))
-		__cleanup_signal(sig);
-}
-
 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
@@ -979,6 +1004,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * Siblings of global init remain as zombies on exit since they are
+	 * not reaped by their parent (swapper). To solve this and to avoid
+	 * multi-rooted process trees, prevent global and container-inits
+	 * from creating siblings.
+	 */
+	if ((clone_flags & CLONE_PARENT) &&
+	    current->signal->flags & SIGNAL_UNKILLABLE)
+		return ERR_PTR(-EINVAL);
+
 	retval = security_task_create(clone_flags);
 	if (retval)
 		goto fork_out;
@@ -988,6 +1023,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (!p)
 		goto fork_out;
 
+	ftrace_graph_init_task(p);
+
 	rt_mutex_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -1018,22 +1055,15 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (!try_module_get(task_thread_info(p)->exec_domain->module))
 		goto bad_fork_cleanup_count;
 
-	if (p->binfmt && !try_module_get(p->binfmt->module))
-		goto bad_fork_cleanup_put_domain;
-
 	p->did_exec = 0;
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
-#ifdef CONFIG_PREEMPT_RCU
-	p->rcu_read_lock_nesting = 0;
-	p->rcu_flipctr_idx = 0;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
+	rcu_copy_process(p);
 	p->vfork_done = NULL;
 	spin_lock_init(&p->alloc_lock);
 
-	clear_tsk_thread_flag(p, TIF_SIGPENDING);
 	init_sigpending(&p->pending);
 
 	p->utime = cputime_zero;
@@ -1041,16 +1071,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
+#endif
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	p->last_switch_count = 0;
-	p->last_switch_timestamp = 0;
-#endif
-
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
@@ -1100,12 +1127,18 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(current->ptrace))
-		ptrace_fork(p, clone_flags);
+
+	p->bts = NULL;
+
+	p->stack_start = stack_start;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
+	retval = perf_event_init_task(p);
+	if (retval)
+		goto bad_fork_cleanup_policy;
+
 	if ((retval = audit_alloc(p)))
 		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
@@ -1125,7 +1158,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_mm;
 	if ((retval = copy_io(clone_flags, p)))
 		goto bad_fork_cleanup_namespaces;
-	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
 		goto bad_fork_cleanup_io;
 
@@ -1142,8 +1175,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		}
 	}
 
-	ftrace_graph_init_task(p);
-
 	p->pid = pid_nr(pid);
 	p->tgid = p->pid;
 	if (clone_flags & CLONE_THREAD)
@@ -1152,7 +1183,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (current->nsproxy != p->nsproxy) {
 		retval = ns_cgroup_clone(p, pid);
 		if (retval)
-			goto bad_fork_free_graph;
+			goto bad_fork_free_pid;
 	}
 
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1244,10 +1275,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_free_graph;
+		goto bad_fork_free_pid;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
+		atomic_inc(&current->signal->count);
+		atomic_inc(&current->signal->live);
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 	}
@@ -1263,8 +1296,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->signal->leader_pid = pid;
 			tty_kref_put(p->signal->tty);
 			p->signal->tty = tty_kref_get(current->signal->tty);
-			set_task_pgrp(p, task_pgrp_nr(current));
-			set_task_session(p, task_session_nr(current));
 			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			attach_pid(p, PIDTYPE_SID, task_session(current));
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
@@ -1279,22 +1310,23 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	perf_event_fork(p);
 	return p;
 
-bad_fork_free_graph:
-	ftrace_graph_exit_task(p);
 bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_io:
-	put_io_context(p->io_context);
+	if (p->io_context)
+		exit_io_context(p);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm)
 		mmput(p->mm);
 bad_fork_cleanup_signal:
-	cleanup_signal(p);
+	if (!(clone_flags & CLONE_THREAD))
+		__cleanup_signal(p->signal);
 bad_fork_cleanup_sighand:
 	__cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
@@ -1306,20 +1338,17 @@ bad_fork_cleanup_semundo:
 bad_fork_cleanup_audit:
 	audit_free(p);
 bad_fork_cleanup_policy:
+	perf_event_free_task(p);
 #ifdef CONFIG_NUMA
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
 	cgroup_exit(p, cgroup_callbacks_done);
 	delayacct_tsk_free(p);
-	if (p->binfmt)
-		module_put(p->binfmt->module);
-bad_fork_cleanup_put_domain:
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
-	put_cred(p->real_cred);
-	put_cred(p->cred);
+	exit_creds(p);
 bad_fork_free:
 	free_task(p);
 fork_out:
@@ -1422,7 +1451,7 @@ long do_fork(unsigned long clone_flags,
 	}
 
 	audit_finish_fork(p);
-	tracehook_report_clone(trace, regs, clone_flags, nr, p);
+	tracehook_report_clone(regs, clone_flags, nr, p);
 
 	/*
 	 * We set PF_STARTING at creation in case tracing wants to
@@ -1474,20 +1503,21 @@ void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
-			sighand_ctor);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+			SLAB_NOTRACK, sighand_ctor);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	files_cachep = kmem_cache_create("files_cache",
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
 	mmap_init();
 }
 
@@ -1543,12 +1573,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 {
 	struct fs_struct *fs = current->fs;
 
-	if ((unshare_flags & CLONE_FS) &&
-	    (fs && atomic_read(&fs->count) > 1)) {
-		*new_fsp = __copy_fs_struct(current->fs);
-		if (!*new_fsp)
-			return -ENOMEM;
-	}
+	if (!(unshare_flags & CLONE_FS) || !fs)
+		return 0;
+
+	/* don't need lock here; in the worst case we'll do useless copy */
+	if (fs->users == 1)
+		return 0;
+
+	*new_fsp = copy_fs_struct(fs);
+	if (!*new_fsp)
+		return -ENOMEM;
 
 	return 0;
 }
@@ -1664,8 +1698,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 
 	if (new_fs) {
 		fs = current->fs;
+		write_lock(&fs->lock);
 		current->fs = new_fs;
-		new_fs = fs;
+		if (--fs->users)
+			new_fs = NULL;
+		else
+			new_fs = fs;
+		write_unlock(&fs->lock);
 	}
 
 	if (new_mm) {
@@ -1704,7 +1743,7 @@ bad_unshare_cleanup_sigh:
 
 bad_unshare_cleanup_fs:
 	if (new_fs)
-		put_fs_struct(new_fs);
+		free_fs_struct(new_fs);
 
 bad_unshare_cleanup_thread:
 bad_unshare_out: