Diffstat (limited to 'kernel/fork.c')
-rw-r--r--	kernel/fork.c	111
1 file changed, 46 insertions, 65 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 266c6af6ef1b..4799c5f0e6d0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -64,6 +64,7 @@
 #include <linux/magic.h>
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -86,12 +87,20 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+	return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 int nr_processes(void)
 {
 	int cpu;
 	int total = 0;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		total += per_cpu(process_counts, cpu);
 
 	return total;
@@ -249,6 +258,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto out;
 
 	setup_thread_stack(tsk, orig);
+	clear_user_return_notifier(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
@@ -326,15 +336,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
+		INIT_LIST_HEAD(&tmp->anon_vma_chain);
 		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
+		if (anon_vma_fork(tmp, mpnt))
+			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
-		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
@@ -389,6 +401,8 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
+fail_nomem_anon_vma_fork:
+	mpol_put(pol);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
@@ -452,8 +466,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
-	set_mm_counter(mm, file_rss, 0);
-	set_mm_counter(mm, anon_rss, 0);
+	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
@@ -570,12 +583,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 
 	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list))
+	if (unlikely(tsk->robust_list)) {
 		exit_robust_list(tsk);
+		tsk->robust_list = NULL;
+	}
 #ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list))
+	if (unlikely(tsk->compat_robust_list)) {
 		compat_exit_robust_list(tsk);
+		tsk->compat_robust_list = NULL;
+	}
 #endif
+	if (unlikely(!list_empty(&tsk->pi_state_list)))
+		exit_pi_state_list(tsk);
 #endif
 
 	/* Get rid of any cached register state */
@@ -816,23 +835,14 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+	unsigned long cpu_limit;
+
 	/* Thread group counters. */
 	thread_group_cputime_init(sig);
 
-	/* Expiration times and increments. */
-	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
-	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
-
-	/* Cached expiration times. */
-	sig->cputime_expires.prof_exp = cputime_zero;
-	sig->cputime_expires.virt_exp = cputime_zero;
-	sig->cputime_expires.sched_exp = 0;
-
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		sig->cputime_expires.prof_exp =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+	if (cpu_limit != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
 		sig->cputimer.running = 1;
 	}
 
@@ -849,7 +859,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	if (clone_flags & CLONE_THREAD)
 		return 0;
 
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -857,43 +867,21 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	atomic_set(&sig->count, 1);
 	atomic_set(&sig->live, 1);
 	init_waitqueue_head(&sig->wait_chldexit);
-	sig->flags = 0;
 	if (clone_flags & CLONE_NEWPID)
 		sig->flags |= SIGNAL_UNKILLABLE;
-	sig->group_exit_code = 0;
-	sig->group_exit_task = NULL;
-	sig->group_stop_count = 0;
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->leader = 0;	/* session leadership doesn't inherit */
-	sig->tty_old_pgrp = NULL;
-	sig->tty = NULL;
-
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
-	sig->gtime = cputime_zero;
-	sig->cgtime = cputime_zero;
-	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
-	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
-	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
-	sig->maxrss = sig->cmaxrss = 0;
-	task_io_accounting_init(&sig->ioac);
-	sig->sum_sched_runtime = 0;
-	taskstats_tgid_init(sig);
-
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
 	posix_cpu_timers_init_group(sig);
 
-	acct_init_pacct(&sig->pacct);
-
 	tty_audit_fork(sig);
 
 	sig->oom_adj = current->signal->oom_adj;
@@ -928,9 +916,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-	spin_lock_init(&p->pi_lock);
+	raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-	plist_head_init(&p->pi_waiters, &p->pi_lock);
+	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
 #endif
 }
@@ -1022,7 +1010,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	retval = -EAGAIN;
 	if (atomic_read(&p->real_cred->user->processes) >=
-			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+			task_rlimit(p, RLIMIT_NPROC)) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
 		    p->real_cred->user != INIT_USER)
 			goto bad_fork_free;
@@ -1060,8 +1048,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
+#endif
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
@@ -1114,6 +1104,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	p->memcg_batch.do_batch = 0;
+	p->memcg_batch.memcg = NULL;
+#endif
 
 	p->bts = NULL;
 
@@ -1193,9 +1187,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->sas_ss_sp = p->sas_ss_size = 0;
 
 	/*
-	 * Syscall tracing should be turned off in the child regardless
-	 * of CLONE_PTRACE.
+	 * Syscall tracing and stepping should be turned off in the
+	 * child regardless of CLONE_PTRACE.
 	 */
+	user_disable_single_step(p);
 	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1223,21 +1218,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
-	/*
-	 * The task hasn't been attached yet, so its cpus_allowed mask will
-	 * not be changed, nor will its assigned CPU.
-	 *
-	 * The cpus_allowed mask of the parent may have changed after it was
-	 * copied first time - so re-copy it here, then check the child's CPU
-	 * to ensure it is on a valid CPU (and if not, just force it back to
-	 * parent's CPU). This avoids alot of nasty races.
-	 */
-	p->cpus_allowed = current->cpus_allowed;
-	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-			!cpu_online(task_cpu(p))))
-		set_task_cpu(p, smp_processor_id());
-
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
@@ -1273,7 +1253,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (likely(p->pid)) {
-		list_add_tail(&p->sibling, &p->real_parent->children);
 		tracehook_finish_clone(p, clone_flags, trace);
 
 		if (thread_group_leader(p)) {
@@ -1285,6 +1264,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->signal->tty = tty_kref_get(current->signal->tty);
 			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			attach_pid(p, PIDTYPE_SID, task_session(current));
+			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
 		}
@@ -1304,7 +1284,8 @@ bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_io:
-	put_io_context(p->io_context);
+	if (p->io_context)
+		exit_io_context(p);
bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm: