Diffstat (limited to 'kernel/fork.c')

 kernel/fork.c | 104 ++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 64 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 9fad346d7029..166eb780dd7d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -64,6 +64,7 @@
 #include <linux/magic.h>
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -89,6 +90,14 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+        return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 int nr_processes(void)
 {
         int cpu;
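For context: with CONFIG_PROVE_RCU, a helper like lockdep_tasklist_lock_is_held() is intended as the lockdep condition of rcu_dereference_check(), so task-list dereferences warn unless the caller is either in an RCU read-side critical section or holding tasklist_lock. A minimal kernel-context sketch of such a call site (not a standalone program; the accessor name is hypothetical, not part of this patch):

static struct task_struct *task_real_parent(struct task_struct *p)
{
        /* Legal under rcu_read_lock() or while holding tasklist_lock;
         * CONFIG_PROVE_RCU warns for any other caller. */
        return rcu_dereference_check(p->real_parent,
                                     lockdep_tasklist_lock_is_held());
}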
@@ -256,6 +265,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                 goto out;
 
         setup_thread_stack(tsk, orig);
+        clear_user_return_notifier(tsk);
         stackend = end_of_stack(tsk);
         *stackend = STACK_END_MAGIC;    /* for overflow detection */
 
@@ -333,15 +343,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                 if (!tmp)
                         goto fail_nomem;
                 *tmp = *mpnt;
+                INIT_LIST_HEAD(&tmp->anon_vma_chain);
                 pol = mpol_dup(vma_policy(mpnt));
                 retval = PTR_ERR(pol);
                 if (IS_ERR(pol))
                         goto fail_nomem_policy;
                 vma_set_policy(tmp, pol);
+                if (anon_vma_fork(tmp, mpnt))
+                        goto fail_nomem_anon_vma_fork;
                 tmp->vm_flags &= ~VM_LOCKED;
                 tmp->vm_mm = mm;
                 tmp->vm_next = NULL;
-                anon_vma_link(tmp);
                 file = tmp->vm_file;
                 if (file) {
                         struct inode *inode = file->f_path.dentry->d_inode;
@@ -396,6 +408,8 @@ out:
         flush_tlb_mm(oldmm);
         up_write(&oldmm->mmap_sem);
         return retval;
+fail_nomem_anon_vma_fork:
+        mpol_put(pol);
 fail_nomem_policy:
         kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
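The new fail_nomem_anon_vma_fork label extends the usual goto-unwind ladder: when anon_vma_fork() fails, the mempolicy reference taken by mpol_dup() must be dropped before the half-built vma itself is freed. A runnable userspace sketch of the same idiom, with illustrative stand-ins for the kernel helpers:

/* Userspace analogue of the goto-unwind idiom above: each failure
 * label releases exactly what was acquired before the failing step,
 * in reverse order. Compile with: cc -o demo demo.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct vma { char *policy; char *chain; };

static struct vma *dup_vma(const struct vma *orig)
{
        struct vma *tmp = malloc(sizeof(*tmp));
        if (!tmp)
                goto fail_nomem;
        *tmp = *orig;
        tmp->policy = strdup(orig->policy);     /* like mpol_dup() */
        if (!tmp->policy)
                goto fail_nomem_policy;
        tmp->chain = strdup(orig->chain);       /* like anon_vma_fork() */
        if (!tmp->chain)
                goto fail_nomem_anon_vma_fork;
        return tmp;

fail_nomem_anon_vma_fork:
        free(tmp->policy);                      /* like mpol_put() */
fail_nomem_policy:
        free(tmp);                              /* like kmem_cache_free() */
fail_nomem:
        return NULL;
}

int main(void)
{
        struct vma orig = { "default", "chain" };
        struct vma *copy = dup_vma(&orig);

        printf("dup_vma: %s\n", copy ? "ok" : "failed");
        if (copy) {
                free(copy->chain);
                free(copy->policy);
                free(copy);
        }
        return 0;
}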
@@ -459,8 +473,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
                 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
         mm->core_state = NULL;
         mm->nr_ptes = 0;
-        set_mm_counter(mm, file_rss, 0);
-        set_mm_counter(mm, anon_rss, 0);
+        memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
         spin_lock_init(&mm->page_table_lock);
         mm->free_area_cache = TASK_UNMAPPED_BASE;
         mm->cached_hole_size = ~0UL;
@@ -829,23 +842,14 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+        unsigned long cpu_limit;
+
         /* Thread group counters. */
         thread_group_cputime_init(sig);
 
-        /* Expiration times and increments. */
-        sig->it[CPUCLOCK_PROF].expires = cputime_zero;
-        sig->it[CPUCLOCK_PROF].incr = cputime_zero;
-        sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
-        sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
-
-        /* Cached expiration times. */
-        sig->cputime_expires.prof_exp = cputime_zero;
-        sig->cputime_expires.virt_exp = cputime_zero;
-        sig->cputime_expires.sched_exp = 0;
-
-        if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-                sig->cputime_expires.prof_exp =
-                        secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+        cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+        if (cpu_limit != RLIM_INFINITY) {
+                sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
                 sig->cputimer.running = 1;
         }
 
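The rewritten posix_cpu_timers_init_group() reads the RLIMIT_CPU soft limit once (ACCESS_ONCE keeps the compiler from reloading it) and arms the group CPU timer only when the limit is finite. The behaviour being wired up here is observable from userspace with standard rlimit calls; a runnable sketch:

/* Standalone demo of the limit this hunk caches: with RLIMIT_CPU set,
 * the kernel delivers SIGXCPU once the process has consumed that much
 * CPU time. Compile with: cc -o cpulimit cpulimit.c */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <unistd.h>

static void on_xcpu(int sig)
{
        (void)sig;
        write(STDOUT_FILENO, "SIGXCPU: soft RLIMIT_CPU reached\n", 33);
        _exit(0);
}

int main(void)
{
        struct rlimit rl = { .rlim_cur = 1, .rlim_max = 2 };    /* seconds */

        signal(SIGXCPU, on_xcpu);
        if (setrlimit(RLIMIT_CPU, &rl) != 0) {
                perror("setrlimit");
                return 1;
        }
        for (;;)
                ;       /* burn CPU until the limit fires */
}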
@@ -862,7 +866,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
         if (clone_flags & CLONE_THREAD)
                 return 0;
 
-        sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+        sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
         tsk->signal = sig;
         if (!sig)
                 return -ENOMEM;
@@ -870,43 +874,21 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
         atomic_set(&sig->count, 1);
         atomic_set(&sig->live, 1);
         init_waitqueue_head(&sig->wait_chldexit);
-        sig->flags = 0;
         if (clone_flags & CLONE_NEWPID)
                 sig->flags |= SIGNAL_UNKILLABLE;
-        sig->group_exit_code = 0;
-        sig->group_exit_task = NULL;
-        sig->group_stop_count = 0;
         sig->curr_target = tsk;
         init_sigpending(&sig->shared_pending);
         INIT_LIST_HEAD(&sig->posix_timers);
 
         hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-        sig->it_real_incr.tv64 = 0;
         sig->real_timer.function = it_real_fn;
 
-        sig->leader = 0;        /* session leadership doesn't inherit */
-        sig->tty_old_pgrp = NULL;
-        sig->tty = NULL;
-
-        sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
-        sig->gtime = cputime_zero;
-        sig->cgtime = cputime_zero;
-        sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
-        sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
-        sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
-        sig->maxrss = sig->cmaxrss = 0;
-        task_io_accounting_init(&sig->ioac);
-        sig->sum_sched_runtime = 0;
-        taskstats_tgid_init(sig);
-
         task_lock(current->group_leader);
         memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
         task_unlock(current->group_leader);
 
         posix_cpu_timers_init_group(sig);
 
-        acct_init_pacct(&sig->pacct);
-
         tty_audit_fork(sig);
 
         sig->oom_adj = current->signal->oom_adj;
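This hunk works together with the kmem_cache_zalloc() switch above: the allocation now comes back zero-filled, so every explicit zero/NULL/cputime_zero assignment can simply be deleted. A runnable userspace analogue of the same cleanup:

/* Userspace analogue of the kmem_cache_alloc() -> kmem_cache_zalloc()
 * switch: calloc() hands back zeroed storage, so field-by-field zero
 * initialization can be dropped. Compile with: cc -o zalloc zalloc.c */
#include <stdio.h>
#include <stdlib.h>

struct sig_counts {
        unsigned long nvcsw, nivcsw;
        unsigned long min_flt, maj_flt;
};

int main(void)
{
        /* Before: malloc() plus explicit zeroing of every field. */
        struct sig_counts *a = malloc(sizeof(*a));
        if (!a)
                return 1;
        a->nvcsw = a->nivcsw = 0;
        a->min_flt = a->maj_flt = 0;

        /* After: calloc() guarantees all-zero contents up front. */
        struct sig_counts *b = calloc(1, sizeof(*b));
        if (!b) {
                free(a);
                return 1;
        }

        printf("b->maj_flt = %lu (zeroed by calloc)\n", b->maj_flt);
        free(a);
        free(b);
        return 0;
}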
@@ -941,9 +923,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-        spin_lock_init(&p->pi_lock);
+        raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-        plist_head_init(&p->pi_waiters, &p->pi_lock);
+        plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
         p->pi_blocked_on = NULL;
 #endif
 }
@@ -1035,7 +1017,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
         retval = -EAGAIN;
         if (atomic_read(&p->real_cred->user->processes) >=
-                        p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+                        task_rlimit(p, RLIMIT_NPROC)) {
                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
                     p->real_cred->user != INIT_USER)
                         goto bad_fork_free;
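task_rlimit() is a small accessor for a task's rlim_cur value, replacing the open-coded array lookup. The same RLIMIT_NPROC limit is visible from userspace via getrlimit(), and fork() reports EAGAIN once a user exceeds it; a runnable sketch:

/* Standalone demo of the limit checked above: RLIMIT_NPROC bounds how
 * many processes one user may have. Compile with: cc -o nproc nproc.c */
#include <errno.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_NPROC, &rl) != 0) {
                perror("getrlimit");
                return 1;
        }
        printf("RLIMIT_NPROC: soft=%llu hard=%llu\n",
               (unsigned long long)rl.rlim_cur,
               (unsigned long long)rl.rlim_max);

        pid_t pid = fork();
        if (pid < 0 && errno == EAGAIN)
                printf("fork: EAGAIN (process limit reached)\n");
        else if (pid == 0)
                _exit(0);               /* child: exit immediately */
        else if (pid > 0)
                waitpid(pid, NULL, 0);  /* parent: reap the child */
        return 0;
}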
@@ -1073,8 +1055,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         p->gtime = cputime_zero;
         p->utimescaled = cputime_zero;
         p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
         p->prev_utime = cputime_zero;
         p->prev_stime = cputime_zero;
+#endif
+#if defined(SPLIT_RSS_COUNTING)
+        memset(&p->rss_stat, 0, sizeof(p->rss_stat));
+#endif
 
         p->default_timer_slack_ns = current->timer_slack_ns;
 
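Together with the memset of mm->rss_stat in mm_init() above, this zeroes the new per-task RSS counters: under SPLIT_RSS_COUNTING each task batches RSS deltas privately and folds them into the shared mm counters periodically. A runnable userspace sketch of that split-counter pattern (the BATCH and EVENTS values are arbitrary):

/* Split-counter sketch: each thread batches updates in a private
 * counter and folds them into a shared atomic total occasionally.
 * Compile with: cc -pthread -o split split.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define BATCH   64
#define EVENTS  1000

static atomic_long shared_rss;                  /* like mm->rss_stat */
static _Thread_local long local_rss;            /* like task->rss_stat */

static void account_page(void)
{
        if (++local_rss >= BATCH) {             /* flush the private batch */
                atomic_fetch_add(&shared_rss, local_rss);
                local_rss = 0;
        }
}

static void *worker(void *arg)
{
        (void)arg;
        for (int i = 0; i < EVENTS; i++)
                account_page();
        atomic_fetch_add(&shared_rss, local_rss);       /* final sync */
        return NULL;
}

int main(void)
{
        pthread_t t[4];

        for (int i = 0; i < 4; i++)
                pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < 4; i++)
                pthread_join(t[i], NULL);
        printf("shared_rss = %ld (expected %d)\n",
               atomic_load(&shared_rss), 4 * EVENTS);
        return 0;
}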
@@ -1127,11 +1114,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
         p->blocked_on = NULL; /* not blocked yet */
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+        p->memcg_batch.do_batch = 0;
+        p->memcg_batch.memcg = NULL;
+#endif
 
         p->bts = NULL;
 
-        p->stack_start = stack_start;
-
         /* Perform scheduler related setup. Assign this task to a CPU. */
         sched_fork(p, clone_flags);
 
@@ -1206,9 +1195,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         p->sas_ss_sp = p->sas_ss_size = 0;
 
         /*
-         * Syscall tracing should be turned off in the child regardless
-         * of CLONE_PTRACE.
+         * Syscall tracing and stepping should be turned off in the
+         * child regardless of CLONE_PTRACE.
          */
+        user_disable_single_step(p);
         clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 #ifdef TIF_SYSCALL_EMU
         clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1236,21 +1226,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         /* Need tasklist lock for parent etc handling! */
         write_lock_irq(&tasklist_lock);
 
-        /*
-         * The task hasn't been attached yet, so its cpus_allowed mask will
-         * not be changed, nor will its assigned CPU.
-         *
-         * The cpus_allowed mask of the parent may have changed after it was
-         * copied first time - so re-copy it here, then check the child's CPU
-         * to ensure it is on a valid CPU (and if not, just force it back to
-         * parent's CPU). This avoids alot of nasty races.
-         */
-        p->cpus_allowed = current->cpus_allowed;
-        p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-        if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-                        !cpu_online(task_cpu(p))))
-                set_task_cpu(p, smp_processor_id());
-
         /* CLONE_PARENT re-uses the old parent */
         if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                 p->real_parent = current->real_parent;
@@ -1286,7 +1261,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         }
 
         if (likely(p->pid)) {
-                list_add_tail(&p->sibling, &p->real_parent->children);
                 tracehook_finish_clone(p, clone_flags, trace);
 
                 if (thread_group_leader(p)) {
@@ -1298,6 +1272,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                         p->signal->tty = tty_kref_get(current->signal->tty);
                         attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
                         attach_pid(p, PIDTYPE_SID, task_session(current));
+                        list_add_tail(&p->sibling, &p->real_parent->children);
                         list_add_tail_rcu(&p->tasks, &init_task.tasks);
                         __get_cpu_var(process_counts)++;
                 }
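The previous two hunks move the sibling linkage so a new thread-group leader is added to its parent's children list together with the other attach operations, still under tasklist_lock. For reference, a runnable userspace rendition of the circular list_head idiom behind list_add_tail():

/* Minimal rendition of the list_head idiom behind
 * list_add_tail(&p->sibling, &parent->children).
 * Compile with: cc -o list list.c */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name)    { &(name), &(name) }
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_add_tail(struct list_head *new, struct list_head *head)
{
        new->prev = head->prev;
        new->next = head;
        head->prev->next = new;
        head->prev = new;
}

struct task { const char *comm; struct list_head sibling; };

int main(void)
{
        struct list_head children = LIST_HEAD_INIT(children);
        struct task a = { .comm = "child-a" }, b = { .comm = "child-b" };

        list_add_tail(&a.sibling, &children);
        list_add_tail(&b.sibling, &children);

        for (struct list_head *p = children.next; p != &children; p = p->next)
                printf("%s\n", container_of(p, struct task, sibling)->comm);
        return 0;
}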
@@ -1317,7 +1292,8 @@ bad_fork_free_pid:
         if (pid != &init_struct_pid)
                 free_pid(pid);
 bad_fork_cleanup_io:
-        put_io_context(p->io_context);
+        if (p->io_context)
+                exit_io_context(p);
 bad_fork_cleanup_namespaces:
         exit_task_namespaces(p);
 bad_fork_cleanup_mm: