Diffstat (limited to 'kernel/fork.c')
 -rw-r--r--  kernel/fork.c | 60
 1 file changed, 32 insertions(+), 28 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c49257c..b0ec34abc0bb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -64,6 +64,7 @@
 #include <linux/magic.h>
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -85,6 +86,7 @@ int max_threads;        /* tunable limit on nr_threads */
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
+EXPORT_SYMBOL_GPL(tasklist_lock);
 
 int nr_processes(void)
 {
@@ -249,6 +251,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                goto out;
 
        setup_thread_stack(tsk, orig);
+       clear_user_return_notifier(tsk);
        stackend = end_of_stack(tsk);
        *stackend = STACK_END_MAGIC;    /* for overflow detection */
 
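The clear_user_return_notifier() call keeps a freshly duplicated task_struct from inheriting the parent's pending user-return-notifier work: the flag is per-task and must start clear in the child. The helper is a one-line wrapper, roughly as below (a sketch based on <linux/user-return-notifier.h>; the exact flag name is an assumption and only exists on architectures that support the facility):

    /* Sketch: clear the per-task "run notifiers on return to userspace"
     * flag. TIF_USER_RETURN_NOTIFY is assumed to be arch-defined. */
    static inline void clear_user_return_notifier(struct task_struct *p)
    {
            clear_tsk_thread_flag(p, TIF_USER_RETURN_NOTIFY);
    }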
@@ -326,15 +329,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
+               INIT_LIST_HEAD(&tmp->anon_vma_chain);
                pol = mpol_dup(vma_policy(mpnt));
                retval = PTR_ERR(pol);
                if (IS_ERR(pol))
                        goto fail_nomem_policy;
                vma_set_policy(tmp, pol);
+               if (anon_vma_fork(tmp, mpnt))
+                       goto fail_nomem_anon_vma_fork;
                tmp->vm_flags &= ~VM_LOCKED;
                tmp->vm_mm = mm;
                tmp->vm_next = NULL;
-               anon_vma_link(tmp);
                file = tmp->vm_file;
                if (file) {
                        struct inode *inode = file->f_path.dentry->d_inode;
@@ -389,6 +394,8 @@ out:
        flush_tlb_mm(oldmm);
        up_write(&oldmm->mmap_sem);
        return retval;
+fail_nomem_anon_vma_fork:
+       mpol_put(pol);
 fail_nomem_policy:
        kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
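With anon_vma_fork() able to fail after mpol_dup() has already succeeded, the label order matters: the new fail_nomem_anon_vma_fork label drops the mempolicy reference first, then falls through to free the VMA and return -ENOMEM. A stripped-down, userspace-compilable sketch of the same fall-through-goto pattern (all names here are illustrative stand-ins, not kernel API):

    #include <stdlib.h>

    /* Each label undoes one acquisition and falls through to the next,
     * mirroring the fail_nomem_* chain in dup_mmap(). */
    static int dup_one_vma(void)
    {
            void *tmp, *pol;

            tmp = malloc(64);       /* kmem_cache_alloc(vm_area_cachep, ...) */
            if (!tmp)
                    goto fail_nomem;
            pol = malloc(64);       /* mpol_dup(vma_policy(mpnt)) */
            if (!pol)
                    goto fail_nomem_policy;
            if (0)                  /* anon_vma_fork(tmp, mpnt) failing */
                    goto fail_nomem_anon_vma_fork;
            return 0;

    fail_nomem_anon_vma_fork:
            free(pol);              /* mpol_put(pol) */
    fail_nomem_policy:
            free(tmp);              /* kmem_cache_free(vm_area_cachep, tmp) */
    fail_nomem:
            return -1;              /* -ENOMEM in the real code */
    }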
@@ -452,8 +459,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
                (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
        mm->core_state = NULL;
        mm->nr_ptes = 0;
-       set_mm_counter(mm, file_rss, 0);
-       set_mm_counter(mm, anon_rss, 0);
+       memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
        spin_lock_init(&mm->page_table_lock);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
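The two set_mm_counter() calls collapse into a single memset() because the RSS counters now live together in an rss_stat structure on mm_struct; zeroing the aggregate clears every counter at once and stays correct if more counters are added later. The assumed shape of that structure (a sketch; the real definition is in <linux/mm_types.h>, and the counter type varies by configuration):

    /* Sketch of the consolidated RSS counters the memset() zeroes. */
    enum {
            MM_FILEPAGES,           /* was the file_rss counter */
            MM_ANONPAGES,           /* was the anon_rss counter */
            NR_MM_COUNTERS
    };

    struct mm_rss_stat {
            atomic_long_t count[NR_MM_COUNTERS];
    };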
@@ -822,6 +828,8 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+       unsigned long cpu_limit;
+
        /* Thread group counters. */
        thread_group_cputime_init(sig);
 
@@ -836,9 +844,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
        sig->cputime_expires.virt_exp = cputime_zero;
        sig->cputime_expires.sched_exp = 0;
 
-       if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-               sig->cputime_expires.prof_exp =
-                       secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+       cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+       if (cpu_limit != RLIM_INFINITY) {
+               sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
                sig->cputimer.running = 1;
        }
 
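Snapshotting rlim_cur through ACCESS_ONCE() guarantees that the value compared against RLIM_INFINITY is the same value later passed to secs_to_cputime(); the old code read the field twice, so a concurrent setrlimit() could change it between the test and the use. ACCESS_ONCE() is, roughly, a volatile cast that forces exactly one load (see <linux/compiler.h>):

    #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

    /* One load, then a consistent test and use of the snapshot: */
    cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
    if (cpu_limit != RLIM_INFINITY)
            sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);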
@@ -884,6 +892,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
        sig->gtime = cputime_zero;
        sig->cgtime = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+       sig->prev_utime = sig->prev_stime = cputime_zero;
+#endif
        sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
        sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
        sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
@@ -934,9 +945,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-       spin_lock_init(&p->pi_lock);
+       raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-       plist_head_init(&p->pi_waiters, &p->pi_lock);
+       plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
        p->pi_blocked_on = NULL;
 #endif
 }
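pi_lock moves to the raw_spin_* API as part of the spinlock-namespace split: on PREEMPT_RT, ordinary spinlocks can become sleeping locks, while raw_spinlock_t always remains a true busy-waiting lock, which the priority-inheritance machinery requires. The field in struct task_struct changes type in step; a sketch of the relevant members, not the full structure:

    struct task_struct {
            /* ... */
            raw_spinlock_t pi_lock;         /* was: spinlock_t pi_lock; */
    #ifdef CONFIG_RT_MUTEXES
            struct plist_head pi_waiters;   /* PI waiters, under pi_lock */
            struct rt_mutex_waiter *pi_blocked_on;
    #endif
            /* ... */
    };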
@@ -1028,7 +1039,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
        retval = -EAGAIN;
        if (atomic_read(&p->real_cred->user->processes) >=
-                       p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+                       task_rlimit(p, RLIMIT_NPROC)) {
                if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
                    p->real_cred->user != INIT_USER)
                        goto bad_fork_free;
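task_rlimit() replaces the open-coded dereference with a helper that also snapshots the value it reads. At the time it was introduced it read roughly as below (treat the exact body as an assumption; see include/linux/sched.h):

    static inline unsigned long task_rlimit(const struct task_struct *tsk,
                                            unsigned int limit)
    {
            return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
    }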
@@ -1066,8 +1077,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->gtime = cputime_zero;
        p->utimescaled = cputime_zero;
        p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
        p->prev_utime = cputime_zero;
        p->prev_stime = cputime_zero;
+#endif
 
        p->default_timer_slack_ns = current->timer_slack_ns;
 
@@ -1120,6 +1133,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+       p->memcg_batch.do_batch = 0;
+       p->memcg_batch.memcg = NULL;
+#endif
 
        p->bts = NULL;
 
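The memcg_batch fields back batched uncharging in the memory controller: a task can open a batch, accumulate uncharges against a single mem_cgroup, and flush them in one operation, so a new child must start with batching off and no cached group. The structure being initialized is roughly as follows (the two members touched here come from this hunk; the remaining fields are an assumption):

    struct memcg_batch_info {
            int do_batch;                   /* nonzero while a batch is open */
            struct mem_cgroup *memcg;       /* group the batch accumulates against */
            unsigned long bytes;            /* uncharged memory to flush */
            unsigned long memsw_bytes;      /* same, for memory+swap accounting */
    };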
@@ -1199,9 +1216,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->sas_ss_sp = p->sas_ss_size = 0;
 
        /*
-        * Syscall tracing should be turned off in the child regardless
-        * of CLONE_PTRACE.
+        * Syscall tracing and stepping should be turned off in the
+        * child regardless of CLONE_PTRACE.
         */
+       user_disable_single_step(p);
        clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1229,21 +1247,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
-       /*
-        * The task hasn't been attached yet, so its cpus_allowed mask will
-        * not be changed, nor will its assigned CPU.
-        *
-        * The cpus_allowed mask of the parent may have changed after it was
-        * copied first time - so re-copy it here, then check the child's CPU
-        * to ensure it is on a valid CPU (and if not, just force it back to
-        * parent's CPU). This avoids alot of nasty races.
-        */
-       p->cpus_allowed = current->cpus_allowed;
-       p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-       if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-                       !cpu_online(task_cpu(p))))
-               set_task_cpu(p, smp_processor_id());
-
        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                p->real_parent = current->real_parent;
@@ -1279,7 +1282,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        }
 
        if (likely(p->pid)) {
-               list_add_tail(&p->sibling, &p->real_parent->children);
                tracehook_finish_clone(p, clone_flags, trace);
 
                if (thread_group_leader(p)) {
@@ -1291,6 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                        p->signal->tty = tty_kref_get(current->signal->tty);
                        attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
                        attach_pid(p, PIDTYPE_SID, task_session(current));
+                       list_add_tail(&p->sibling, &p->real_parent->children);
                        list_add_tail_rcu(&p->tasks, &init_task.tasks);
                        __get_cpu_var(process_counts)++;
                }
@@ -1310,7 +1313,8 @@ bad_fork_free_pid:
        if (pid != &init_struct_pid)
                free_pid(pid);
 bad_fork_cleanup_io:
-       put_io_context(p->io_context);
+       if (p->io_context)
+               exit_io_context(p);
 bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
 bad_fork_cleanup_mm:
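The switch from put_io_context() to exit_io_context() on the error path matters when the clone used CLONE_IO: copy_io() then takes both a plain reference and a task attachment (nr_tasks) on the shared io_context, and a bare put_io_context() would leak the attachment. A sketch of the pairing (the exact exit_io_context() body is an assumption; see block/blk-ioc.c):

    /* Sketch: detach the task from its io_context, then drop the reference,
     * so both counts taken by copy_io() under CLONE_IO are returned. */
    void exit_io_context(struct task_struct *task)
    {
            struct io_context *ioc = task->io_context;

            task->io_context = NULL;
            if (atomic_dec_and_test(&ioc->nr_tasks))
                    cfq_exit(ioc);  /* last task: tear down elevator state */
            put_io_context(ioc);    /* drop the plain refcount */
    }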