diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 102 |
1 files changed, 40 insertions, 62 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 166b8c49257c..44b0791b0a2e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -64,6 +64,7 @@ | |||
64 | #include <linux/magic.h> | 64 | #include <linux/magic.h> |
65 | #include <linux/perf_event.h> | 65 | #include <linux/perf_event.h> |
66 | #include <linux/posix-timers.h> | 66 | #include <linux/posix-timers.h> |
67 | #include <linux/user-return-notifier.h> | ||
67 | 68 | ||
68 | #include <asm/pgtable.h> | 69 | #include <asm/pgtable.h> |
69 | #include <asm/pgalloc.h> | 70 | #include <asm/pgalloc.h> |
@@ -86,6 +87,14 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; | |||
86 | 87 | ||
87 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ | 88 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ |
88 | 89 | ||
90 | #ifdef CONFIG_PROVE_RCU | ||
91 | int lockdep_tasklist_lock_is_held(void) | ||
92 | { | ||
93 | return lockdep_is_held(&tasklist_lock); | ||
94 | } | ||
95 | EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); | ||
96 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
97 | |||
89 | int nr_processes(void) | 98 | int nr_processes(void) |
90 | { | 99 | { |
91 | int cpu; | 100 | int cpu; |
@@ -249,6 +258,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
249 | goto out; | 258 | goto out; |
250 | 259 | ||
251 | setup_thread_stack(tsk, orig); | 260 | setup_thread_stack(tsk, orig); |
261 | clear_user_return_notifier(tsk); | ||
252 | stackend = end_of_stack(tsk); | 262 | stackend = end_of_stack(tsk); |
253 | *stackend = STACK_END_MAGIC; /* for overflow detection */ | 263 | *stackend = STACK_END_MAGIC; /* for overflow detection */ |
254 | 264 | ||
@@ -326,15 +336,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
326 | if (!tmp) | 336 | if (!tmp) |
327 | goto fail_nomem; | 337 | goto fail_nomem; |
328 | *tmp = *mpnt; | 338 | *tmp = *mpnt; |
339 | INIT_LIST_HEAD(&tmp->anon_vma_chain); | ||
329 | pol = mpol_dup(vma_policy(mpnt)); | 340 | pol = mpol_dup(vma_policy(mpnt)); |
330 | retval = PTR_ERR(pol); | 341 | retval = PTR_ERR(pol); |
331 | if (IS_ERR(pol)) | 342 | if (IS_ERR(pol)) |
332 | goto fail_nomem_policy; | 343 | goto fail_nomem_policy; |
333 | vma_set_policy(tmp, pol); | 344 | vma_set_policy(tmp, pol); |
345 | if (anon_vma_fork(tmp, mpnt)) | ||
346 | goto fail_nomem_anon_vma_fork; | ||
334 | tmp->vm_flags &= ~VM_LOCKED; | 347 | tmp->vm_flags &= ~VM_LOCKED; |
335 | tmp->vm_mm = mm; | 348 | tmp->vm_mm = mm; |
336 | tmp->vm_next = NULL; | 349 | tmp->vm_next = NULL; |
337 | anon_vma_link(tmp); | ||
338 | file = tmp->vm_file; | 350 | file = tmp->vm_file; |
339 | if (file) { | 351 | if (file) { |
340 | struct inode *inode = file->f_path.dentry->d_inode; | 352 | struct inode *inode = file->f_path.dentry->d_inode; |
@@ -389,6 +401,8 @@ out: | |||
389 | flush_tlb_mm(oldmm); | 401 | flush_tlb_mm(oldmm); |
390 | up_write(&oldmm->mmap_sem); | 402 | up_write(&oldmm->mmap_sem); |
391 | return retval; | 403 | return retval; |
404 | fail_nomem_anon_vma_fork: | ||
405 | mpol_put(pol); | ||
392 | fail_nomem_policy: | 406 | fail_nomem_policy: |
393 | kmem_cache_free(vm_area_cachep, tmp); | 407 | kmem_cache_free(vm_area_cachep, tmp); |
394 | fail_nomem: | 408 | fail_nomem: |
@@ -452,8 +466,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
452 | (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; | 466 | (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; |
453 | mm->core_state = NULL; | 467 | mm->core_state = NULL; |
454 | mm->nr_ptes = 0; | 468 | mm->nr_ptes = 0; |
455 | set_mm_counter(mm, file_rss, 0); | 469 | memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); |
456 | set_mm_counter(mm, anon_rss, 0); | ||
457 | spin_lock_init(&mm->page_table_lock); | 470 | spin_lock_init(&mm->page_table_lock); |
458 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 471 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
459 | mm->cached_hole_size = ~0UL; | 472 | mm->cached_hole_size = ~0UL; |
@@ -822,23 +835,14 @@ void __cleanup_sighand(struct sighand_struct *sighand) | |||
822 | */ | 835 | */ |
823 | static void posix_cpu_timers_init_group(struct signal_struct *sig) | 836 | static void posix_cpu_timers_init_group(struct signal_struct *sig) |
824 | { | 837 | { |
838 | unsigned long cpu_limit; | ||
839 | |||
825 | /* Thread group counters. */ | 840 | /* Thread group counters. */ |
826 | thread_group_cputime_init(sig); | 841 | thread_group_cputime_init(sig); |
827 | 842 | ||
828 | /* Expiration times and increments. */ | 843 | cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); |
829 | sig->it[CPUCLOCK_PROF].expires = cputime_zero; | 844 | if (cpu_limit != RLIM_INFINITY) { |
830 | sig->it[CPUCLOCK_PROF].incr = cputime_zero; | 845 | sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); |
831 | sig->it[CPUCLOCK_VIRT].expires = cputime_zero; | ||
832 | sig->it[CPUCLOCK_VIRT].incr = cputime_zero; | ||
833 | |||
834 | /* Cached expiration times. */ | ||
835 | sig->cputime_expires.prof_exp = cputime_zero; | ||
836 | sig->cputime_expires.virt_exp = cputime_zero; | ||
837 | sig->cputime_expires.sched_exp = 0; | ||
838 | |||
839 | if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { | ||
840 | sig->cputime_expires.prof_exp = | ||
841 | secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); | ||
842 | sig->cputimer.running = 1; | 846 | sig->cputimer.running = 1; |
843 | } | 847 | } |
844 | 848 | ||
@@ -855,7 +859,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
855 | if (clone_flags & CLONE_THREAD) | 859 | if (clone_flags & CLONE_THREAD) |
856 | return 0; | 860 | return 0; |
857 | 861 | ||
858 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | 862 | sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL); |
859 | tsk->signal = sig; | 863 | tsk->signal = sig; |
860 | if (!sig) | 864 | if (!sig) |
861 | return -ENOMEM; | 865 | return -ENOMEM; |
@@ -863,43 +867,21 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
863 | atomic_set(&sig->count, 1); | 867 | atomic_set(&sig->count, 1); |
864 | atomic_set(&sig->live, 1); | 868 | atomic_set(&sig->live, 1); |
865 | init_waitqueue_head(&sig->wait_chldexit); | 869 | init_waitqueue_head(&sig->wait_chldexit); |
866 | sig->flags = 0; | ||
867 | if (clone_flags & CLONE_NEWPID) | 870 | if (clone_flags & CLONE_NEWPID) |
868 | sig->flags |= SIGNAL_UNKILLABLE; | 871 | sig->flags |= SIGNAL_UNKILLABLE; |
869 | sig->group_exit_code = 0; | ||
870 | sig->group_exit_task = NULL; | ||
871 | sig->group_stop_count = 0; | ||
872 | sig->curr_target = tsk; | 872 | sig->curr_target = tsk; |
873 | init_sigpending(&sig->shared_pending); | 873 | init_sigpending(&sig->shared_pending); |
874 | INIT_LIST_HEAD(&sig->posix_timers); | 874 | INIT_LIST_HEAD(&sig->posix_timers); |
875 | 875 | ||
876 | hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 876 | hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
877 | sig->it_real_incr.tv64 = 0; | ||
878 | sig->real_timer.function = it_real_fn; | 877 | sig->real_timer.function = it_real_fn; |
879 | 878 | ||
880 | sig->leader = 0; /* session leadership doesn't inherit */ | ||
881 | sig->tty_old_pgrp = NULL; | ||
882 | sig->tty = NULL; | ||
883 | |||
884 | sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; | ||
885 | sig->gtime = cputime_zero; | ||
886 | sig->cgtime = cputime_zero; | ||
887 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | ||
888 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | ||
889 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | ||
890 | sig->maxrss = sig->cmaxrss = 0; | ||
891 | task_io_accounting_init(&sig->ioac); | ||
892 | sig->sum_sched_runtime = 0; | ||
893 | taskstats_tgid_init(sig); | ||
894 | |||
895 | task_lock(current->group_leader); | 879 | task_lock(current->group_leader); |
896 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | 880 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); |
897 | task_unlock(current->group_leader); | 881 | task_unlock(current->group_leader); |
898 | 882 | ||
899 | posix_cpu_timers_init_group(sig); | 883 | posix_cpu_timers_init_group(sig); |
900 | 884 | ||
901 | acct_init_pacct(&sig->pacct); | ||
902 | |||
903 | tty_audit_fork(sig); | 885 | tty_audit_fork(sig); |
904 | 886 | ||
905 | sig->oom_adj = current->signal->oom_adj; | 887 | sig->oom_adj = current->signal->oom_adj; |
@@ -934,9 +916,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) | |||
934 | 916 | ||
935 | static void rt_mutex_init_task(struct task_struct *p) | 917 | static void rt_mutex_init_task(struct task_struct *p) |
936 | { | 918 | { |
937 | spin_lock_init(&p->pi_lock); | 919 | raw_spin_lock_init(&p->pi_lock); |
938 | #ifdef CONFIG_RT_MUTEXES | 920 | #ifdef CONFIG_RT_MUTEXES |
939 | plist_head_init(&p->pi_waiters, &p->pi_lock); | 921 | plist_head_init_raw(&p->pi_waiters, &p->pi_lock); |
940 | p->pi_blocked_on = NULL; | 922 | p->pi_blocked_on = NULL; |
941 | #endif | 923 | #endif |
942 | } | 924 | } |
@@ -1028,7 +1010,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1028 | #endif | 1010 | #endif |
1029 | retval = -EAGAIN; | 1011 | retval = -EAGAIN; |
1030 | if (atomic_read(&p->real_cred->user->processes) >= | 1012 | if (atomic_read(&p->real_cred->user->processes) >= |
1031 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { | 1013 | task_rlimit(p, RLIMIT_NPROC)) { |
1032 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && | 1014 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && |
1033 | p->real_cred->user != INIT_USER) | 1015 | p->real_cred->user != INIT_USER) |
1034 | goto bad_fork_free; | 1016 | goto bad_fork_free; |
@@ -1066,8 +1048,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1066 | p->gtime = cputime_zero; | 1048 | p->gtime = cputime_zero; |
1067 | p->utimescaled = cputime_zero; | 1049 | p->utimescaled = cputime_zero; |
1068 | p->stimescaled = cputime_zero; | 1050 | p->stimescaled = cputime_zero; |
1051 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
1069 | p->prev_utime = cputime_zero; | 1052 | p->prev_utime = cputime_zero; |
1070 | p->prev_stime = cputime_zero; | 1053 | p->prev_stime = cputime_zero; |
1054 | #endif | ||
1055 | #if defined(SPLIT_RSS_COUNTING) | ||
1056 | memset(&p->rss_stat, 0, sizeof(p->rss_stat)); | ||
1057 | #endif | ||
1071 | 1058 | ||
1072 | p->default_timer_slack_ns = current->timer_slack_ns; | 1059 | p->default_timer_slack_ns = current->timer_slack_ns; |
1073 | 1060 | ||
@@ -1120,6 +1107,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1120 | #ifdef CONFIG_DEBUG_MUTEXES | 1107 | #ifdef CONFIG_DEBUG_MUTEXES |
1121 | p->blocked_on = NULL; /* not blocked yet */ | 1108 | p->blocked_on = NULL; /* not blocked yet */ |
1122 | #endif | 1109 | #endif |
1110 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | ||
1111 | p->memcg_batch.do_batch = 0; | ||
1112 | p->memcg_batch.memcg = NULL; | ||
1113 | #endif | ||
1123 | 1114 | ||
1124 | p->bts = NULL; | 1115 | p->bts = NULL; |
1125 | 1116 | ||
@@ -1199,9 +1190,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1199 | p->sas_ss_sp = p->sas_ss_size = 0; | 1190 | p->sas_ss_sp = p->sas_ss_size = 0; |
1200 | 1191 | ||
1201 | /* | 1192 | /* |
1202 | * Syscall tracing should be turned off in the child regardless | 1193 | * Syscall tracing and stepping should be turned off in the |
1203 | * of CLONE_PTRACE. | 1194 | * child regardless of CLONE_PTRACE. |
1204 | */ | 1195 | */ |
1196 | user_disable_single_step(p); | ||
1205 | clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); | 1197 | clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); |
1206 | #ifdef TIF_SYSCALL_EMU | 1198 | #ifdef TIF_SYSCALL_EMU |
1207 | clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); | 1199 | clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); |
@@ -1229,21 +1221,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1229 | /* Need tasklist lock for parent etc handling! */ | 1221 | /* Need tasklist lock for parent etc handling! */ |
1230 | write_lock_irq(&tasklist_lock); | 1222 | write_lock_irq(&tasklist_lock); |
1231 | 1223 | ||
1232 | /* | ||
1233 | * The task hasn't been attached yet, so its cpus_allowed mask will | ||
1234 | * not be changed, nor will its assigned CPU. | ||
1235 | * | ||
1236 | * The cpus_allowed mask of the parent may have changed after it was | ||
1237 | * copied first time - so re-copy it here, then check the child's CPU | ||
1238 | * to ensure it is on a valid CPU (and if not, just force it back to | ||
1239 | * parent's CPU). This avoids alot of nasty races. | ||
1240 | */ | ||
1241 | p->cpus_allowed = current->cpus_allowed; | ||
1242 | p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; | ||
1243 | if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || | ||
1244 | !cpu_online(task_cpu(p)))) | ||
1245 | set_task_cpu(p, smp_processor_id()); | ||
1246 | |||
1247 | /* CLONE_PARENT re-uses the old parent */ | 1224 | /* CLONE_PARENT re-uses the old parent */ |
1248 | if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { | 1225 | if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { |
1249 | p->real_parent = current->real_parent; | 1226 | p->real_parent = current->real_parent; |
@@ -1279,7 +1256,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1279 | } | 1256 | } |
1280 | 1257 | ||
1281 | if (likely(p->pid)) { | 1258 | if (likely(p->pid)) { |
1282 | list_add_tail(&p->sibling, &p->real_parent->children); | ||
1283 | tracehook_finish_clone(p, clone_flags, trace); | 1259 | tracehook_finish_clone(p, clone_flags, trace); |
1284 | 1260 | ||
1285 | if (thread_group_leader(p)) { | 1261 | if (thread_group_leader(p)) { |
@@ -1291,6 +1267,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1291 | p->signal->tty = tty_kref_get(current->signal->tty); | 1267 | p->signal->tty = tty_kref_get(current->signal->tty); |
1292 | attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); | 1268 | attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); |
1293 | attach_pid(p, PIDTYPE_SID, task_session(current)); | 1269 | attach_pid(p, PIDTYPE_SID, task_session(current)); |
1270 | list_add_tail(&p->sibling, &p->real_parent->children); | ||
1294 | list_add_tail_rcu(&p->tasks, &init_task.tasks); | 1271 | list_add_tail_rcu(&p->tasks, &init_task.tasks); |
1295 | __get_cpu_var(process_counts)++; | 1272 | __get_cpu_var(process_counts)++; |
1296 | } | 1273 | } |
@@ -1310,7 +1287,8 @@ bad_fork_free_pid: | |||
1310 | if (pid != &init_struct_pid) | 1287 | if (pid != &init_struct_pid) |
1311 | free_pid(pid); | 1288 | free_pid(pid); |
1312 | bad_fork_cleanup_io: | 1289 | bad_fork_cleanup_io: |
1313 | put_io_context(p->io_context); | 1290 | if (p->io_context) |
1291 | exit_io_context(p); | ||
1314 | bad_fork_cleanup_namespaces: | 1292 | bad_fork_cleanup_namespaces: |
1315 | exit_task_namespaces(p); | 1293 | exit_task_namespaces(p); |
1316 | bad_fork_cleanup_mm: | 1294 | bad_fork_cleanup_mm: |