Diffstat (limited to 'kernel/fork.c')

-rw-r--r--	kernel/fork.c	138
1 file changed, 55 insertions(+), 83 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b3ffc2..b6cce14ba047 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -87,6 +87,14 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+	return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 int nr_processes(void)
 {
 	int cpu;
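The helper above lets header code ask lockdep whether tasklist_lock is held without the rwlock itself being exported to modules; only the wrapper is EXPORT_SYMBOL_GPL'd. A reader-side check of this era looks roughly like the following sketch (the sighand example illustrates the pattern and is not a line from this patch):

	struct sighand_struct *sighand;

	/* Legal if inside rcu_read_lock() OR holding tasklist_lock. */
	sighand = rcu_dereference_check(tsk->sighand,
					rcu_read_lock_held() ||
					lockdep_tasklist_lock_is_held());
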
@@ -157,6 +165,18 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
+static inline void free_signal_struct(struct signal_struct *sig)
+{
+	taskstats_tgid_free(sig);
+	kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void put_signal_struct(struct signal_struct *sig)
+{
+	if (atomic_dec_and_test(&sig->sigcnt))
+		free_signal_struct(sig);
+}
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
@@ -165,6 +185,7 @@ void __put_task_struct(struct task_struct *tsk)
 
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
+	put_signal_struct(tsk->signal);
 
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
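put_signal_struct()/free_signal_struct() implement the usual last-reference-frees pattern: every holder of the signal_struct calls put, and whichever caller drops the count to zero runs the destructor. A minimal, self-contained userspace model of the same pattern using C11 atomics (obj, obj_put and the counts are illustrative, not kernel code):

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct obj {
		atomic_int refcnt;		/* plays the role of sig->sigcnt */
	};

	static void obj_free(struct obj *o)
	{
		printf("last reference dropped, freeing\n");
		free(o);
	}

	/* Mirrors atomic_dec_and_test() in put_signal_struct(). */
	static void obj_put(struct obj *o)
	{
		if (atomic_fetch_sub(&o->refcnt, 1) == 1)
			obj_free(o);	/* we took it from 1 to 0 */
	}

	int main(void)
	{
		struct obj *o = malloc(sizeof(*o));

		if (!o)
			return 1;
		atomic_init(&o->refcnt, 2);	/* two holders */
		obj_put(o);			/* 2 -> 1: no free */
		obj_put(o);			/* 1 -> 0: freed here */
		return 0;
	}
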
@@ -328,15 +349,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
+		INIT_LIST_HEAD(&tmp->anon_vma_chain);
 		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
+		if (anon_vma_fork(tmp, mpnt))
+			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
-		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
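Two things happen in the hunk above: each copied vma now carries its own anon_vma_chain, and anon_vma_fork() (which links the child vma to the parent's anon_vmas and may allocate a fresh one) replaces the old unconditional anon_vma_link(). One reason to initialize the list head right after the structure copy, before any failure point, is that cleanup paths can then test or walk the chain safely even when nothing was ever linked. A stripped-down model of that idiom (this list_head is a stand-in for the kernel's):

	#include <stdio.h>

	struct list_head { struct list_head *next, *prev; };

	static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

	static int list_empty(const struct list_head *h) { return h->next == h; }

	int main(void)
	{
		struct list_head chain;

		/* Init first: an error path can now ask "anything to unlink?"
		 * without caring whether any element was ever added. */
		INIT_LIST_HEAD(&chain);
		printf("empty: %d\n", list_empty(&chain));
		return 0;
	}
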
@@ -391,6 +414,8 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
+fail_nomem_anon_vma_fork:
+	mpol_put(pol);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
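The new label slots into the existing unwind chain: if anon_vma_fork() fails, the mempolicy reference taken by mpol_dup() must be dropped before control falls through to free the vma copy, so the labels run in reverse order of acquisition. The shape of the idiom as a self-contained sketch (three hypothetical resources):

	#include <stdlib.h>

	static int setup(void)
	{
		char *a, *b, *c;

		a = malloc(16);
		if (!a)
			goto fail_a;
		b = malloc(16);
		if (!b)
			goto fail_b;
		c = malloc(16);
		if (!c)
			goto fail_c;
		free(c);		/* success; freed here only to keep the demo leak-free */
		free(b);
		free(a);
		return 0;

	fail_c:				/* newest acquisition failed: unwind older ones */
		free(b);
	fail_b:
		free(a);
	fail_a:
		return -1;
	}

	int main(void) { return setup() ? 1 : 0; }
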
@@ -454,8 +479,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
-	set_mm_counter(mm, file_rss, 0);
-	set_mm_counter(mm, anon_rss, 0);
+	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
@@ -824,23 +848,14 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+	unsigned long cpu_limit;
+
 	/* Thread group counters. */
 	thread_group_cputime_init(sig);
 
-	/* Expiration times and increments. */
-	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
-	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
-
-	/* Cached expiration times. */
-	sig->cputime_expires.prof_exp = cputime_zero;
-	sig->cputime_expires.virt_exp = cputime_zero;
-	sig->cputime_expires.sched_exp = 0;
-
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		sig->cputime_expires.prof_exp =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+	if (cpu_limit != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
 		sig->cputimer.running = 1;
 	}
 
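Two cleanups combine in the rewrite above. The blanket zero-initialization becomes unnecessary once copy_signal() below switches to kmem_cache_zalloc(). Independently, the RLIMIT_CPU value is now read exactly once: ACCESS_ONCE() forces a single volatile load, so the RLIM_INFINITY test and the secs_to_cputime() conversion cannot see two different values if another thread changes the limit concurrently. A userspace model of the single-read idiom (GCC __typeof__; names are illustrative):

	#include <stdio.h>

	#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

	static unsigned long shared_limit = 100;	/* another thread may write this */

	int main(void)
	{
		unsigned long limit = ACCESS_ONCE(shared_limit);	/* one snapshot */

		if (limit != ~0UL)			/* stand-in for RLIM_INFINITY */
			printf("limit = %lu\n", limit);	/* same value we tested */
		return 0;
	}
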
@@ -857,54 +872,30 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	if (clone_flags & CLONE_THREAD)
 		return 0;
 
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
 
-	atomic_set(&sig->count, 1);
+	sig->nr_threads = 1;
 	atomic_set(&sig->live, 1);
+	atomic_set(&sig->sigcnt, 1);
 	init_waitqueue_head(&sig->wait_chldexit);
-	sig->flags = 0;
 	if (clone_flags & CLONE_NEWPID)
 		sig->flags |= SIGNAL_UNKILLABLE;
-	sig->group_exit_code = 0;
-	sig->group_exit_task = NULL;
-	sig->group_stop_count = 0;
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->leader = 0;	/* session leadership doesn't inherit */
-	sig->tty_old_pgrp = NULL;
-	sig->tty = NULL;
-
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
-	sig->gtime = cputime_zero;
-	sig->cgtime = cputime_zero;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	sig->prev_utime = sig->prev_stime = cputime_zero;
-#endif
-	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
-	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
-	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
-	sig->maxrss = sig->cmaxrss = 0;
-	task_io_accounting_init(&sig->ioac);
-	sig->sum_sched_runtime = 0;
-	taskstats_tgid_init(sig);
-
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
 	posix_cpu_timers_init_group(sig);
 
-	acct_init_pacct(&sig->pacct);
-
 	tty_audit_fork(sig);
 
 	sig->oom_adj = current->signal->oom_adj;
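Most of the deleted lines above are zero-stores. Switching kmem_cache_alloc() to kmem_cache_zalloc() hands back pre-zeroed memory, so only fields whose initial value is non-zero (nr_threads, live, sigcnt, curr_target, the rlim copy, ...) still need explicit assignment, and any field added to signal_struct later starts out zeroed for free. The userspace analog is calloc() versus malloc() plus per-field stores (sig_demo is an illustrative stand-in):

	#include <stdlib.h>

	struct sig_demo {		/* stand-in for signal_struct */
		int nr_threads;
		int flags;
		long utime, stime;	/* ...and many more */
	};

	int main(void)
	{
		/* calloc() is the analog of kmem_cache_zalloc(): all fields start at 0. */
		struct sig_demo *sig = calloc(1, sizeof(*sig));

		if (!sig)
			return 1;
		sig->nr_threads = 1;	/* only non-zero state needs code */
		free(sig);
		return 0;
	}
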
@@ -912,13 +903,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	return 0;
 }
 
-void __cleanup_signal(struct signal_struct *sig)
-{
-	thread_group_cputime_free(sig);
-	tty_kref_put(sig->tty);
-	kmem_cache_free(signal_cachep, sig);
-}
-
 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
@@ -1033,7 +1017,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	retval = -EAGAIN;
 	if (atomic_read(&p->real_cred->user->processes) >=
-			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+			task_rlimit(p, RLIMIT_NPROC)) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
 		    p->real_cred->user != INIT_USER)
 			goto bad_fork_free;
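task_rlimit() is the accessor introduced in the same release cycle; it is roughly the following (a sketch; see include/linux/sched.h for the authoritative definition):

	static inline unsigned long task_rlimit(const struct task_struct *tsk,
						unsigned int limit)
	{
		return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
	}

Functionally the check is unchanged; the helper reads the limit once and spares callers the open-coded dereference chain.
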
@@ -1075,6 +1059,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
 #endif
+#if defined(SPLIT_RSS_COUNTING)
+	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
+#endif
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
@@ -1132,10 +1119,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->memcg_batch.memcg = NULL;
 #endif
 
-	p->bts = NULL;
-
-	p->stack_start = stack_start;
-
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
@@ -1241,21 +1224,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
-	/*
-	 * The task hasn't been attached yet, so its cpus_allowed mask will
-	 * not be changed, nor will its assigned CPU.
-	 *
-	 * The cpus_allowed mask of the parent may have changed after it was
-	 * copied first time - so re-copy it here, then check the child's CPU
-	 * to ensure it is on a valid CPU (and if not, just force it back to
-	 * parent's CPU). This avoids alot of nasty races.
-	 */
-	p->cpus_allowed = current->cpus_allowed;
-	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-			!cpu_online(task_cpu(p))))
-		set_task_cpu(p, smp_processor_id());
-
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
@@ -1284,8 +1252,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
+		current->signal->nr_threads++;
 		atomic_inc(&current->signal->live);
+		atomic_inc(&current->signal->sigcnt);
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 	}
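The bookkeeping split is visible here: the old signal->count is gone, replaced by three fields with distinct roles and distinct protection rules. An abridged sketch of the relevant signal_struct members (see include/linux/sched.h for the real layout):

	struct signal_struct {
		int		nr_threads;	/* thread count; only modified under
						 * tasklist_lock, hence a plain int */
		atomic_t	live;		/* threads that have not yet passed
						 * through do_exit() */
		atomic_t	sigcnt;		/* references on this struct; each
						 * task holds one, dropped via
						 * put_signal_struct() */
		/* ... */
	};

Note that nr_threads++ needs no atomic precisely because every writer runs under write_lock_irq(&tasklist_lock), taken a few lines above.
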
@@ -1298,7 +1267,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->nsproxy->pid_ns->child_reaper = p;
 
 		p->signal->leader_pid = pid;
-		tty_kref_put(p->signal->tty);
 		p->signal->tty = tty_kref_get(current->signal->tty);
 		attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 		attach_pid(p, PIDTYPE_SID, task_session(current));
@@ -1331,7 +1299,7 @@ bad_fork_cleanup_mm:
 	mmput(p->mm);
 bad_fork_cleanup_signal:
 	if (!(clone_flags & CLONE_THREAD))
-		__cleanup_signal(p->signal);
+		free_signal_struct(p->signal);
 bad_fork_cleanup_sighand:
 	__cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
@@ -1366,6 +1334,16 @@ noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 	return regs;
 }
 
+static inline void init_idle_pids(struct pid_link *links)
+{
+	enum pid_type type;
+
+	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+		INIT_HLIST_NODE(&links[type].node); /* not really needed */
+		links[type].pid = &init_struct_pid;
+	}
+}
+
 struct task_struct * __cpuinit fork_idle(int cpu)
 {
 	struct task_struct *task;
@@ -1373,8 +1351,10 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 
 	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
 			    &init_struct_pid, 0);
-	if (!IS_ERR(task))
+	if (!IS_ERR(task)) {
+		init_idle_pids(task->pids);
 		init_idle(task, cpu);
+	}
 
 	return task;
 }
@@ -1546,14 +1526,6 @@ static void check_unshare_flags(unsigned long *flags_ptr)
 		*flags_ptr |= CLONE_SIGHAND;
 
 	/*
-	 * If unsharing signal handlers and the task was created
-	 * using CLONE_THREAD, then must unshare the thread
-	 */
-	if ((*flags_ptr & CLONE_SIGHAND) &&
-	    (atomic_read(&current->signal->count) > 1))
-		*flags_ptr |= CLONE_THREAD;
-
-	/*
 	 * If unsharing namespace, must also unshare filesystem information.
 	 */
 	if (*flags_ptr & CLONE_NEWNS)