path: root/kernel/fork.c
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--	kernel/fork.c	111
1 file changed, 46 insertions(+), 65 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 266c6af6ef1b..4799c5f0e6d0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -64,6 +64,7 @@
 #include <linux/magic.h>
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
+#include <linux/user-return-notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -86,12 +87,20 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+	return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 int nr_processes(void)
 {
 	int cpu;
 	int total = 0;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		total += per_cpu(process_counts, cpu);
 
 	return total;
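
The new CONFIG_PROVE_RCU helper exports the lockdep state of tasklist_lock so RCU-checked accessors elsewhere can name the lock as a valid protector. A sketch of the kind of annotation this enables (the __task_cred-style call site is an illustration, not part of this patch):

	const struct cred *cred;

	/* Dereference is legal under rcu_read_lock() OR tasklist_lock. */
	cred = rcu_dereference_check(task->real_cred,
				     rcu_read_lock_held() ||
				     lockdep_tasklist_lock_is_held());

The for_each_online_cpu() to for_each_possible_cpu() switch fixes an under-count: per-CPU process_counts are not folded back when a CPU goes offline, so offline CPUs must still be summed.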
@@ -249,6 +258,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto out;
 
 	setup_thread_stack(tsk, orig);
+	clear_user_return_notifier(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
@@ -326,15 +336,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
+		INIT_LIST_HEAD(&tmp->anon_vma_chain);
 		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
+		if (anon_vma_fork(tmp, mpnt))
+			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
-		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
@@ -389,6 +401,8 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
+fail_nomem_anon_vma_fork:
+	mpol_put(pol);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
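
The paired hunks above add anon_vma_fork() after mpol_dup() and give it a dedicated unwind label, keeping the error path LIFO: only the policy, the last successful allocation, is dropped before falling through to free the vma itself. A minimal sketch of the idiom, with hypothetical alloc_a()/alloc_b()/free_a() helpers:

	int setup(void)
	{
		struct a *a;
		struct b *b;

		a = alloc_a();
		if (!a)
			goto fail_a;
		b = alloc_b();
		if (!b)
			goto fail_b;	/* a exists, b does not */
		return 0;
	fail_b:
		free_a(a);		/* undo only what succeeded */
	fail_a:
		return -ENOMEM;
	}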
@@ -452,8 +466,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
-	set_mm_counter(mm, file_rss, 0);
-	set_mm_counter(mm, anon_rss, 0);
+	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
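
The single memset() works because the RSS counters now live in one embedded structure instead of two discrete fields; roughly (a sketch of the layout this assumes):

	struct mm_rss_stat {
		atomic_long_t count[NR_MM_COUNTERS];	/* MM_FILEPAGES, MM_ANONPAGES, ... */
	};

Zeroing the whole struct replaces one set_mm_counter() call per counter type and stays correct if more counter types are added later.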
@@ -570,12 +583,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 
 	/* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list))
+	if (unlikely(tsk->robust_list)) {
 		exit_robust_list(tsk);
+		tsk->robust_list = NULL;
+	}
 #ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list))
+	if (unlikely(tsk->compat_robust_list)) {
 		compat_exit_robust_list(tsk);
+		tsk->compat_robust_list = NULL;
+	}
 #endif
+	if (unlikely(!list_empty(&tsk->pi_state_list)))
+		exit_pi_state_list(tsk);
 #endif
 
 	/* Get rid of any cached register state */
@@ -816,23 +835,14 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+	unsigned long cpu_limit;
+
 	/* Thread group counters. */
 	thread_group_cputime_init(sig);
 
-	/* Expiration times and increments. */
-	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
-	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
-
-	/* Cached expiration times. */
-	sig->cputime_expires.prof_exp = cputime_zero;
-	sig->cputime_expires.virt_exp = cputime_zero;
-	sig->cputime_expires.sched_exp = 0;
-
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		sig->cputime_expires.prof_exp =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+	if (cpu_limit != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
 		sig->cputimer.running = 1;
 	}
 
@@ -849,7 +859,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	if (clone_flags & CLONE_THREAD)
 		return 0;
 
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -857,43 +867,21 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	atomic_set(&sig->count, 1);
 	atomic_set(&sig->live, 1);
 	init_waitqueue_head(&sig->wait_chldexit);
-	sig->flags = 0;
 	if (clone_flags & CLONE_NEWPID)
 		sig->flags |= SIGNAL_UNKILLABLE;
-	sig->group_exit_code = 0;
-	sig->group_exit_task = NULL;
-	sig->group_stop_count = 0;
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->leader = 0;	/* session leadership doesn't inherit */
-	sig->tty_old_pgrp = NULL;
-	sig->tty = NULL;
-
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
-	sig->gtime = cputime_zero;
-	sig->cgtime = cputime_zero;
-	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
-	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
-	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
-	sig->maxrss = sig->cmaxrss = 0;
-	task_io_accounting_init(&sig->ioac);
-	sig->sum_sched_runtime = 0;
-	taskstats_tgid_init(sig);
-
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
 	posix_cpu_timers_init_group(sig);
 
-	acct_init_pacct(&sig->pacct);
-
 	tty_audit_fork(sig);
 
 	sig->oom_adj = current->signal->oom_adj;
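
The kmem_cache_zalloc() switch in the previous hunk is what licenses all the deletions here: the allocation now comes back zeroed, so every '= 0', '= NULL', and cputime_zero initializer is redundant, and only fields with non-zero initial values keep explicit assignments. For reference, the helper is essentially:

	static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
	{
		return kmem_cache_alloc(k, flags | __GFP_ZERO);
	}

(cputime_zero and it_real_incr.tv64 = 0 are all-bits-zero representations, which is why those lines can go too.)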
@@ -928,9 +916,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 
 static void rt_mutex_init_task(struct task_struct *p)
 {
-	spin_lock_init(&p->pi_lock);
+	raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-	plist_head_init(&p->pi_waiters, &p->pi_lock);
+	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
 #endif
 }
@@ -1022,7 +1010,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	retval = -EAGAIN;
 	if (atomic_read(&p->real_cred->user->processes) >=
-			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+			task_rlimit(p, RLIMIT_NPROC)) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
 		    p->real_cred->user != INIT_USER)
 			goto bad_fork_free;
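
task_rlimit() folds the rlim_cur read into a helper that also takes a racy-read snapshot, matching the ACCESS_ONCE() pattern used in posix_cpu_timers_init_group() above; roughly:

	static inline unsigned long task_rlimit(const struct task_struct *tsk,
						unsigned int limit)
	{
		return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
	}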
@@ -1060,8 +1048,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
+#endif
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
@@ -1114,6 +1104,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	p->memcg_batch.do_batch = 0;
+	p->memcg_batch.memcg = NULL;
+#endif
 
 	p->bts = NULL;
 
@@ -1193,9 +1187,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->sas_ss_sp = p->sas_ss_size = 0;
 
 	/*
-	 * Syscall tracing should be turned off in the child regardless
-	 * of CLONE_PTRACE.
+	 * Syscall tracing and stepping should be turned off in the
+	 * child regardless of CLONE_PTRACE.
 	 */
+	user_disable_single_step(p);
 	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
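
user_disable_single_step() is the arch-abstracted ptrace helper; architectures without single-step support get an empty inline, so the call costs nothing where it does not apply. Sketch of the generic fallback this relies on:

	#ifndef arch_has_single_step
	static inline void user_disable_single_step(struct task_struct *task)
	{
		/* no single-step machinery to clear on this architecture */
	}
	#endif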
@@ -1223,21 +1218,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
-	/*
-	 * The task hasn't been attached yet, so its cpus_allowed mask will
-	 * not be changed, nor will its assigned CPU.
-	 *
-	 * The cpus_allowed mask of the parent may have changed after it was
-	 * copied first time - so re-copy it here, then check the child's CPU
-	 * to ensure it is on a valid CPU (and if not, just force it back to
-	 * parent's CPU). This avoids alot of nasty races.
-	 */
-	p->cpus_allowed = current->cpus_allowed;
-	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
-	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
-			!cpu_online(task_cpu(p))))
-		set_task_cpu(p, smp_processor_id());
-
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
@@ -1273,7 +1253,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (likely(p->pid)) {
-		list_add_tail(&p->sibling, &p->real_parent->children);
 		tracehook_finish_clone(p, clone_flags, trace);
 
 		if (thread_group_leader(p)) {
@@ -1285,6 +1264,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->signal->tty = tty_kref_get(current->signal->tty);
 			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			attach_pid(p, PIDTYPE_SID, task_session(current));
+			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
 		}
@@ -1304,7 +1284,8 @@ bad_fork_free_pid:
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_io:
-	put_io_context(p->io_context);
+	if (p->io_context)
+		exit_io_context(p);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm: