about summary refs log tree commit diff stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  60
1 files changed, 32 insertions, 28 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c49257c..b0ec34abc0bb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -64,6 +64,7 @@
64#include <linux/magic.h> 64#include <linux/magic.h>
65#include <linux/perf_event.h> 65#include <linux/perf_event.h>
66#include <linux/posix-timers.h> 66#include <linux/posix-timers.h>
67#include <linux/user-return-notifier.h>
67 68
68#include <asm/pgtable.h> 69#include <asm/pgtable.h>
69#include <asm/pgalloc.h> 70#include <asm/pgalloc.h>
@@ -85,6 +86,7 @@ int max_threads; /* tunable limit on nr_threads */
85DEFINE_PER_CPU(unsigned long, process_counts) = 0; 86DEFINE_PER_CPU(unsigned long, process_counts) = 0;
86 87
87__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 88__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
89EXPORT_SYMBOL_GPL(tasklist_lock);
88 90
89int nr_processes(void) 91int nr_processes(void)
90{ 92{
@@ -249,6 +251,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
249 goto out; 251 goto out;
250 252
251 setup_thread_stack(tsk, orig); 253 setup_thread_stack(tsk, orig);
254 clear_user_return_notifier(tsk);
252 stackend = end_of_stack(tsk); 255 stackend = end_of_stack(tsk);
253 *stackend = STACK_END_MAGIC; /* for overflow detection */ 256 *stackend = STACK_END_MAGIC; /* for overflow detection */
254 257
@@ -326,15 +329,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
326 if (!tmp) 329 if (!tmp)
327 goto fail_nomem; 330 goto fail_nomem;
328 *tmp = *mpnt; 331 *tmp = *mpnt;
332 INIT_LIST_HEAD(&tmp->anon_vma_chain);
329 pol = mpol_dup(vma_policy(mpnt)); 333 pol = mpol_dup(vma_policy(mpnt));
330 retval = PTR_ERR(pol); 334 retval = PTR_ERR(pol);
331 if (IS_ERR(pol)) 335 if (IS_ERR(pol))
332 goto fail_nomem_policy; 336 goto fail_nomem_policy;
333 vma_set_policy(tmp, pol); 337 vma_set_policy(tmp, pol);
338 if (anon_vma_fork(tmp, mpnt))
339 goto fail_nomem_anon_vma_fork;
334 tmp->vm_flags &= ~VM_LOCKED; 340 tmp->vm_flags &= ~VM_LOCKED;
335 tmp->vm_mm = mm; 341 tmp->vm_mm = mm;
336 tmp->vm_next = NULL; 342 tmp->vm_next = NULL;
337 anon_vma_link(tmp);
338 file = tmp->vm_file; 343 file = tmp->vm_file;
339 if (file) { 344 if (file) {
340 struct inode *inode = file->f_path.dentry->d_inode; 345 struct inode *inode = file->f_path.dentry->d_inode;
@@ -389,6 +394,8 @@ out:
389 flush_tlb_mm(oldmm); 394 flush_tlb_mm(oldmm);
390 up_write(&oldmm->mmap_sem); 395 up_write(&oldmm->mmap_sem);
391 return retval; 396 return retval;
397fail_nomem_anon_vma_fork:
398 mpol_put(pol);
392fail_nomem_policy: 399fail_nomem_policy:
393 kmem_cache_free(vm_area_cachep, tmp); 400 kmem_cache_free(vm_area_cachep, tmp);
394fail_nomem: 401fail_nomem:
@@ -452,8 +459,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
452 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; 459 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
453 mm->core_state = NULL; 460 mm->core_state = NULL;
454 mm->nr_ptes = 0; 461 mm->nr_ptes = 0;
455 set_mm_counter(mm, file_rss, 0); 462 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
456 set_mm_counter(mm, anon_rss, 0);
457 spin_lock_init(&mm->page_table_lock); 463 spin_lock_init(&mm->page_table_lock);
458 mm->free_area_cache = TASK_UNMAPPED_BASE; 464 mm->free_area_cache = TASK_UNMAPPED_BASE;
459 mm->cached_hole_size = ~0UL; 465 mm->cached_hole_size = ~0UL;
@@ -822,6 +828,8 @@ void __cleanup_sighand(struct sighand_struct *sighand)
822 */ 828 */
823static void posix_cpu_timers_init_group(struct signal_struct *sig) 829static void posix_cpu_timers_init_group(struct signal_struct *sig)
824{ 830{
831 unsigned long cpu_limit;
832
825 /* Thread group counters. */ 833 /* Thread group counters. */
826 thread_group_cputime_init(sig); 834 thread_group_cputime_init(sig);
827 835
@@ -836,9 +844,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
836 sig->cputime_expires.virt_exp = cputime_zero; 844 sig->cputime_expires.virt_exp = cputime_zero;
837 sig->cputime_expires.sched_exp = 0; 845 sig->cputime_expires.sched_exp = 0;
838 846
839 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 847 cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
840 sig->cputime_expires.prof_exp = 848 if (cpu_limit != RLIM_INFINITY) {
841 secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); 849 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
842 sig->cputimer.running = 1; 850 sig->cputimer.running = 1;
843 } 851 }
844 852
@@ -884,6 +892,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
884 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; 892 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
885 sig->gtime = cputime_zero; 893 sig->gtime = cputime_zero;
886 sig->cgtime = cputime_zero; 894 sig->cgtime = cputime_zero;
895#ifndef CONFIG_VIRT_CPU_ACCOUNTING
896 sig->prev_utime = sig->prev_stime = cputime_zero;
897#endif
887 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 898 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
888 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 899 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
889 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 900 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
@@ -934,9 +945,9 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
934 945
935static void rt_mutex_init_task(struct task_struct *p) 946static void rt_mutex_init_task(struct task_struct *p)
936{ 947{
937 spin_lock_init(&p->pi_lock); 948 raw_spin_lock_init(&p->pi_lock);
938#ifdef CONFIG_RT_MUTEXES 949#ifdef CONFIG_RT_MUTEXES
939 plist_head_init(&p->pi_waiters, &p->pi_lock); 950 plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
940 p->pi_blocked_on = NULL; 951 p->pi_blocked_on = NULL;
941#endif 952#endif
942} 953}
@@ -1028,7 +1039,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1028#endif 1039#endif
1029 retval = -EAGAIN; 1040 retval = -EAGAIN;
1030 if (atomic_read(&p->real_cred->user->processes) >= 1041 if (atomic_read(&p->real_cred->user->processes) >=
1031 p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 1042 task_rlimit(p, RLIMIT_NPROC)) {
1032 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && 1043 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
1033 p->real_cred->user != INIT_USER) 1044 p->real_cred->user != INIT_USER)
1034 goto bad_fork_free; 1045 goto bad_fork_free;
@@ -1066,8 +1077,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1066 p->gtime = cputime_zero; 1077 p->gtime = cputime_zero;
1067 p->utimescaled = cputime_zero; 1078 p->utimescaled = cputime_zero;
1068 p->stimescaled = cputime_zero; 1079 p->stimescaled = cputime_zero;
1080#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1069 p->prev_utime = cputime_zero; 1081 p->prev_utime = cputime_zero;
1070 p->prev_stime = cputime_zero; 1082 p->prev_stime = cputime_zero;
1083#endif
1071 1084
1072 p->default_timer_slack_ns = current->timer_slack_ns; 1085 p->default_timer_slack_ns = current->timer_slack_ns;
1073 1086
@@ -1120,6 +1133,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1120#ifdef CONFIG_DEBUG_MUTEXES 1133#ifdef CONFIG_DEBUG_MUTEXES
1121 p->blocked_on = NULL; /* not blocked yet */ 1134 p->blocked_on = NULL; /* not blocked yet */
1122#endif 1135#endif
1136#ifdef CONFIG_CGROUP_MEM_RES_CTLR
1137 p->memcg_batch.do_batch = 0;
1138 p->memcg_batch.memcg = NULL;
1139#endif
1123 1140
1124 p->bts = NULL; 1141 p->bts = NULL;
1125 1142
@@ -1199,9 +1216,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1199 p->sas_ss_sp = p->sas_ss_size = 0; 1216 p->sas_ss_sp = p->sas_ss_size = 0;
1200 1217
1201 /* 1218 /*
1202 * Syscall tracing should be turned off in the child regardless 1219 * Syscall tracing and stepping should be turned off in the
1203 * of CLONE_PTRACE. 1220 * child regardless of CLONE_PTRACE.
1204 */ 1221 */
1222 user_disable_single_step(p);
1205 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 1223 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
1206#ifdef TIF_SYSCALL_EMU 1224#ifdef TIF_SYSCALL_EMU
1207 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 1225 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
@@ -1229,21 +1247,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1229 /* Need tasklist lock for parent etc handling! */ 1247 /* Need tasklist lock for parent etc handling! */
1230 write_lock_irq(&tasklist_lock); 1248 write_lock_irq(&tasklist_lock);
1231 1249
1232 /*
1233 * The task hasn't been attached yet, so its cpus_allowed mask will
1234 * not be changed, nor will its assigned CPU.
1235 *
1236 * The cpus_allowed mask of the parent may have changed after it was
1237 * copied first time - so re-copy it here, then check the child's CPU
1238 * to ensure it is on a valid CPU (and if not, just force it back to
1239 * parent's CPU). This avoids alot of nasty races.
1240 */
1241 p->cpus_allowed = current->cpus_allowed;
1242 p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
1243 if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
1244 !cpu_online(task_cpu(p))))
1245 set_task_cpu(p, smp_processor_id());
1246
1247 /* CLONE_PARENT re-uses the old parent */ 1250 /* CLONE_PARENT re-uses the old parent */
1248 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { 1251 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1249 p->real_parent = current->real_parent; 1252 p->real_parent = current->real_parent;
@@ -1279,7 +1282,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1279 } 1282 }
1280 1283
1281 if (likely(p->pid)) { 1284 if (likely(p->pid)) {
1282 list_add_tail(&p->sibling, &p->real_parent->children);
1283 tracehook_finish_clone(p, clone_flags, trace); 1285 tracehook_finish_clone(p, clone_flags, trace);
1284 1286
1285 if (thread_group_leader(p)) { 1287 if (thread_group_leader(p)) {
@@ -1291,6 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1291 p->signal->tty = tty_kref_get(current->signal->tty); 1293 p->signal->tty = tty_kref_get(current->signal->tty);
1292 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1294 attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
1293 attach_pid(p, PIDTYPE_SID, task_session(current)); 1295 attach_pid(p, PIDTYPE_SID, task_session(current));
1296 list_add_tail(&p->sibling, &p->real_parent->children);
1294 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1297 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1295 __get_cpu_var(process_counts)++; 1298 __get_cpu_var(process_counts)++;
1296 } 1299 }
@@ -1310,7 +1313,8 @@ bad_fork_free_pid:
1310 if (pid != &init_struct_pid) 1313 if (pid != &init_struct_pid)
1311 free_pid(pid); 1314 free_pid(pid);
1312bad_fork_cleanup_io: 1315bad_fork_cleanup_io:
1313 put_io_context(p->io_context); 1316 if (p->io_context)
1317 exit_io_context(p);
1314bad_fork_cleanup_namespaces: 1318bad_fork_cleanup_namespaces:
1315 exit_task_namespaces(p); 1319 exit_task_namespaces(p);
1316bad_fork_cleanup_mm: 1320bad_fork_cleanup_mm: