Diffstat (limited to 'kernel/fork.c')
-rw-r--r--	kernel/fork.c	120
1 file changed, 71 insertions(+), 49 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f27..abb3ed6298f6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,9 +33,11 @@
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
+#include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
@@ -92,6 +94,23 @@ int nr_processes(void)
 static struct kmem_cache *task_struct_cachep;
 #endif
 
+#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
+#else
+	gfp_t mask = GFP_KERNEL;
+#endif
+	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+}
+
+static inline void free_thread_info(struct thread_info *ti)
+{
+	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+}
+#endif
+
 /* SLAB cache for signal_struct structures (tsk->signal) */
 static struct kmem_cache *signal_cachep;
 
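The pair above is only the generic, page-based fallback. An architecture that defines __HAVE_ARCH_THREAD_INFO_ALLOCATOR in its <asm/thread_info.h> supplies its own allocator instead. A minimal sketch of such an override, assuming a slab-backed cache (thread_info_cache and its boot-time setup are hypothetical, not part of this patch):

/*
 * Hypothetical per-arch override, enabled by defining
 * __HAVE_ARCH_THREAD_INFO_ALLOCATOR in <asm/thread_info.h>.
 * Sketch only: backs thread_info with a kmem_cache instead of
 * whole pages, as some architectures do.
 */
static struct kmem_cache *thread_info_cache;	/* assumed, created at boot */

static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
{
	return kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
}

static inline void free_thread_info(struct thread_info *ti)
{
	kmem_cache_free(thread_info_cache, ti);
}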
@@ -307,6 +326,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	}
 
 	/*
+	 * Clear hugetlb-related page reserves for children. This only
+	 * affects MAP_PRIVATE mappings. Faults generated by the child
+	 * are not guaranteed to succeed, even if read-only
+	 */
+	if (is_vm_hugetlb_page(tmp))
+		reset_vma_resv_huge_pages(tmp);
+
+	/*
 	 * Link in the new vma and copy the page table entries.
 	 */
 	*pprev = tmp;
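For context, reset_vma_resv_huge_pages() lives in mm/hugetlb.c, not in this file; it simply drops the child's copy of the parent's private reservation. A sketch of its likely shape in this kernel generation (details assumed, not shown by this patch):

/* Sketch, not verbatim: a MAP_PRIVATE hugetlb vma keeps its
 * reservation in vm_private_data; clearing it means the child
 * starts with no reserved huge pages of its own. */
void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
	VM_BUG_ON(!is_vm_hugetlb_page(vma));
	if (!(vma->vm_flags & VM_MAYSHARE))
		vma->vm_private_data = (void *)0;
}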
@@ -374,7 +401,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->flags = (current->mm) ? current->mm->flags
 				   : MMF_DUMP_FILTER_DEFAULT;
-	mm->core_waiters = 0;
+	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
@@ -448,7 +475,7 @@ EXPORT_SYMBOL_GPL(mmput);
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
- * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
+ * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
  * this kernel workthread has transiently adopted a user mm with use_mm,
  * to do its AIO) is not set and if so returns a reference to it, after
  * bumping up the use count.  User must release the mm via mmput()
@@ -461,7 +488,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 	task_lock(task);
 	mm = task->mm;
 	if (mm) {
-		if (task->flags & PF_BORROWED_MM)
+		if (task->flags & PF_KTHREAD)
 			mm = NULL;
 		else
 			atomic_inc(&mm->mm_users);
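The flag rename does not change the calling convention spelled out in the comment above. The usual pattern for a caller (task here is a hypothetical struct task_struct pointer):

/* Take a reference on another task's mm, use it, drop it. */
struct mm_struct *mm = get_task_mm(task);
if (mm) {
	/* ... inspect or operate on the address space ... */
	mmput(mm);	/* pairs with the mm_users bump in get_task_mm() */
}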
@@ -786,6 +813,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+#ifdef CONFIG_TASK_XACCT
+	sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	memset(&sig->ioac, 0, sizeof(sig->ioac));
+#endif
 	sig->sum_sched_runtime = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -833,8 +866,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 
 	new_flags &= ~PF_SUPERPRIV;
 	new_flags |= PF_FORKNOEXEC;
-	if (!(clone_flags & CLONE_PTRACE))
-		p->ptrace = 0;
+	new_flags |= PF_STARTING;
 	p->flags = new_flags;
 	clear_freeze_flag(p);
 }
@@ -875,7 +907,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 					struct pt_regs *regs,
 					unsigned long stack_size,
 					int __user *child_tidptr,
-					struct pid *pid)
+					struct pid *pid,
+					int trace)
 {
 	int retval;
 	struct task_struct *p;
@@ -1081,6 +1114,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD)
 		p->tgid = current->tgid;
 
+	if (current->nsproxy != p->nsproxy) {
+		retval = ns_cgroup_clone(p, pid);
+		if (retval)
+			goto bad_fork_free_pid;
+	}
+
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
 	 * Clear TID on mm_release()?
@@ -1125,8 +1164,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
-	INIT_LIST_HEAD(&p->ptrace_entry);
-	INIT_LIST_HEAD(&p->ptraced);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
@@ -1157,7 +1194,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		p->real_parent = current->real_parent;
 	else
 		p->real_parent = current;
-	p->parent = p->real_parent;
 
 	spin_lock(&current->sighand->siglock);
 
@@ -1199,8 +1235,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	if (likely(p->pid)) {
 		list_add_tail(&p->sibling, &p->real_parent->children);
-		if (unlikely(p->ptrace & PT_PTRACED))
-			__ptrace_link(p, current->parent);
+		tracehook_finish_clone(p, clone_flags, trace);
 
 		if (thread_group_leader(p)) {
 			if (clone_flags & CLONE_NEWPID)
@@ -1285,29 +1320,13 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 	struct pt_regs regs;
 
 	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
-			    &init_struct_pid);
+			    &init_struct_pid, 0);
 	if (!IS_ERR(task))
 		init_idle(task, cpu);
 
 	return task;
 }
 
-static int fork_traceflag(unsigned clone_flags)
-{
-	if (clone_flags & CLONE_UNTRACED)
-		return 0;
-	else if (clone_flags & CLONE_VFORK) {
-		if (current->ptrace & PT_TRACE_VFORK)
-			return PTRACE_EVENT_VFORK;
-	} else if ((clone_flags & CSIGNAL) != SIGCHLD) {
-		if (current->ptrace & PT_TRACE_CLONE)
-			return PTRACE_EVENT_CLONE;
-	} else if (current->ptrace & PT_TRACE_FORK)
-		return PTRACE_EVENT_FORK;
-
-	return 0;
-}
-
 /*
  * Ok, this is the main fork-routine.
  *
@@ -1342,14 +1361,14 @@ long do_fork(unsigned long clone_flags,
 		}
 	}
 
-	if (unlikely(current->ptrace)) {
-		trace = fork_traceflag (clone_flags);
-		if (trace)
-			clone_flags |= CLONE_PTRACE;
-	}
+	/*
+	 * When called from kernel_thread, don't do user tracing stuff.
+	 */
+	if (likely(user_mode(regs)))
+		trace = tracehook_prepare_clone(clone_flags);
 
 	p = copy_process(clone_flags, stack_start, regs, stack_size,
-			 child_tidptr, NULL);
+			 child_tidptr, NULL, trace);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
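The fork_traceflag() logic deleted earlier does not vanish: this patch series moves it into <linux/tracehook.h> as the default tracehook_prepare_clone(), which the hunk above now calls for user-mode regs. A sketch consistent with the deleted code (the header's exact wrapper is assumed):

/* Default tracehook_prepare_clone(): returns the PTRACE_EVENT_*
 * code to report for this clone, or 0 if no report is wanted.
 * Mirrors the fork_traceflag() body removed from fork.c. */
static inline int tracehook_prepare_clone(unsigned clone_flags)
{
	if (clone_flags & CLONE_UNTRACED)
		return 0;
	else if (clone_flags & CLONE_VFORK) {
		if (current->ptrace & PT_TRACE_VFORK)
			return PTRACE_EVENT_VFORK;
	} else if ((clone_flags & CSIGNAL) != SIGCHLD) {
		if (current->ptrace & PT_TRACE_CLONE)
			return PTRACE_EVENT_CLONE;
	} else if (current->ptrace & PT_TRACE_FORK)
		return PTRACE_EVENT_FORK;

	return 0;
}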
@@ -1367,32 +1386,35 @@ long do_fork(unsigned long clone_flags,
 			init_completion(&vfork);
 		}
 
-		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
+		tracehook_report_clone(trace, regs, clone_flags, nr, p);
+
+		/*
+		 * We set PF_STARTING at creation in case tracing wants to
+		 * use this to distinguish a fully live task from one that
+		 * hasn't gotten to tracehook_report_clone() yet.  Now we
+		 * clear it and set the child going.
+		 */
+		p->flags &= ~PF_STARTING;
+
+		if (unlikely(clone_flags & CLONE_STOPPED)) {
 			/*
 			 * We'll start up with an immediate SIGSTOP.
 			 */
 			sigaddset(&p->pending.signal, SIGSTOP);
 			set_tsk_thread_flag(p, TIF_SIGPENDING);
-		}
-
-		if (!(clone_flags & CLONE_STOPPED))
-			wake_up_new_task(p, clone_flags);
-		else
 			__set_task_state(p, TASK_STOPPED);
-
-		if (unlikely (trace)) {
-			current->ptrace_message = nr;
-			ptrace_notify ((trace << 8) | SIGTRAP);
+		} else {
+			wake_up_new_task(p, clone_flags);
 		}
 
+		tracehook_report_clone_complete(trace, regs,
+						clone_flags, nr, p);
+
 		if (clone_flags & CLONE_VFORK) {
 			freezer_do_not_count();
 			wait_for_completion(&vfork);
 			freezer_count();
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
-				current->ptrace_message = nr;
-				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
-			}
+			tracehook_report_vfork_done(p, nr);
 		}
 	} else {
 		nr = PTR_ERR(p);
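Pulling the do_fork() hunks together, the resulting event order for a traced clone, summarizing only what this diff shows (nr is the child's pid as returned to the caller):

/*
 *   trace = tracehook_prepare_clone(clone_flags);    user-mode regs only
 *   p = copy_process(..., pid, trace);               child created with PF_STARTING
 *   tracehook_report_clone(trace, regs, clone_flags, nr, p);
 *   p->flags &= ~PF_STARTING;                        child may now be started
 *   CLONE_STOPPED ? pend SIGSTOP + TASK_STOPPED
 *                 : wake_up_new_task(p, clone_flags);
 *   tracehook_report_clone_complete(trace, regs, clone_flags, nr, p);
 *   tracehook_report_vfork_done(p, nr);              CLONE_VFORK only, after the wait
 */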
@@ -1404,7 +1426,7 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN	0
 #endif
 
-static void sighand_ctor(struct kmem_cache *cachep, void *data)
+static void sighand_ctor(void *data)
 {
 	struct sighand_struct *sighand = data;
 
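The narrowed signature tracks the slab interface of this kernel generation, where constructors take only the object pointer. Registration elsewhere in fork.c then looks roughly like this (cache name and flags follow proc_caches_init() of this era; treat the exact values as assumptions):

/* Sketch of the cache setup that installs sighand_ctor. */
sighand_cachep = kmem_cache_create("sighand_cache",
		sizeof(struct sighand_struct), 0,
		SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
		sighand_ctor);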