about summary refs log tree commit diff stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 130
1 files changed, 67 insertions, 63 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f27..8214ba7c8bb1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,9 +33,11 @@
33#include <linux/cpu.h> 33#include <linux/cpu.h>
34#include <linux/cgroup.h> 34#include <linux/cgroup.h>
35#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/hugetlb.h>
36#include <linux/swap.h> 37#include <linux/swap.h>
37#include <linux/syscalls.h> 38#include <linux/syscalls.h>
38#include <linux/jiffies.h> 39#include <linux/jiffies.h>
40#include <linux/tracehook.h>
39#include <linux/futex.h> 41#include <linux/futex.h>
40#include <linux/task_io_accounting_ops.h> 42#include <linux/task_io_accounting_ops.h>
41#include <linux/rcupdate.h> 43#include <linux/rcupdate.h>
@@ -92,6 +94,23 @@ int nr_processes(void)
92static struct kmem_cache *task_struct_cachep; 94static struct kmem_cache *task_struct_cachep;
93#endif 95#endif
94 96
97#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
98static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
99{
100#ifdef CONFIG_DEBUG_STACK_USAGE
101 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
102#else
103 gfp_t mask = GFP_KERNEL;
104#endif
105 return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
106}
107
108static inline void free_thread_info(struct thread_info *ti)
109{
110 free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
111}
112#endif
113
95/* SLAB cache for signal_struct structures (tsk->signal) */ 114/* SLAB cache for signal_struct structures (tsk->signal) */
96static struct kmem_cache *signal_cachep; 115static struct kmem_cache *signal_cachep;
97 116
@@ -307,6 +326,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
307 } 326 }
308 327
309 /* 328 /*
329 * Clear hugetlb-related page reserves for children. This only
330 * affects MAP_PRIVATE mappings. Faults generated by the child
331 * are not guaranteed to succeed, even if read-only
332 */
333 if (is_vm_hugetlb_page(tmp))
334 reset_vma_resv_huge_pages(tmp);
335
336 /*
310 * Link in the new vma and copy the page table entries. 337 * Link in the new vma and copy the page table entries.
311 */ 338 */
312 *pprev = tmp; 339 *pprev = tmp;
@@ -374,7 +401,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
374 INIT_LIST_HEAD(&mm->mmlist); 401 INIT_LIST_HEAD(&mm->mmlist);
375 mm->flags = (current->mm) ? current->mm->flags 402 mm->flags = (current->mm) ? current->mm->flags
376 : MMF_DUMP_FILTER_DEFAULT; 403 : MMF_DUMP_FILTER_DEFAULT;
377 mm->core_waiters = 0; 404 mm->core_state = NULL;
378 mm->nr_ptes = 0; 405 mm->nr_ptes = 0;
379 set_mm_counter(mm, file_rss, 0); 406 set_mm_counter(mm, file_rss, 0);
380 set_mm_counter(mm, anon_rss, 0); 407 set_mm_counter(mm, anon_rss, 0);
@@ -448,7 +475,7 @@ EXPORT_SYMBOL_GPL(mmput);
448/** 475/**
449 * get_task_mm - acquire a reference to the task's mm 476 * get_task_mm - acquire a reference to the task's mm
450 * 477 *
451 * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 478 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
452 * this kernel workthread has transiently adopted a user mm with use_mm, 479 * this kernel workthread has transiently adopted a user mm with use_mm,
453 * to do its AIO) is not set and if so returns a reference to it, after 480 * to do its AIO) is not set and if so returns a reference to it, after
454 * bumping up the use count. User must release the mm via mmput() 481 * bumping up the use count. User must release the mm via mmput()
@@ -461,7 +488,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
461 task_lock(task); 488 task_lock(task);
462 mm = task->mm; 489 mm = task->mm;
463 if (mm) { 490 if (mm) {
464 if (task->flags & PF_BORROWED_MM) 491 if (task->flags & PF_KTHREAD)
465 mm = NULL; 492 mm = NULL;
466 else 493 else
467 atomic_inc(&mm->mm_users); 494 atomic_inc(&mm->mm_users);
@@ -630,13 +657,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
630 path_get(&old->root); 657 path_get(&old->root);
631 fs->pwd = old->pwd; 658 fs->pwd = old->pwd;
632 path_get(&old->pwd); 659 path_get(&old->pwd);
633 if (old->altroot.dentry) {
634 fs->altroot = old->altroot;
635 path_get(&old->altroot);
636 } else {
637 fs->altroot.mnt = NULL;
638 fs->altroot.dentry = NULL;
639 }
640 read_unlock(&old->lock); 660 read_unlock(&old->lock);
641 } 661 }
642 return fs; 662 return fs;
@@ -786,6 +806,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
786 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 806 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
787 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 807 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
788 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 808 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
809 task_io_accounting_init(&sig->ioac);
789 sig->sum_sched_runtime = 0; 810 sig->sum_sched_runtime = 0;
790 INIT_LIST_HEAD(&sig->cpu_timers[0]); 811 INIT_LIST_HEAD(&sig->cpu_timers[0]);
791 INIT_LIST_HEAD(&sig->cpu_timers[1]); 812 INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -833,8 +854,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
833 854
834 new_flags &= ~PF_SUPERPRIV; 855 new_flags &= ~PF_SUPERPRIV;
835 new_flags |= PF_FORKNOEXEC; 856 new_flags |= PF_FORKNOEXEC;
836 if (!(clone_flags & CLONE_PTRACE)) 857 new_flags |= PF_STARTING;
837 p->ptrace = 0;
838 p->flags = new_flags; 858 p->flags = new_flags;
839 clear_freeze_flag(p); 859 clear_freeze_flag(p);
840} 860}
@@ -875,7 +895,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
875 struct pt_regs *regs, 895 struct pt_regs *regs,
876 unsigned long stack_size, 896 unsigned long stack_size,
877 int __user *child_tidptr, 897 int __user *child_tidptr,
878 struct pid *pid) 898 struct pid *pid,
899 int trace)
879{ 900{
880 int retval; 901 int retval;
881 struct task_struct *p; 902 struct task_struct *p;
@@ -968,13 +989,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
968 p->last_switch_timestamp = 0; 989 p->last_switch_timestamp = 0;
969#endif 990#endif
970 991
971#ifdef CONFIG_TASK_XACCT 992 task_io_accounting_init(&p->ioac);
972 p->rchar = 0; /* I/O counter: bytes read */
973 p->wchar = 0; /* I/O counter: bytes written */
974 p->syscr = 0; /* I/O counter: read syscalls */
975 p->syscw = 0; /* I/O counter: write syscalls */
976#endif
977 task_io_accounting_init(p);
978 acct_clear_integrals(p); 993 acct_clear_integrals(p);
979 994
980 p->it_virt_expires = cputime_zero; 995 p->it_virt_expires = cputime_zero;
@@ -1081,6 +1096,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1081 if (clone_flags & CLONE_THREAD) 1096 if (clone_flags & CLONE_THREAD)
1082 p->tgid = current->tgid; 1097 p->tgid = current->tgid;
1083 1098
1099 if (current->nsproxy != p->nsproxy) {
1100 retval = ns_cgroup_clone(p, pid);
1101 if (retval)
1102 goto bad_fork_free_pid;
1103 }
1104
1084 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1105 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1085 /* 1106 /*
1086 * Clear TID on mm_release()? 1107 * Clear TID on mm_release()?
@@ -1125,8 +1146,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1125 */ 1146 */
1126 p->group_leader = p; 1147 p->group_leader = p;
1127 INIT_LIST_HEAD(&p->thread_group); 1148 INIT_LIST_HEAD(&p->thread_group);
1128 INIT_LIST_HEAD(&p->ptrace_entry);
1129 INIT_LIST_HEAD(&p->ptraced);
1130 1149
1131 /* Now that the task is set up, run cgroup callbacks if 1150 /* Now that the task is set up, run cgroup callbacks if
1132 * necessary. We need to run them before the task is visible 1151 * necessary. We need to run them before the task is visible
@@ -1157,7 +1176,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1157 p->real_parent = current->real_parent; 1176 p->real_parent = current->real_parent;
1158 else 1177 else
1159 p->real_parent = current; 1178 p->real_parent = current;
1160 p->parent = p->real_parent;
1161 1179
1162 spin_lock(&current->sighand->siglock); 1180 spin_lock(&current->sighand->siglock);
1163 1181
@@ -1199,8 +1217,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1199 1217
1200 if (likely(p->pid)) { 1218 if (likely(p->pid)) {
1201 list_add_tail(&p->sibling, &p->real_parent->children); 1219 list_add_tail(&p->sibling, &p->real_parent->children);
1202 if (unlikely(p->ptrace & PT_PTRACED)) 1220 tracehook_finish_clone(p, clone_flags, trace);
1203 __ptrace_link(p, current->parent);
1204 1221
1205 if (thread_group_leader(p)) { 1222 if (thread_group_leader(p)) {
1206 if (clone_flags & CLONE_NEWPID) 1223 if (clone_flags & CLONE_NEWPID)
@@ -1285,29 +1302,13 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1285 struct pt_regs regs; 1302 struct pt_regs regs;
1286 1303
1287 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, 1304 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1288 &init_struct_pid); 1305 &init_struct_pid, 0);
1289 if (!IS_ERR(task)) 1306 if (!IS_ERR(task))
1290 init_idle(task, cpu); 1307 init_idle(task, cpu);
1291 1308
1292 return task; 1309 return task;
1293} 1310}
1294 1311
1295static int fork_traceflag(unsigned clone_flags)
1296{
1297 if (clone_flags & CLONE_UNTRACED)
1298 return 0;
1299 else if (clone_flags & CLONE_VFORK) {
1300 if (current->ptrace & PT_TRACE_VFORK)
1301 return PTRACE_EVENT_VFORK;
1302 } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
1303 if (current->ptrace & PT_TRACE_CLONE)
1304 return PTRACE_EVENT_CLONE;
1305 } else if (current->ptrace & PT_TRACE_FORK)
1306 return PTRACE_EVENT_FORK;
1307
1308 return 0;
1309}
1310
1311/* 1312/*
1312 * Ok, this is the main fork-routine. 1313 * Ok, this is the main fork-routine.
1313 * 1314 *
@@ -1342,14 +1343,14 @@ long do_fork(unsigned long clone_flags,
1342 } 1343 }
1343 } 1344 }
1344 1345
1345 if (unlikely(current->ptrace)) { 1346 /*
1346 trace = fork_traceflag (clone_flags); 1347 * When called from kernel_thread, don't do user tracing stuff.
1347 if (trace) 1348 */
1348 clone_flags |= CLONE_PTRACE; 1349 if (likely(user_mode(regs)))
1349 } 1350 trace = tracehook_prepare_clone(clone_flags);
1350 1351
1351 p = copy_process(clone_flags, stack_start, regs, stack_size, 1352 p = copy_process(clone_flags, stack_start, regs, stack_size,
1352 child_tidptr, NULL); 1353 child_tidptr, NULL, trace);
1353 /* 1354 /*
1354 * Do this prior waking up the new thread - the thread pointer 1355 * Do this prior waking up the new thread - the thread pointer
1355 * might get invalid after that point, if the thread exits quickly. 1356 * might get invalid after that point, if the thread exits quickly.
@@ -1367,32 +1368,35 @@ long do_fork(unsigned long clone_flags,
1367 init_completion(&vfork); 1368 init_completion(&vfork);
1368 } 1369 }
1369 1370
1370 if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { 1371 tracehook_report_clone(trace, regs, clone_flags, nr, p);
1372
1373 /*
1374 * We set PF_STARTING at creation in case tracing wants to
1375 * use this to distinguish a fully live task from one that
1376 * hasn't gotten to tracehook_report_clone() yet. Now we
1377 * clear it and set the child going.
1378 */
1379 p->flags &= ~PF_STARTING;
1380
1381 if (unlikely(clone_flags & CLONE_STOPPED)) {
1371 /* 1382 /*
1372 * We'll start up with an immediate SIGSTOP. 1383 * We'll start up with an immediate SIGSTOP.
1373 */ 1384 */
1374 sigaddset(&p->pending.signal, SIGSTOP); 1385 sigaddset(&p->pending.signal, SIGSTOP);
1375 set_tsk_thread_flag(p, TIF_SIGPENDING); 1386 set_tsk_thread_flag(p, TIF_SIGPENDING);
1376 }
1377
1378 if (!(clone_flags & CLONE_STOPPED))
1379 wake_up_new_task(p, clone_flags);
1380 else
1381 __set_task_state(p, TASK_STOPPED); 1387 __set_task_state(p, TASK_STOPPED);
1382 1388 } else {
1383 if (unlikely (trace)) { 1389 wake_up_new_task(p, clone_flags);
1384 current->ptrace_message = nr;
1385 ptrace_notify ((trace << 8) | SIGTRAP);
1386 } 1390 }
1387 1391
1392 tracehook_report_clone_complete(trace, regs,
1393 clone_flags, nr, p);
1394
1388 if (clone_flags & CLONE_VFORK) { 1395 if (clone_flags & CLONE_VFORK) {
1389 freezer_do_not_count(); 1396 freezer_do_not_count();
1390 wait_for_completion(&vfork); 1397 wait_for_completion(&vfork);
1391 freezer_count(); 1398 freezer_count();
1392 if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { 1399 tracehook_report_vfork_done(p, nr);
1393 current->ptrace_message = nr;
1394 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
1395 }
1396 } 1400 }
1397 } else { 1401 } else {
1398 nr = PTR_ERR(p); 1402 nr = PTR_ERR(p);
@@ -1404,7 +1408,7 @@ long do_fork(unsigned long clone_flags,
1404#define ARCH_MIN_MMSTRUCT_ALIGN 0 1408#define ARCH_MIN_MMSTRUCT_ALIGN 0
1405#endif 1409#endif
1406 1410
1407static void sighand_ctor(struct kmem_cache *cachep, void *data) 1411static void sighand_ctor(void *data)
1408{ 1412{
1409 struct sighand_struct *sighand = data; 1413 struct sighand_struct *sighand = data;
1410 1414