about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 135
1 files changed, 70 insertions, 65 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf80..8214ba7c8bb1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -23,6 +23,7 @@
23#include <linux/sem.h> 23#include <linux/sem.h>
24#include <linux/file.h> 24#include <linux/file.h>
25#include <linux/fdtable.h> 25#include <linux/fdtable.h>
26#include <linux/iocontext.h>
26#include <linux/key.h> 27#include <linux/key.h>
27#include <linux/binfmts.h> 28#include <linux/binfmts.h>
28#include <linux/mman.h> 29#include <linux/mman.h>
@@ -32,9 +33,11 @@
32#include <linux/cpu.h> 33#include <linux/cpu.h>
33#include <linux/cgroup.h> 34#include <linux/cgroup.h>
34#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/hugetlb.h>
35#include <linux/swap.h> 37#include <linux/swap.h>
36#include <linux/syscalls.h> 38#include <linux/syscalls.h>
37#include <linux/jiffies.h> 39#include <linux/jiffies.h>
40#include <linux/tracehook.h>
38#include <linux/futex.h> 41#include <linux/futex.h>
39#include <linux/task_io_accounting_ops.h> 42#include <linux/task_io_accounting_ops.h>
40#include <linux/rcupdate.h> 43#include <linux/rcupdate.h>
@@ -91,6 +94,23 @@ int nr_processes(void)
91static struct kmem_cache *task_struct_cachep; 94static struct kmem_cache *task_struct_cachep;
92#endif 95#endif
93 96
97#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
98static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
99{
100#ifdef CONFIG_DEBUG_STACK_USAGE
101 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
102#else
103 gfp_t mask = GFP_KERNEL;
104#endif
105 return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
106}
107
108static inline void free_thread_info(struct thread_info *ti)
109{
110 free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
111}
112#endif
113
94/* SLAB cache for signal_struct structures (tsk->signal) */ 114/* SLAB cache for signal_struct structures (tsk->signal) */
95static struct kmem_cache *signal_cachep; 115static struct kmem_cache *signal_cachep;
96 116
@@ -306,6 +326,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
306 } 326 }
307 327
308 /* 328 /*
329 * Clear hugetlb-related page reserves for children. This only
330 * affects MAP_PRIVATE mappings. Faults generated by the child
331 * are not guaranteed to succeed, even if read-only
332 */
333 if (is_vm_hugetlb_page(tmp))
334 reset_vma_resv_huge_pages(tmp);
335
336 /*
309 * Link in the new vma and copy the page table entries. 337 * Link in the new vma and copy the page table entries.
310 */ 338 */
311 *pprev = tmp; 339 *pprev = tmp;
@@ -373,7 +401,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
373 INIT_LIST_HEAD(&mm->mmlist); 401 INIT_LIST_HEAD(&mm->mmlist);
374 mm->flags = (current->mm) ? current->mm->flags 402 mm->flags = (current->mm) ? current->mm->flags
375 : MMF_DUMP_FILTER_DEFAULT; 403 : MMF_DUMP_FILTER_DEFAULT;
376 mm->core_waiters = 0; 404 mm->core_state = NULL;
377 mm->nr_ptes = 0; 405 mm->nr_ptes = 0;
378 set_mm_counter(mm, file_rss, 0); 406 set_mm_counter(mm, file_rss, 0);
379 set_mm_counter(mm, anon_rss, 0); 407 set_mm_counter(mm, anon_rss, 0);
@@ -447,7 +475,7 @@ EXPORT_SYMBOL_GPL(mmput);
447/** 475/**
448 * get_task_mm - acquire a reference to the task's mm 476 * get_task_mm - acquire a reference to the task's mm
449 * 477 *
450 * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 478 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
451 * this kernel workthread has transiently adopted a user mm with use_mm, 479 * this kernel workthread has transiently adopted a user mm with use_mm,
452 * to do its AIO) is not set and if so returns a reference to it, after 480 * to do its AIO) is not set and if so returns a reference to it, after
453 * bumping up the use count. User must release the mm via mmput() 481 * bumping up the use count. User must release the mm via mmput()
@@ -460,7 +488,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
460 task_lock(task); 488 task_lock(task);
461 mm = task->mm; 489 mm = task->mm;
462 if (mm) { 490 if (mm) {
463 if (task->flags & PF_BORROWED_MM) 491 if (task->flags & PF_KTHREAD)
464 mm = NULL; 492 mm = NULL;
465 else 493 else
466 atomic_inc(&mm->mm_users); 494 atomic_inc(&mm->mm_users);
@@ -629,13 +657,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
629 path_get(&old->root); 657 path_get(&old->root);
630 fs->pwd = old->pwd; 658 fs->pwd = old->pwd;
631 path_get(&old->pwd); 659 path_get(&old->pwd);
632 if (old->altroot.dentry) {
633 fs->altroot = old->altroot;
634 path_get(&old->altroot);
635 } else {
636 fs->altroot.mnt = NULL;
637 fs->altroot.dentry = NULL;
638 }
639 read_unlock(&old->lock); 660 read_unlock(&old->lock);
640 } 661 }
641 return fs; 662 return fs;
@@ -785,6 +806,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
785 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 806 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
786 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 807 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
787 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 808 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
809 task_io_accounting_init(&sig->ioac);
788 sig->sum_sched_runtime = 0; 810 sig->sum_sched_runtime = 0;
789 INIT_LIST_HEAD(&sig->cpu_timers[0]); 811 INIT_LIST_HEAD(&sig->cpu_timers[0]);
790 INIT_LIST_HEAD(&sig->cpu_timers[1]); 812 INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -832,8 +854,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
832 854
833 new_flags &= ~PF_SUPERPRIV; 855 new_flags &= ~PF_SUPERPRIV;
834 new_flags |= PF_FORKNOEXEC; 856 new_flags |= PF_FORKNOEXEC;
835 if (!(clone_flags & CLONE_PTRACE)) 857 new_flags |= PF_STARTING;
836 p->ptrace = 0;
837 p->flags = new_flags; 858 p->flags = new_flags;
838 clear_freeze_flag(p); 859 clear_freeze_flag(p);
839} 860}
@@ -874,7 +895,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
874 struct pt_regs *regs, 895 struct pt_regs *regs,
875 unsigned long stack_size, 896 unsigned long stack_size,
876 int __user *child_tidptr, 897 int __user *child_tidptr,
877 struct pid *pid) 898 struct pid *pid,
899 int trace)
878{ 900{
879 int retval; 901 int retval;
880 struct task_struct *p; 902 struct task_struct *p;
@@ -909,7 +931,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
909 931
910 rt_mutex_init_task(p); 932 rt_mutex_init_task(p);
911 933
912#ifdef CONFIG_TRACE_IRQFLAGS 934#ifdef CONFIG_PROVE_LOCKING
913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 935 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 936 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
915#endif 937#endif
@@ -967,13 +989,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
967 p->last_switch_timestamp = 0; 989 p->last_switch_timestamp = 0;
968#endif 990#endif
969 991
970#ifdef CONFIG_TASK_XACCT 992 task_io_accounting_init(&p->ioac);
971 p->rchar = 0; /* I/O counter: bytes read */
972 p->wchar = 0; /* I/O counter: bytes written */
973 p->syscr = 0; /* I/O counter: read syscalls */
974 p->syscw = 0; /* I/O counter: write syscalls */
975#endif
976 task_io_accounting_init(p);
977 acct_clear_integrals(p); 993 acct_clear_integrals(p);
978 994
979 p->it_virt_expires = cputime_zero; 995 p->it_virt_expires = cputime_zero;
@@ -1080,6 +1096,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1080 if (clone_flags & CLONE_THREAD) 1096 if (clone_flags & CLONE_THREAD)
1081 p->tgid = current->tgid; 1097 p->tgid = current->tgid;
1082 1098
1099 if (current->nsproxy != p->nsproxy) {
1100 retval = ns_cgroup_clone(p, pid);
1101 if (retval)
1102 goto bad_fork_free_pid;
1103 }
1104
1083 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1105 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1084 /* 1106 /*
1085 * Clear TID on mm_release()? 1107 * Clear TID on mm_release()?
@@ -1124,8 +1146,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1124 */ 1146 */
1125 p->group_leader = p; 1147 p->group_leader = p;
1126 INIT_LIST_HEAD(&p->thread_group); 1148 INIT_LIST_HEAD(&p->thread_group);
1127 INIT_LIST_HEAD(&p->ptrace_children);
1128 INIT_LIST_HEAD(&p->ptrace_list);
1129 1149
1130 /* Now that the task is set up, run cgroup callbacks if 1150 /* Now that the task is set up, run cgroup callbacks if
1131 * necessary. We need to run them before the task is visible 1151 * necessary. We need to run them before the task is visible
@@ -1156,7 +1176,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1156 p->real_parent = current->real_parent; 1176 p->real_parent = current->real_parent;
1157 else 1177 else
1158 p->real_parent = current; 1178 p->real_parent = current;
1159 p->parent = p->real_parent;
1160 1179
1161 spin_lock(&current->sighand->siglock); 1180 spin_lock(&current->sighand->siglock);
1162 1181
@@ -1197,9 +1216,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1197 } 1216 }
1198 1217
1199 if (likely(p->pid)) { 1218 if (likely(p->pid)) {
1200 add_parent(p); 1219 list_add_tail(&p->sibling, &p->real_parent->children);
1201 if (unlikely(p->ptrace & PT_PTRACED)) 1220 tracehook_finish_clone(p, clone_flags, trace);
1202 __ptrace_link(p, current->parent);
1203 1221
1204 if (thread_group_leader(p)) { 1222 if (thread_group_leader(p)) {
1205 if (clone_flags & CLONE_NEWPID) 1223 if (clone_flags & CLONE_NEWPID)
@@ -1284,29 +1302,13 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1284 struct pt_regs regs; 1302 struct pt_regs regs;
1285 1303
1286 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, 1304 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1287 &init_struct_pid); 1305 &init_struct_pid, 0);
1288 if (!IS_ERR(task)) 1306 if (!IS_ERR(task))
1289 init_idle(task, cpu); 1307 init_idle(task, cpu);
1290 1308
1291 return task; 1309 return task;
1292} 1310}
1293 1311
1294static int fork_traceflag(unsigned clone_flags)
1295{
1296 if (clone_flags & CLONE_UNTRACED)
1297 return 0;
1298 else if (clone_flags & CLONE_VFORK) {
1299 if (current->ptrace & PT_TRACE_VFORK)
1300 return PTRACE_EVENT_VFORK;
1301 } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
1302 if (current->ptrace & PT_TRACE_CLONE)
1303 return PTRACE_EVENT_CLONE;
1304 } else if (current->ptrace & PT_TRACE_FORK)
1305 return PTRACE_EVENT_FORK;
1306
1307 return 0;
1308}
1309
1310/* 1312/*
1311 * Ok, this is the main fork-routine. 1313 * Ok, this is the main fork-routine.
1312 * 1314 *
@@ -1341,14 +1343,14 @@ long do_fork(unsigned long clone_flags,
1341 } 1343 }
1342 } 1344 }
1343 1345
1344 if (unlikely(current->ptrace)) { 1346 /*
1345 trace = fork_traceflag (clone_flags); 1347 * When called from kernel_thread, don't do user tracing stuff.
1346 if (trace) 1348 */
1347 clone_flags |= CLONE_PTRACE; 1349 if (likely(user_mode(regs)))
1348 } 1350 trace = tracehook_prepare_clone(clone_flags);
1349 1351
1350 p = copy_process(clone_flags, stack_start, regs, stack_size, 1352 p = copy_process(clone_flags, stack_start, regs, stack_size,
1351 child_tidptr, NULL); 1353 child_tidptr, NULL, trace);
1352 /* 1354 /*
1353 * Do this prior waking up the new thread - the thread pointer 1355 * Do this prior waking up the new thread - the thread pointer
1354 * might get invalid after that point, if the thread exits quickly. 1356 * might get invalid after that point, if the thread exits quickly.
@@ -1366,32 +1368,35 @@ long do_fork(unsigned long clone_flags,
1366 init_completion(&vfork); 1368 init_completion(&vfork);
1367 } 1369 }
1368 1370
1369 if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { 1371 tracehook_report_clone(trace, regs, clone_flags, nr, p);
1372
1373 /*
1374 * We set PF_STARTING at creation in case tracing wants to
1375 * use this to distinguish a fully live task from one that
1376 * hasn't gotten to tracehook_report_clone() yet. Now we
1377 * clear it and set the child going.
1378 */
1379 p->flags &= ~PF_STARTING;
1380
1381 if (unlikely(clone_flags & CLONE_STOPPED)) {
1370 /* 1382 /*
1371 * We'll start up with an immediate SIGSTOP. 1383 * We'll start up with an immediate SIGSTOP.
1372 */ 1384 */
1373 sigaddset(&p->pending.signal, SIGSTOP); 1385 sigaddset(&p->pending.signal, SIGSTOP);
1374 set_tsk_thread_flag(p, TIF_SIGPENDING); 1386 set_tsk_thread_flag(p, TIF_SIGPENDING);
1375 }
1376
1377 if (!(clone_flags & CLONE_STOPPED))
1378 wake_up_new_task(p, clone_flags);
1379 else
1380 __set_task_state(p, TASK_STOPPED); 1387 __set_task_state(p, TASK_STOPPED);
1381 1388 } else {
1382 if (unlikely (trace)) { 1389 wake_up_new_task(p, clone_flags);
1383 current->ptrace_message = nr;
1384 ptrace_notify ((trace << 8) | SIGTRAP);
1385 } 1390 }
1386 1391
1392 tracehook_report_clone_complete(trace, regs,
1393 clone_flags, nr, p);
1394
1387 if (clone_flags & CLONE_VFORK) { 1395 if (clone_flags & CLONE_VFORK) {
1388 freezer_do_not_count(); 1396 freezer_do_not_count();
1389 wait_for_completion(&vfork); 1397 wait_for_completion(&vfork);
1390 freezer_count(); 1398 freezer_count();
1391 if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { 1399 tracehook_report_vfork_done(p, nr);
1392 current->ptrace_message = nr;
1393 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
1394 }
1395 } 1400 }
1396 } else { 1401 } else {
1397 nr = PTR_ERR(p); 1402 nr = PTR_ERR(p);
@@ -1403,7 +1408,7 @@ long do_fork(unsigned long clone_flags,
1403#define ARCH_MIN_MMSTRUCT_ALIGN 0 1408#define ARCH_MIN_MMSTRUCT_ALIGN 0
1404#endif 1409#endif
1405 1410
1406static void sighand_ctor(struct kmem_cache *cachep, void *data) 1411static void sighand_ctor(void *data)
1407{ 1412{
1408 struct sighand_struct *sighand = data; 1413 struct sighand_struct *sighand = data;
1409 1414