about summary refs log tree commit diff stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 133
1 files changed, 70 insertions, 63 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f27..7ce2ebe84796 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,15 +27,18 @@
27#include <linux/key.h> 27#include <linux/key.h>
28#include <linux/binfmts.h> 28#include <linux/binfmts.h>
29#include <linux/mman.h> 29#include <linux/mman.h>
30#include <linux/mmu_notifier.h>
30#include <linux/fs.h> 31#include <linux/fs.h>
31#include <linux/nsproxy.h> 32#include <linux/nsproxy.h>
32#include <linux/capability.h> 33#include <linux/capability.h>
33#include <linux/cpu.h> 34#include <linux/cpu.h>
34#include <linux/cgroup.h> 35#include <linux/cgroup.h>
35#include <linux/security.h> 36#include <linux/security.h>
37#include <linux/hugetlb.h>
36#include <linux/swap.h> 38#include <linux/swap.h>
37#include <linux/syscalls.h> 39#include <linux/syscalls.h>
38#include <linux/jiffies.h> 40#include <linux/jiffies.h>
41#include <linux/tracehook.h>
39#include <linux/futex.h> 42#include <linux/futex.h>
40#include <linux/task_io_accounting_ops.h> 43#include <linux/task_io_accounting_ops.h>
41#include <linux/rcupdate.h> 44#include <linux/rcupdate.h>
@@ -92,6 +95,23 @@ int nr_processes(void)
92static struct kmem_cache *task_struct_cachep; 95static struct kmem_cache *task_struct_cachep;
93#endif 96#endif
94 97
98#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
99static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
100{
101#ifdef CONFIG_DEBUG_STACK_USAGE
102 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
103#else
104 gfp_t mask = GFP_KERNEL;
105#endif
106 return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
107}
108
109static inline void free_thread_info(struct thread_info *ti)
110{
111 free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
112}
113#endif
114
95/* SLAB cache for signal_struct structures (tsk->signal) */ 115/* SLAB cache for signal_struct structures (tsk->signal) */
96static struct kmem_cache *signal_cachep; 116static struct kmem_cache *signal_cachep;
97 117
@@ -307,6 +327,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
307 } 327 }
308 328
309 /* 329 /*
330 * Clear hugetlb-related page reserves for children. This only
331 * affects MAP_PRIVATE mappings. Faults generated by the child
332 * are not guaranteed to succeed, even if read-only
333 */
334 if (is_vm_hugetlb_page(tmp))
335 reset_vma_resv_huge_pages(tmp);
336
337 /*
310 * Link in the new vma and copy the page table entries. 338 * Link in the new vma and copy the page table entries.
311 */ 339 */
312 *pprev = tmp; 340 *pprev = tmp;
@@ -374,7 +402,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
374 INIT_LIST_HEAD(&mm->mmlist); 402 INIT_LIST_HEAD(&mm->mmlist);
375 mm->flags = (current->mm) ? current->mm->flags 403 mm->flags = (current->mm) ? current->mm->flags
376 : MMF_DUMP_FILTER_DEFAULT; 404 : MMF_DUMP_FILTER_DEFAULT;
377 mm->core_waiters = 0; 405 mm->core_state = NULL;
378 mm->nr_ptes = 0; 406 mm->nr_ptes = 0;
379 set_mm_counter(mm, file_rss, 0); 407 set_mm_counter(mm, file_rss, 0);
380 set_mm_counter(mm, anon_rss, 0); 408 set_mm_counter(mm, anon_rss, 0);
@@ -387,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
387 415
388 if (likely(!mm_alloc_pgd(mm))) { 416 if (likely(!mm_alloc_pgd(mm))) {
389 mm->def_flags = 0; 417 mm->def_flags = 0;
418 mmu_notifier_mm_init(mm);
390 return mm; 419 return mm;
391 } 420 }
392 421
@@ -419,6 +448,7 @@ void __mmdrop(struct mm_struct *mm)
419 BUG_ON(mm == &init_mm); 448 BUG_ON(mm == &init_mm);
420 mm_free_pgd(mm); 449 mm_free_pgd(mm);
421 destroy_context(mm); 450 destroy_context(mm);
451 mmu_notifier_mm_destroy(mm);
422 free_mm(mm); 452 free_mm(mm);
423} 453}
424EXPORT_SYMBOL_GPL(__mmdrop); 454EXPORT_SYMBOL_GPL(__mmdrop);
@@ -448,7 +478,7 @@ EXPORT_SYMBOL_GPL(mmput);
448/** 478/**
449 * get_task_mm - acquire a reference to the task's mm 479 * get_task_mm - acquire a reference to the task's mm
450 * 480 *
451 * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 481 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
452 * this kernel workthread has transiently adopted a user mm with use_mm, 482 * this kernel workthread has transiently adopted a user mm with use_mm,
453 * to do its AIO) is not set and if so returns a reference to it, after 483 * to do its AIO) is not set and if so returns a reference to it, after
454 * bumping up the use count. User must release the mm via mmput() 484 * bumping up the use count. User must release the mm via mmput()
@@ -461,7 +491,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
461 task_lock(task); 491 task_lock(task);
462 mm = task->mm; 492 mm = task->mm;
463 if (mm) { 493 if (mm) {
464 if (task->flags & PF_BORROWED_MM) 494 if (task->flags & PF_KTHREAD)
465 mm = NULL; 495 mm = NULL;
466 else 496 else
467 atomic_inc(&mm->mm_users); 497 atomic_inc(&mm->mm_users);
@@ -630,13 +660,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
630 path_get(&old->root); 660 path_get(&old->root);
631 fs->pwd = old->pwd; 661 fs->pwd = old->pwd;
632 path_get(&old->pwd); 662 path_get(&old->pwd);
633 if (old->altroot.dentry) {
634 fs->altroot = old->altroot;
635 path_get(&old->altroot);
636 } else {
637 fs->altroot.mnt = NULL;
638 fs->altroot.dentry = NULL;
639 }
640 read_unlock(&old->lock); 663 read_unlock(&old->lock);
641 } 664 }
642 return fs; 665 return fs;
@@ -786,6 +809,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
786 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 809 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
787 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 810 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
788 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 811 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
812 task_io_accounting_init(&sig->ioac);
789 sig->sum_sched_runtime = 0; 813 sig->sum_sched_runtime = 0;
790 INIT_LIST_HEAD(&sig->cpu_timers[0]); 814 INIT_LIST_HEAD(&sig->cpu_timers[0]);
791 INIT_LIST_HEAD(&sig->cpu_timers[1]); 815 INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -833,8 +857,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
833 857
834 new_flags &= ~PF_SUPERPRIV; 858 new_flags &= ~PF_SUPERPRIV;
835 new_flags |= PF_FORKNOEXEC; 859 new_flags |= PF_FORKNOEXEC;
836 if (!(clone_flags & CLONE_PTRACE)) 860 new_flags |= PF_STARTING;
837 p->ptrace = 0;
838 p->flags = new_flags; 861 p->flags = new_flags;
839 clear_freeze_flag(p); 862 clear_freeze_flag(p);
840} 863}
@@ -875,7 +898,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
875 struct pt_regs *regs, 898 struct pt_regs *regs,
876 unsigned long stack_size, 899 unsigned long stack_size,
877 int __user *child_tidptr, 900 int __user *child_tidptr,
878 struct pid *pid) 901 struct pid *pid,
902 int trace)
879{ 903{
880 int retval; 904 int retval;
881 struct task_struct *p; 905 struct task_struct *p;
@@ -968,13 +992,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
968 p->last_switch_timestamp = 0; 992 p->last_switch_timestamp = 0;
969#endif 993#endif
970 994
971#ifdef CONFIG_TASK_XACCT 995 task_io_accounting_init(&p->ioac);
972 p->rchar = 0; /* I/O counter: bytes read */
973 p->wchar = 0; /* I/O counter: bytes written */
974 p->syscr = 0; /* I/O counter: read syscalls */
975 p->syscw = 0; /* I/O counter: write syscalls */
976#endif
977 task_io_accounting_init(p);
978 acct_clear_integrals(p); 996 acct_clear_integrals(p);
979 997
980 p->it_virt_expires = cputime_zero; 998 p->it_virt_expires = cputime_zero;
@@ -1081,6 +1099,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1081 if (clone_flags & CLONE_THREAD) 1099 if (clone_flags & CLONE_THREAD)
1082 p->tgid = current->tgid; 1100 p->tgid = current->tgid;
1083 1101
1102 if (current->nsproxy != p->nsproxy) {
1103 retval = ns_cgroup_clone(p, pid);
1104 if (retval)
1105 goto bad_fork_free_pid;
1106 }
1107
1084 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1108 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1085 /* 1109 /*
1086 * Clear TID on mm_release()? 1110 * Clear TID on mm_release()?
@@ -1125,8 +1149,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1125 */ 1149 */
1126 p->group_leader = p; 1150 p->group_leader = p;
1127 INIT_LIST_HEAD(&p->thread_group); 1151 INIT_LIST_HEAD(&p->thread_group);
1128 INIT_LIST_HEAD(&p->ptrace_entry);
1129 INIT_LIST_HEAD(&p->ptraced);
1130 1152
1131 /* Now that the task is set up, run cgroup callbacks if 1153 /* Now that the task is set up, run cgroup callbacks if
1132 * necessary. We need to run them before the task is visible 1154 * necessary. We need to run them before the task is visible
@@ -1157,7 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1157 p->real_parent = current->real_parent; 1179 p->real_parent = current->real_parent;
1158 else 1180 else
1159 p->real_parent = current; 1181 p->real_parent = current;
1160 p->parent = p->real_parent;
1161 1182
1162 spin_lock(&current->sighand->siglock); 1183 spin_lock(&current->sighand->siglock);
1163 1184
@@ -1199,8 +1220,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1199 1220
1200 if (likely(p->pid)) { 1221 if (likely(p->pid)) {
1201 list_add_tail(&p->sibling, &p->real_parent->children); 1222 list_add_tail(&p->sibling, &p->real_parent->children);
1202 if (unlikely(p->ptrace & PT_PTRACED)) 1223 tracehook_finish_clone(p, clone_flags, trace);
1203 __ptrace_link(p, current->parent);
1204 1224
1205 if (thread_group_leader(p)) { 1225 if (thread_group_leader(p)) {
1206 if (clone_flags & CLONE_NEWPID) 1226 if (clone_flags & CLONE_NEWPID)
@@ -1285,29 +1305,13 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1285 struct pt_regs regs; 1305 struct pt_regs regs;
1286 1306
1287 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, 1307 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1288 &init_struct_pid); 1308 &init_struct_pid, 0);
1289 if (!IS_ERR(task)) 1309 if (!IS_ERR(task))
1290 init_idle(task, cpu); 1310 init_idle(task, cpu);
1291 1311
1292 return task; 1312 return task;
1293} 1313}
1294 1314
1295static int fork_traceflag(unsigned clone_flags)
1296{
1297 if (clone_flags & CLONE_UNTRACED)
1298 return 0;
1299 else if (clone_flags & CLONE_VFORK) {
1300 if (current->ptrace & PT_TRACE_VFORK)
1301 return PTRACE_EVENT_VFORK;
1302 } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
1303 if (current->ptrace & PT_TRACE_CLONE)
1304 return PTRACE_EVENT_CLONE;
1305 } else if (current->ptrace & PT_TRACE_FORK)
1306 return PTRACE_EVENT_FORK;
1307
1308 return 0;
1309}
1310
1311/* 1315/*
1312 * Ok, this is the main fork-routine. 1316 * Ok, this is the main fork-routine.
1313 * 1317 *
@@ -1342,14 +1346,14 @@ long do_fork(unsigned long clone_flags,
1342 } 1346 }
1343 } 1347 }
1344 1348
1345 if (unlikely(current->ptrace)) { 1349 /*
1346 trace = fork_traceflag (clone_flags); 1350 * When called from kernel_thread, don't do user tracing stuff.
1347 if (trace) 1351 */
1348 clone_flags |= CLONE_PTRACE; 1352 if (likely(user_mode(regs)))
1349 } 1353 trace = tracehook_prepare_clone(clone_flags);
1350 1354
1351 p = copy_process(clone_flags, stack_start, regs, stack_size, 1355 p = copy_process(clone_flags, stack_start, regs, stack_size,
1352 child_tidptr, NULL); 1356 child_tidptr, NULL, trace);
1353 /* 1357 /*
1354 * Do this prior waking up the new thread - the thread pointer 1358 * Do this prior waking up the new thread - the thread pointer
1355 * might get invalid after that point, if the thread exits quickly. 1359 * might get invalid after that point, if the thread exits quickly.
@@ -1367,32 +1371,35 @@ long do_fork(unsigned long clone_flags,
1367 init_completion(&vfork); 1371 init_completion(&vfork);
1368 } 1372 }
1369 1373
1370 if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { 1374 tracehook_report_clone(trace, regs, clone_flags, nr, p);
1375
1376 /*
1377 * We set PF_STARTING at creation in case tracing wants to
1378 * use this to distinguish a fully live task from one that
1379 * hasn't gotten to tracehook_report_clone() yet. Now we
1380 * clear it and set the child going.
1381 */
1382 p->flags &= ~PF_STARTING;
1383
1384 if (unlikely(clone_flags & CLONE_STOPPED)) {
1371 /* 1385 /*
1372 * We'll start up with an immediate SIGSTOP. 1386 * We'll start up with an immediate SIGSTOP.
1373 */ 1387 */
1374 sigaddset(&p->pending.signal, SIGSTOP); 1388 sigaddset(&p->pending.signal, SIGSTOP);
1375 set_tsk_thread_flag(p, TIF_SIGPENDING); 1389 set_tsk_thread_flag(p, TIF_SIGPENDING);
1376 }
1377
1378 if (!(clone_flags & CLONE_STOPPED))
1379 wake_up_new_task(p, clone_flags);
1380 else
1381 __set_task_state(p, TASK_STOPPED); 1390 __set_task_state(p, TASK_STOPPED);
1382 1391 } else {
1383 if (unlikely (trace)) { 1392 wake_up_new_task(p, clone_flags);
1384 current->ptrace_message = nr;
1385 ptrace_notify ((trace << 8) | SIGTRAP);
1386 } 1393 }
1387 1394
1395 tracehook_report_clone_complete(trace, regs,
1396 clone_flags, nr, p);
1397
1388 if (clone_flags & CLONE_VFORK) { 1398 if (clone_flags & CLONE_VFORK) {
1389 freezer_do_not_count(); 1399 freezer_do_not_count();
1390 wait_for_completion(&vfork); 1400 wait_for_completion(&vfork);
1391 freezer_count(); 1401 freezer_count();
1392 if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { 1402 tracehook_report_vfork_done(p, nr);
1393 current->ptrace_message = nr;
1394 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
1395 }
1396 } 1403 }
1397 } else { 1404 } else {
1398 nr = PTR_ERR(p); 1405 nr = PTR_ERR(p);
@@ -1404,7 +1411,7 @@ long do_fork(unsigned long clone_flags,
1404#define ARCH_MIN_MMSTRUCT_ALIGN 0 1411#define ARCH_MIN_MMSTRUCT_ALIGN 0
1405#endif 1412#endif
1406 1413
1407static void sighand_ctor(struct kmem_cache *cachep, void *data) 1414static void sighand_ctor(void *data)
1408{ 1415{
1409 struct sighand_struct *sighand = data; 1416 struct sighand_struct *sighand = data;
1410 1417