diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 130 |
1 files changed, 67 insertions, 63 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index adefc1131f27..8214ba7c8bb1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -33,9 +33,11 @@ | |||
33 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
34 | #include <linux/cgroup.h> | 34 | #include <linux/cgroup.h> |
35 | #include <linux/security.h> | 35 | #include <linux/security.h> |
36 | #include <linux/hugetlb.h> | ||
36 | #include <linux/swap.h> | 37 | #include <linux/swap.h> |
37 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
38 | #include <linux/jiffies.h> | 39 | #include <linux/jiffies.h> |
40 | #include <linux/tracehook.h> | ||
39 | #include <linux/futex.h> | 41 | #include <linux/futex.h> |
40 | #include <linux/task_io_accounting_ops.h> | 42 | #include <linux/task_io_accounting_ops.h> |
41 | #include <linux/rcupdate.h> | 43 | #include <linux/rcupdate.h> |
@@ -92,6 +94,23 @@ int nr_processes(void) | |||
92 | static struct kmem_cache *task_struct_cachep; | 94 | static struct kmem_cache *task_struct_cachep; |
93 | #endif | 95 | #endif |
94 | 96 | ||
97 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR | ||
98 | static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) | ||
99 | { | ||
100 | #ifdef CONFIG_DEBUG_STACK_USAGE | ||
101 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; | ||
102 | #else | ||
103 | gfp_t mask = GFP_KERNEL; | ||
104 | #endif | ||
105 | return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); | ||
106 | } | ||
107 | |||
108 | static inline void free_thread_info(struct thread_info *ti) | ||
109 | { | ||
110 | free_pages((unsigned long)ti, THREAD_SIZE_ORDER); | ||
111 | } | ||
112 | #endif | ||
113 | |||
95 | /* SLAB cache for signal_struct structures (tsk->signal) */ | 114 | /* SLAB cache for signal_struct structures (tsk->signal) */ |
96 | static struct kmem_cache *signal_cachep; | 115 | static struct kmem_cache *signal_cachep; |
97 | 116 | ||
@@ -307,6 +326,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
307 | } | 326 | } |
308 | 327 | ||
309 | /* | 328 | /* |
329 | * Clear hugetlb-related page reserves for children. This only | ||
330 | * affects MAP_PRIVATE mappings. Faults generated by the child | ||
331 | * are not guaranteed to succeed, even if read-only | ||
332 | */ | ||
333 | if (is_vm_hugetlb_page(tmp)) | ||
334 | reset_vma_resv_huge_pages(tmp); | ||
335 | |||
336 | /* | ||
310 | * Link in the new vma and copy the page table entries. | 337 | * Link in the new vma and copy the page table entries. |
311 | */ | 338 | */ |
312 | *pprev = tmp; | 339 | *pprev = tmp; |
@@ -374,7 +401,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
374 | INIT_LIST_HEAD(&mm->mmlist); | 401 | INIT_LIST_HEAD(&mm->mmlist); |
375 | mm->flags = (current->mm) ? current->mm->flags | 402 | mm->flags = (current->mm) ? current->mm->flags |
376 | : MMF_DUMP_FILTER_DEFAULT; | 403 | : MMF_DUMP_FILTER_DEFAULT; |
377 | mm->core_waiters = 0; | 404 | mm->core_state = NULL; |
378 | mm->nr_ptes = 0; | 405 | mm->nr_ptes = 0; |
379 | set_mm_counter(mm, file_rss, 0); | 406 | set_mm_counter(mm, file_rss, 0); |
380 | set_mm_counter(mm, anon_rss, 0); | 407 | set_mm_counter(mm, anon_rss, 0); |
@@ -448,7 +475,7 @@ EXPORT_SYMBOL_GPL(mmput); | |||
448 | /** | 475 | /** |
449 | * get_task_mm - acquire a reference to the task's mm | 476 | * get_task_mm - acquire a reference to the task's mm |
450 | * | 477 | * |
451 | * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning | 478 | * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning |
452 | * this kernel workthread has transiently adopted a user mm with use_mm, | 479 | * this kernel workthread has transiently adopted a user mm with use_mm, |
453 | * to do its AIO) is not set and if so returns a reference to it, after | 480 | * to do its AIO) is not set and if so returns a reference to it, after |
454 | * bumping up the use count. User must release the mm via mmput() | 481 | * bumping up the use count. User must release the mm via mmput() |
@@ -461,7 +488,7 @@ struct mm_struct *get_task_mm(struct task_struct *task) | |||
461 | task_lock(task); | 488 | task_lock(task); |
462 | mm = task->mm; | 489 | mm = task->mm; |
463 | if (mm) { | 490 | if (mm) { |
464 | if (task->flags & PF_BORROWED_MM) | 491 | if (task->flags & PF_KTHREAD) |
465 | mm = NULL; | 492 | mm = NULL; |
466 | else | 493 | else |
467 | atomic_inc(&mm->mm_users); | 494 | atomic_inc(&mm->mm_users); |
@@ -630,13 +657,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old) | |||
630 | path_get(&old->root); | 657 | path_get(&old->root); |
631 | fs->pwd = old->pwd; | 658 | fs->pwd = old->pwd; |
632 | path_get(&old->pwd); | 659 | path_get(&old->pwd); |
633 | if (old->altroot.dentry) { | ||
634 | fs->altroot = old->altroot; | ||
635 | path_get(&old->altroot); | ||
636 | } else { | ||
637 | fs->altroot.mnt = NULL; | ||
638 | fs->altroot.dentry = NULL; | ||
639 | } | ||
640 | read_unlock(&old->lock); | 660 | read_unlock(&old->lock); |
641 | } | 661 | } |
642 | return fs; | 662 | return fs; |
@@ -786,6 +806,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
786 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 806 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
787 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 807 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
788 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 808 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
809 | task_io_accounting_init(&sig->ioac); | ||
789 | sig->sum_sched_runtime = 0; | 810 | sig->sum_sched_runtime = 0; |
790 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | 811 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
791 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | 812 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |
@@ -833,8 +854,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) | |||
833 | 854 | ||
834 | new_flags &= ~PF_SUPERPRIV; | 855 | new_flags &= ~PF_SUPERPRIV; |
835 | new_flags |= PF_FORKNOEXEC; | 856 | new_flags |= PF_FORKNOEXEC; |
836 | if (!(clone_flags & CLONE_PTRACE)) | 857 | new_flags |= PF_STARTING; |
837 | p->ptrace = 0; | ||
838 | p->flags = new_flags; | 858 | p->flags = new_flags; |
839 | clear_freeze_flag(p); | 859 | clear_freeze_flag(p); |
840 | } | 860 | } |
@@ -875,7 +895,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
875 | struct pt_regs *regs, | 895 | struct pt_regs *regs, |
876 | unsigned long stack_size, | 896 | unsigned long stack_size, |
877 | int __user *child_tidptr, | 897 | int __user *child_tidptr, |
878 | struct pid *pid) | 898 | struct pid *pid, |
899 | int trace) | ||
879 | { | 900 | { |
880 | int retval; | 901 | int retval; |
881 | struct task_struct *p; | 902 | struct task_struct *p; |
@@ -968,13 +989,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
968 | p->last_switch_timestamp = 0; | 989 | p->last_switch_timestamp = 0; |
969 | #endif | 990 | #endif |
970 | 991 | ||
971 | #ifdef CONFIG_TASK_XACCT | 992 | task_io_accounting_init(&p->ioac); |
972 | p->rchar = 0; /* I/O counter: bytes read */ | ||
973 | p->wchar = 0; /* I/O counter: bytes written */ | ||
974 | p->syscr = 0; /* I/O counter: read syscalls */ | ||
975 | p->syscw = 0; /* I/O counter: write syscalls */ | ||
976 | #endif | ||
977 | task_io_accounting_init(p); | ||
978 | acct_clear_integrals(p); | 993 | acct_clear_integrals(p); |
979 | 994 | ||
980 | p->it_virt_expires = cputime_zero; | 995 | p->it_virt_expires = cputime_zero; |
@@ -1081,6 +1096,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1081 | if (clone_flags & CLONE_THREAD) | 1096 | if (clone_flags & CLONE_THREAD) |
1082 | p->tgid = current->tgid; | 1097 | p->tgid = current->tgid; |
1083 | 1098 | ||
1099 | if (current->nsproxy != p->nsproxy) { | ||
1100 | retval = ns_cgroup_clone(p, pid); | ||
1101 | if (retval) | ||
1102 | goto bad_fork_free_pid; | ||
1103 | } | ||
1104 | |||
1084 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | 1105 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; |
1085 | /* | 1106 | /* |
1086 | * Clear TID on mm_release()? | 1107 | * Clear TID on mm_release()? |
@@ -1125,8 +1146,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1125 | */ | 1146 | */ |
1126 | p->group_leader = p; | 1147 | p->group_leader = p; |
1127 | INIT_LIST_HEAD(&p->thread_group); | 1148 | INIT_LIST_HEAD(&p->thread_group); |
1128 | INIT_LIST_HEAD(&p->ptrace_entry); | ||
1129 | INIT_LIST_HEAD(&p->ptraced); | ||
1130 | 1149 | ||
1131 | /* Now that the task is set up, run cgroup callbacks if | 1150 | /* Now that the task is set up, run cgroup callbacks if |
1132 | * necessary. We need to run them before the task is visible | 1151 | * necessary. We need to run them before the task is visible |
@@ -1157,7 +1176,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1157 | p->real_parent = current->real_parent; | 1176 | p->real_parent = current->real_parent; |
1158 | else | 1177 | else |
1159 | p->real_parent = current; | 1178 | p->real_parent = current; |
1160 | p->parent = p->real_parent; | ||
1161 | 1179 | ||
1162 | spin_lock(&current->sighand->siglock); | 1180 | spin_lock(&current->sighand->siglock); |
1163 | 1181 | ||
@@ -1199,8 +1217,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1199 | 1217 | ||
1200 | if (likely(p->pid)) { | 1218 | if (likely(p->pid)) { |
1201 | list_add_tail(&p->sibling, &p->real_parent->children); | 1219 | list_add_tail(&p->sibling, &p->real_parent->children); |
1202 | if (unlikely(p->ptrace & PT_PTRACED)) | 1220 | tracehook_finish_clone(p, clone_flags, trace); |
1203 | __ptrace_link(p, current->parent); | ||
1204 | 1221 | ||
1205 | if (thread_group_leader(p)) { | 1222 | if (thread_group_leader(p)) { |
1206 | if (clone_flags & CLONE_NEWPID) | 1223 | if (clone_flags & CLONE_NEWPID) |
@@ -1285,29 +1302,13 @@ struct task_struct * __cpuinit fork_idle(int cpu) | |||
1285 | struct pt_regs regs; | 1302 | struct pt_regs regs; |
1286 | 1303 | ||
1287 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, | 1304 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, |
1288 | &init_struct_pid); | 1305 | &init_struct_pid, 0); |
1289 | if (!IS_ERR(task)) | 1306 | if (!IS_ERR(task)) |
1290 | init_idle(task, cpu); | 1307 | init_idle(task, cpu); |
1291 | 1308 | ||
1292 | return task; | 1309 | return task; |
1293 | } | 1310 | } |
1294 | 1311 | ||
1295 | static int fork_traceflag(unsigned clone_flags) | ||
1296 | { | ||
1297 | if (clone_flags & CLONE_UNTRACED) | ||
1298 | return 0; | ||
1299 | else if (clone_flags & CLONE_VFORK) { | ||
1300 | if (current->ptrace & PT_TRACE_VFORK) | ||
1301 | return PTRACE_EVENT_VFORK; | ||
1302 | } else if ((clone_flags & CSIGNAL) != SIGCHLD) { | ||
1303 | if (current->ptrace & PT_TRACE_CLONE) | ||
1304 | return PTRACE_EVENT_CLONE; | ||
1305 | } else if (current->ptrace & PT_TRACE_FORK) | ||
1306 | return PTRACE_EVENT_FORK; | ||
1307 | |||
1308 | return 0; | ||
1309 | } | ||
1310 | |||
1311 | /* | 1312 | /* |
1312 | * Ok, this is the main fork-routine. | 1313 | * Ok, this is the main fork-routine. |
1313 | * | 1314 | * |
@@ -1342,14 +1343,14 @@ long do_fork(unsigned long clone_flags, | |||
1342 | } | 1343 | } |
1343 | } | 1344 | } |
1344 | 1345 | ||
1345 | if (unlikely(current->ptrace)) { | 1346 | /* |
1346 | trace = fork_traceflag (clone_flags); | 1347 | * When called from kernel_thread, don't do user tracing stuff. |
1347 | if (trace) | 1348 | */ |
1348 | clone_flags |= CLONE_PTRACE; | 1349 | if (likely(user_mode(regs))) |
1349 | } | 1350 | trace = tracehook_prepare_clone(clone_flags); |
1350 | 1351 | ||
1351 | p = copy_process(clone_flags, stack_start, regs, stack_size, | 1352 | p = copy_process(clone_flags, stack_start, regs, stack_size, |
1352 | child_tidptr, NULL); | 1353 | child_tidptr, NULL, trace); |
1353 | /* | 1354 | /* |
1354 | * Do this prior waking up the new thread - the thread pointer | 1355 | * Do this prior waking up the new thread - the thread pointer |
1355 | * might get invalid after that point, if the thread exits quickly. | 1356 | * might get invalid after that point, if the thread exits quickly. |
@@ -1367,32 +1368,35 @@ long do_fork(unsigned long clone_flags, | |||
1367 | init_completion(&vfork); | 1368 | init_completion(&vfork); |
1368 | } | 1369 | } |
1369 | 1370 | ||
1370 | if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { | 1371 | tracehook_report_clone(trace, regs, clone_flags, nr, p); |
1372 | |||
1373 | /* | ||
1374 | * We set PF_STARTING at creation in case tracing wants to | ||
1375 | * use this to distinguish a fully live task from one that | ||
1376 | * hasn't gotten to tracehook_report_clone() yet. Now we | ||
1377 | * clear it and set the child going. | ||
1378 | */ | ||
1379 | p->flags &= ~PF_STARTING; | ||
1380 | |||
1381 | if (unlikely(clone_flags & CLONE_STOPPED)) { | ||
1371 | /* | 1382 | /* |
1372 | * We'll start up with an immediate SIGSTOP. | 1383 | * We'll start up with an immediate SIGSTOP. |
1373 | */ | 1384 | */ |
1374 | sigaddset(&p->pending.signal, SIGSTOP); | 1385 | sigaddset(&p->pending.signal, SIGSTOP); |
1375 | set_tsk_thread_flag(p, TIF_SIGPENDING); | 1386 | set_tsk_thread_flag(p, TIF_SIGPENDING); |
1376 | } | ||
1377 | |||
1378 | if (!(clone_flags & CLONE_STOPPED)) | ||
1379 | wake_up_new_task(p, clone_flags); | ||
1380 | else | ||
1381 | __set_task_state(p, TASK_STOPPED); | 1387 | __set_task_state(p, TASK_STOPPED); |
1382 | 1388 | } else { | |
1383 | if (unlikely (trace)) { | 1389 | wake_up_new_task(p, clone_flags); |
1384 | current->ptrace_message = nr; | ||
1385 | ptrace_notify ((trace << 8) | SIGTRAP); | ||
1386 | } | 1390 | } |
1387 | 1391 | ||
1392 | tracehook_report_clone_complete(trace, regs, | ||
1393 | clone_flags, nr, p); | ||
1394 | |||
1388 | if (clone_flags & CLONE_VFORK) { | 1395 | if (clone_flags & CLONE_VFORK) { |
1389 | freezer_do_not_count(); | 1396 | freezer_do_not_count(); |
1390 | wait_for_completion(&vfork); | 1397 | wait_for_completion(&vfork); |
1391 | freezer_count(); | 1398 | freezer_count(); |
1392 | if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { | 1399 | tracehook_report_vfork_done(p, nr); |
1393 | current->ptrace_message = nr; | ||
1394 | ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); | ||
1395 | } | ||
1396 | } | 1400 | } |
1397 | } else { | 1401 | } else { |
1398 | nr = PTR_ERR(p); | 1402 | nr = PTR_ERR(p); |
@@ -1404,7 +1408,7 @@ long do_fork(unsigned long clone_flags, | |||
1404 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1408 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
1405 | #endif | 1409 | #endif |
1406 | 1410 | ||
1407 | static void sighand_ctor(struct kmem_cache *cachep, void *data) | 1411 | static void sighand_ctor(void *data) |
1408 | { | 1412 | { |
1409 | struct sighand_struct *sighand = data; | 1413 | struct sighand_struct *sighand = data; |
1410 | 1414 | ||