diff options
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 130 |
1 files changed, 67 insertions, 63 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index adefc1131f27..8214ba7c8bb1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -33,9 +33,11 @@ | |||
| 33 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
| 34 | #include <linux/cgroup.h> | 34 | #include <linux/cgroup.h> |
| 35 | #include <linux/security.h> | 35 | #include <linux/security.h> |
| 36 | #include <linux/hugetlb.h> | ||
| 36 | #include <linux/swap.h> | 37 | #include <linux/swap.h> |
| 37 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
| 38 | #include <linux/jiffies.h> | 39 | #include <linux/jiffies.h> |
| 40 | #include <linux/tracehook.h> | ||
| 39 | #include <linux/futex.h> | 41 | #include <linux/futex.h> |
| 40 | #include <linux/task_io_accounting_ops.h> | 42 | #include <linux/task_io_accounting_ops.h> |
| 41 | #include <linux/rcupdate.h> | 43 | #include <linux/rcupdate.h> |
| @@ -92,6 +94,23 @@ int nr_processes(void) | |||
| 92 | static struct kmem_cache *task_struct_cachep; | 94 | static struct kmem_cache *task_struct_cachep; |
| 93 | #endif | 95 | #endif |
| 94 | 96 | ||
| 97 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR | ||
| 98 | static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) | ||
| 99 | { | ||
| 100 | #ifdef CONFIG_DEBUG_STACK_USAGE | ||
| 101 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; | ||
| 102 | #else | ||
| 103 | gfp_t mask = GFP_KERNEL; | ||
| 104 | #endif | ||
| 105 | return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline void free_thread_info(struct thread_info *ti) | ||
| 109 | { | ||
| 110 | free_pages((unsigned long)ti, THREAD_SIZE_ORDER); | ||
| 111 | } | ||
| 112 | #endif | ||
| 113 | |||
| 95 | /* SLAB cache for signal_struct structures (tsk->signal) */ | 114 | /* SLAB cache for signal_struct structures (tsk->signal) */ |
| 96 | static struct kmem_cache *signal_cachep; | 115 | static struct kmem_cache *signal_cachep; |
| 97 | 116 | ||
| @@ -307,6 +326,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 307 | } | 326 | } |
| 308 | 327 | ||
| 309 | /* | 328 | /* |
| 329 | * Clear hugetlb-related page reserves for children. This only | ||
| 330 | * affects MAP_PRIVATE mappings. Faults generated by the child | ||
| 331 | * are not guaranteed to succeed, even if read-only | ||
| 332 | */ | ||
| 333 | if (is_vm_hugetlb_page(tmp)) | ||
| 334 | reset_vma_resv_huge_pages(tmp); | ||
| 335 | |||
| 336 | /* | ||
| 310 | * Link in the new vma and copy the page table entries. | 337 | * Link in the new vma and copy the page table entries. |
| 311 | */ | 338 | */ |
| 312 | *pprev = tmp; | 339 | *pprev = tmp; |
| @@ -374,7 +401,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
| 374 | INIT_LIST_HEAD(&mm->mmlist); | 401 | INIT_LIST_HEAD(&mm->mmlist); |
| 375 | mm->flags = (current->mm) ? current->mm->flags | 402 | mm->flags = (current->mm) ? current->mm->flags |
| 376 | : MMF_DUMP_FILTER_DEFAULT; | 403 | : MMF_DUMP_FILTER_DEFAULT; |
| 377 | mm->core_waiters = 0; | 404 | mm->core_state = NULL; |
| 378 | mm->nr_ptes = 0; | 405 | mm->nr_ptes = 0; |
| 379 | set_mm_counter(mm, file_rss, 0); | 406 | set_mm_counter(mm, file_rss, 0); |
| 380 | set_mm_counter(mm, anon_rss, 0); | 407 | set_mm_counter(mm, anon_rss, 0); |
| @@ -448,7 +475,7 @@ EXPORT_SYMBOL_GPL(mmput); | |||
| 448 | /** | 475 | /** |
| 449 | * get_task_mm - acquire a reference to the task's mm | 476 | * get_task_mm - acquire a reference to the task's mm |
| 450 | * | 477 | * |
| 451 | * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning | 478 | * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning |
| 452 | * this kernel workthread has transiently adopted a user mm with use_mm, | 479 | * this kernel workthread has transiently adopted a user mm with use_mm, |
| 453 | * to do its AIO) is not set and if so returns a reference to it, after | 480 | * to do its AIO) is not set and if so returns a reference to it, after |
| 454 | * bumping up the use count. User must release the mm via mmput() | 481 | * bumping up the use count. User must release the mm via mmput() |
| @@ -461,7 +488,7 @@ struct mm_struct *get_task_mm(struct task_struct *task) | |||
| 461 | task_lock(task); | 488 | task_lock(task); |
| 462 | mm = task->mm; | 489 | mm = task->mm; |
| 463 | if (mm) { | 490 | if (mm) { |
| 464 | if (task->flags & PF_BORROWED_MM) | 491 | if (task->flags & PF_KTHREAD) |
| 465 | mm = NULL; | 492 | mm = NULL; |
| 466 | else | 493 | else |
| 467 | atomic_inc(&mm->mm_users); | 494 | atomic_inc(&mm->mm_users); |
| @@ -630,13 +657,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old) | |||
| 630 | path_get(&old->root); | 657 | path_get(&old->root); |
| 631 | fs->pwd = old->pwd; | 658 | fs->pwd = old->pwd; |
| 632 | path_get(&old->pwd); | 659 | path_get(&old->pwd); |
| 633 | if (old->altroot.dentry) { | ||
| 634 | fs->altroot = old->altroot; | ||
| 635 | path_get(&old->altroot); | ||
| 636 | } else { | ||
| 637 | fs->altroot.mnt = NULL; | ||
| 638 | fs->altroot.dentry = NULL; | ||
| 639 | } | ||
| 640 | read_unlock(&old->lock); | 660 | read_unlock(&old->lock); |
| 641 | } | 661 | } |
| 642 | return fs; | 662 | return fs; |
| @@ -786,6 +806,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
| 786 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 806 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
| 787 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 807 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
| 788 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 808 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
| 809 | task_io_accounting_init(&sig->ioac); | ||
| 789 | sig->sum_sched_runtime = 0; | 810 | sig->sum_sched_runtime = 0; |
| 790 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | 811 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
| 791 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | 812 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |
| @@ -833,8 +854,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) | |||
| 833 | 854 | ||
| 834 | new_flags &= ~PF_SUPERPRIV; | 855 | new_flags &= ~PF_SUPERPRIV; |
| 835 | new_flags |= PF_FORKNOEXEC; | 856 | new_flags |= PF_FORKNOEXEC; |
| 836 | if (!(clone_flags & CLONE_PTRACE)) | 857 | new_flags |= PF_STARTING; |
| 837 | p->ptrace = 0; | ||
| 838 | p->flags = new_flags; | 858 | p->flags = new_flags; |
| 839 | clear_freeze_flag(p); | 859 | clear_freeze_flag(p); |
| 840 | } | 860 | } |
| @@ -875,7 +895,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 875 | struct pt_regs *regs, | 895 | struct pt_regs *regs, |
| 876 | unsigned long stack_size, | 896 | unsigned long stack_size, |
| 877 | int __user *child_tidptr, | 897 | int __user *child_tidptr, |
| 878 | struct pid *pid) | 898 | struct pid *pid, |
| 899 | int trace) | ||
| 879 | { | 900 | { |
| 880 | int retval; | 901 | int retval; |
| 881 | struct task_struct *p; | 902 | struct task_struct *p; |
| @@ -968,13 +989,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 968 | p->last_switch_timestamp = 0; | 989 | p->last_switch_timestamp = 0; |
| 969 | #endif | 990 | #endif |
| 970 | 991 | ||
| 971 | #ifdef CONFIG_TASK_XACCT | 992 | task_io_accounting_init(&p->ioac); |
| 972 | p->rchar = 0; /* I/O counter: bytes read */ | ||
| 973 | p->wchar = 0; /* I/O counter: bytes written */ | ||
| 974 | p->syscr = 0; /* I/O counter: read syscalls */ | ||
| 975 | p->syscw = 0; /* I/O counter: write syscalls */ | ||
| 976 | #endif | ||
| 977 | task_io_accounting_init(p); | ||
| 978 | acct_clear_integrals(p); | 993 | acct_clear_integrals(p); |
| 979 | 994 | ||
| 980 | p->it_virt_expires = cputime_zero; | 995 | p->it_virt_expires = cputime_zero; |
| @@ -1081,6 +1096,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1081 | if (clone_flags & CLONE_THREAD) | 1096 | if (clone_flags & CLONE_THREAD) |
| 1082 | p->tgid = current->tgid; | 1097 | p->tgid = current->tgid; |
| 1083 | 1098 | ||
| 1099 | if (current->nsproxy != p->nsproxy) { | ||
| 1100 | retval = ns_cgroup_clone(p, pid); | ||
| 1101 | if (retval) | ||
| 1102 | goto bad_fork_free_pid; | ||
| 1103 | } | ||
| 1104 | |||
| 1084 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | 1105 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; |
| 1085 | /* | 1106 | /* |
| 1086 | * Clear TID on mm_release()? | 1107 | * Clear TID on mm_release()? |
| @@ -1125,8 +1146,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1125 | */ | 1146 | */ |
| 1126 | p->group_leader = p; | 1147 | p->group_leader = p; |
| 1127 | INIT_LIST_HEAD(&p->thread_group); | 1148 | INIT_LIST_HEAD(&p->thread_group); |
| 1128 | INIT_LIST_HEAD(&p->ptrace_entry); | ||
| 1129 | INIT_LIST_HEAD(&p->ptraced); | ||
| 1130 | 1149 | ||
| 1131 | /* Now that the task is set up, run cgroup callbacks if | 1150 | /* Now that the task is set up, run cgroup callbacks if |
| 1132 | * necessary. We need to run them before the task is visible | 1151 | * necessary. We need to run them before the task is visible |
| @@ -1157,7 +1176,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1157 | p->real_parent = current->real_parent; | 1176 | p->real_parent = current->real_parent; |
| 1158 | else | 1177 | else |
| 1159 | p->real_parent = current; | 1178 | p->real_parent = current; |
| 1160 | p->parent = p->real_parent; | ||
| 1161 | 1179 | ||
| 1162 | spin_lock(&current->sighand->siglock); | 1180 | spin_lock(&current->sighand->siglock); |
| 1163 | 1181 | ||
| @@ -1199,8 +1217,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1199 | 1217 | ||
| 1200 | if (likely(p->pid)) { | 1218 | if (likely(p->pid)) { |
| 1201 | list_add_tail(&p->sibling, &p->real_parent->children); | 1219 | list_add_tail(&p->sibling, &p->real_parent->children); |
| 1202 | if (unlikely(p->ptrace & PT_PTRACED)) | 1220 | tracehook_finish_clone(p, clone_flags, trace); |
| 1203 | __ptrace_link(p, current->parent); | ||
| 1204 | 1221 | ||
| 1205 | if (thread_group_leader(p)) { | 1222 | if (thread_group_leader(p)) { |
| 1206 | if (clone_flags & CLONE_NEWPID) | 1223 | if (clone_flags & CLONE_NEWPID) |
| @@ -1285,29 +1302,13 @@ struct task_struct * __cpuinit fork_idle(int cpu) | |||
| 1285 | struct pt_regs regs; | 1302 | struct pt_regs regs; |
| 1286 | 1303 | ||
| 1287 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, | 1304 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, |
| 1288 | &init_struct_pid); | 1305 | &init_struct_pid, 0); |
| 1289 | if (!IS_ERR(task)) | 1306 | if (!IS_ERR(task)) |
| 1290 | init_idle(task, cpu); | 1307 | init_idle(task, cpu); |
| 1291 | 1308 | ||
| 1292 | return task; | 1309 | return task; |
| 1293 | } | 1310 | } |
| 1294 | 1311 | ||
| 1295 | static int fork_traceflag(unsigned clone_flags) | ||
| 1296 | { | ||
| 1297 | if (clone_flags & CLONE_UNTRACED) | ||
| 1298 | return 0; | ||
| 1299 | else if (clone_flags & CLONE_VFORK) { | ||
| 1300 | if (current->ptrace & PT_TRACE_VFORK) | ||
| 1301 | return PTRACE_EVENT_VFORK; | ||
| 1302 | } else if ((clone_flags & CSIGNAL) != SIGCHLD) { | ||
| 1303 | if (current->ptrace & PT_TRACE_CLONE) | ||
| 1304 | return PTRACE_EVENT_CLONE; | ||
| 1305 | } else if (current->ptrace & PT_TRACE_FORK) | ||
| 1306 | return PTRACE_EVENT_FORK; | ||
| 1307 | |||
| 1308 | return 0; | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | /* | 1312 | /* |
| 1312 | * Ok, this is the main fork-routine. | 1313 | * Ok, this is the main fork-routine. |
| 1313 | * | 1314 | * |
| @@ -1342,14 +1343,14 @@ long do_fork(unsigned long clone_flags, | |||
| 1342 | } | 1343 | } |
| 1343 | } | 1344 | } |
| 1344 | 1345 | ||
| 1345 | if (unlikely(current->ptrace)) { | 1346 | /* |
| 1346 | trace = fork_traceflag (clone_flags); | 1347 | * When called from kernel_thread, don't do user tracing stuff. |
| 1347 | if (trace) | 1348 | */ |
| 1348 | clone_flags |= CLONE_PTRACE; | 1349 | if (likely(user_mode(regs))) |
| 1349 | } | 1350 | trace = tracehook_prepare_clone(clone_flags); |
| 1350 | 1351 | ||
| 1351 | p = copy_process(clone_flags, stack_start, regs, stack_size, | 1352 | p = copy_process(clone_flags, stack_start, regs, stack_size, |
| 1352 | child_tidptr, NULL); | 1353 | child_tidptr, NULL, trace); |
| 1353 | /* | 1354 | /* |
| 1354 | * Do this prior waking up the new thread - the thread pointer | 1355 | * Do this prior waking up the new thread - the thread pointer |
| 1355 | * might get invalid after that point, if the thread exits quickly. | 1356 | * might get invalid after that point, if the thread exits quickly. |
| @@ -1367,32 +1368,35 @@ long do_fork(unsigned long clone_flags, | |||
| 1367 | init_completion(&vfork); | 1368 | init_completion(&vfork); |
| 1368 | } | 1369 | } |
| 1369 | 1370 | ||
| 1370 | if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { | 1371 | tracehook_report_clone(trace, regs, clone_flags, nr, p); |
| 1372 | |||
| 1373 | /* | ||
| 1374 | * We set PF_STARTING at creation in case tracing wants to | ||
| 1375 | * use this to distinguish a fully live task from one that | ||
| 1376 | * hasn't gotten to tracehook_report_clone() yet. Now we | ||
| 1377 | * clear it and set the child going. | ||
| 1378 | */ | ||
| 1379 | p->flags &= ~PF_STARTING; | ||
| 1380 | |||
| 1381 | if (unlikely(clone_flags & CLONE_STOPPED)) { | ||
| 1371 | /* | 1382 | /* |
| 1372 | * We'll start up with an immediate SIGSTOP. | 1383 | * We'll start up with an immediate SIGSTOP. |
| 1373 | */ | 1384 | */ |
| 1374 | sigaddset(&p->pending.signal, SIGSTOP); | 1385 | sigaddset(&p->pending.signal, SIGSTOP); |
| 1375 | set_tsk_thread_flag(p, TIF_SIGPENDING); | 1386 | set_tsk_thread_flag(p, TIF_SIGPENDING); |
| 1376 | } | ||
| 1377 | |||
| 1378 | if (!(clone_flags & CLONE_STOPPED)) | ||
| 1379 | wake_up_new_task(p, clone_flags); | ||
| 1380 | else | ||
| 1381 | __set_task_state(p, TASK_STOPPED); | 1387 | __set_task_state(p, TASK_STOPPED); |
| 1382 | 1388 | } else { | |
| 1383 | if (unlikely (trace)) { | 1389 | wake_up_new_task(p, clone_flags); |
| 1384 | current->ptrace_message = nr; | ||
| 1385 | ptrace_notify ((trace << 8) | SIGTRAP); | ||
| 1386 | } | 1390 | } |
| 1387 | 1391 | ||
| 1392 | tracehook_report_clone_complete(trace, regs, | ||
| 1393 | clone_flags, nr, p); | ||
| 1394 | |||
| 1388 | if (clone_flags & CLONE_VFORK) { | 1395 | if (clone_flags & CLONE_VFORK) { |
| 1389 | freezer_do_not_count(); | 1396 | freezer_do_not_count(); |
| 1390 | wait_for_completion(&vfork); | 1397 | wait_for_completion(&vfork); |
| 1391 | freezer_count(); | 1398 | freezer_count(); |
| 1392 | if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { | 1399 | tracehook_report_vfork_done(p, nr); |
| 1393 | current->ptrace_message = nr; | ||
| 1394 | ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); | ||
| 1395 | } | ||
| 1396 | } | 1400 | } |
| 1397 | } else { | 1401 | } else { |
| 1398 | nr = PTR_ERR(p); | 1402 | nr = PTR_ERR(p); |
| @@ -1404,7 +1408,7 @@ long do_fork(unsigned long clone_flags, | |||
| 1404 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1408 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
| 1405 | #endif | 1409 | #endif |
| 1406 | 1410 | ||
| 1407 | static void sighand_ctor(struct kmem_cache *cachep, void *data) | 1411 | static void sighand_ctor(void *data) |
| 1408 | { | 1412 | { |
| 1409 | struct sighand_struct *sighand = data; | 1413 | struct sighand_struct *sighand = data; |
| 1410 | 1414 | ||
