diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 133 |
1 files changed, 70 insertions, 63 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index adefc1131f27..7ce2ebe84796 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -27,15 +27,18 @@ | |||
27 | #include <linux/key.h> | 27 | #include <linux/key.h> |
28 | #include <linux/binfmts.h> | 28 | #include <linux/binfmts.h> |
29 | #include <linux/mman.h> | 29 | #include <linux/mman.h> |
30 | #include <linux/mmu_notifier.h> | ||
30 | #include <linux/fs.h> | 31 | #include <linux/fs.h> |
31 | #include <linux/nsproxy.h> | 32 | #include <linux/nsproxy.h> |
32 | #include <linux/capability.h> | 33 | #include <linux/capability.h> |
33 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
34 | #include <linux/cgroup.h> | 35 | #include <linux/cgroup.h> |
35 | #include <linux/security.h> | 36 | #include <linux/security.h> |
37 | #include <linux/hugetlb.h> | ||
36 | #include <linux/swap.h> | 38 | #include <linux/swap.h> |
37 | #include <linux/syscalls.h> | 39 | #include <linux/syscalls.h> |
38 | #include <linux/jiffies.h> | 40 | #include <linux/jiffies.h> |
41 | #include <linux/tracehook.h> | ||
39 | #include <linux/futex.h> | 42 | #include <linux/futex.h> |
40 | #include <linux/task_io_accounting_ops.h> | 43 | #include <linux/task_io_accounting_ops.h> |
41 | #include <linux/rcupdate.h> | 44 | #include <linux/rcupdate.h> |
@@ -92,6 +95,23 @@ int nr_processes(void) | |||
92 | static struct kmem_cache *task_struct_cachep; | 95 | static struct kmem_cache *task_struct_cachep; |
93 | #endif | 96 | #endif |
94 | 97 | ||
98 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR | ||
99 | static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) | ||
100 | { | ||
101 | #ifdef CONFIG_DEBUG_STACK_USAGE | ||
102 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; | ||
103 | #else | ||
104 | gfp_t mask = GFP_KERNEL; | ||
105 | #endif | ||
106 | return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); | ||
107 | } | ||
108 | |||
109 | static inline void free_thread_info(struct thread_info *ti) | ||
110 | { | ||
111 | free_pages((unsigned long)ti, THREAD_SIZE_ORDER); | ||
112 | } | ||
113 | #endif | ||
114 | |||
95 | /* SLAB cache for signal_struct structures (tsk->signal) */ | 115 | /* SLAB cache for signal_struct structures (tsk->signal) */ |
96 | static struct kmem_cache *signal_cachep; | 116 | static struct kmem_cache *signal_cachep; |
97 | 117 | ||
@@ -307,6 +327,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
307 | } | 327 | } |
308 | 328 | ||
309 | /* | 329 | /* |
330 | * Clear hugetlb-related page reserves for children. This only | ||
331 | * affects MAP_PRIVATE mappings. Faults generated by the child | ||
332 | * are not guaranteed to succeed, even if read-only | ||
333 | */ | ||
334 | if (is_vm_hugetlb_page(tmp)) | ||
335 | reset_vma_resv_huge_pages(tmp); | ||
336 | |||
337 | /* | ||
310 | * Link in the new vma and copy the page table entries. | 338 | * Link in the new vma and copy the page table entries. |
311 | */ | 339 | */ |
312 | *pprev = tmp; | 340 | *pprev = tmp; |
@@ -374,7 +402,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
374 | INIT_LIST_HEAD(&mm->mmlist); | 402 | INIT_LIST_HEAD(&mm->mmlist); |
375 | mm->flags = (current->mm) ? current->mm->flags | 403 | mm->flags = (current->mm) ? current->mm->flags |
376 | : MMF_DUMP_FILTER_DEFAULT; | 404 | : MMF_DUMP_FILTER_DEFAULT; |
377 | mm->core_waiters = 0; | 405 | mm->core_state = NULL; |
378 | mm->nr_ptes = 0; | 406 | mm->nr_ptes = 0; |
379 | set_mm_counter(mm, file_rss, 0); | 407 | set_mm_counter(mm, file_rss, 0); |
380 | set_mm_counter(mm, anon_rss, 0); | 408 | set_mm_counter(mm, anon_rss, 0); |
@@ -387,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
387 | 415 | ||
388 | if (likely(!mm_alloc_pgd(mm))) { | 416 | if (likely(!mm_alloc_pgd(mm))) { |
389 | mm->def_flags = 0; | 417 | mm->def_flags = 0; |
418 | mmu_notifier_mm_init(mm); | ||
390 | return mm; | 419 | return mm; |
391 | } | 420 | } |
392 | 421 | ||
@@ -419,6 +448,7 @@ void __mmdrop(struct mm_struct *mm) | |||
419 | BUG_ON(mm == &init_mm); | 448 | BUG_ON(mm == &init_mm); |
420 | mm_free_pgd(mm); | 449 | mm_free_pgd(mm); |
421 | destroy_context(mm); | 450 | destroy_context(mm); |
451 | mmu_notifier_mm_destroy(mm); | ||
422 | free_mm(mm); | 452 | free_mm(mm); |
423 | } | 453 | } |
424 | EXPORT_SYMBOL_GPL(__mmdrop); | 454 | EXPORT_SYMBOL_GPL(__mmdrop); |
@@ -448,7 +478,7 @@ EXPORT_SYMBOL_GPL(mmput); | |||
448 | /** | 478 | /** |
449 | * get_task_mm - acquire a reference to the task's mm | 479 | * get_task_mm - acquire a reference to the task's mm |
450 | * | 480 | * |
451 | * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning | 481 | * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning |
452 | * this kernel workthread has transiently adopted a user mm with use_mm, | 482 | * this kernel workthread has transiently adopted a user mm with use_mm, |
453 | * to do its AIO) is not set and if so returns a reference to it, after | 483 | * to do its AIO) is not set and if so returns a reference to it, after |
454 | * bumping up the use count. User must release the mm via mmput() | 484 | * bumping up the use count. User must release the mm via mmput() |
@@ -461,7 +491,7 @@ struct mm_struct *get_task_mm(struct task_struct *task) | |||
461 | task_lock(task); | 491 | task_lock(task); |
462 | mm = task->mm; | 492 | mm = task->mm; |
463 | if (mm) { | 493 | if (mm) { |
464 | if (task->flags & PF_BORROWED_MM) | 494 | if (task->flags & PF_KTHREAD) |
465 | mm = NULL; | 495 | mm = NULL; |
466 | else | 496 | else |
467 | atomic_inc(&mm->mm_users); | 497 | atomic_inc(&mm->mm_users); |
@@ -630,13 +660,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old) | |||
630 | path_get(&old->root); | 660 | path_get(&old->root); |
631 | fs->pwd = old->pwd; | 661 | fs->pwd = old->pwd; |
632 | path_get(&old->pwd); | 662 | path_get(&old->pwd); |
633 | if (old->altroot.dentry) { | ||
634 | fs->altroot = old->altroot; | ||
635 | path_get(&old->altroot); | ||
636 | } else { | ||
637 | fs->altroot.mnt = NULL; | ||
638 | fs->altroot.dentry = NULL; | ||
639 | } | ||
640 | read_unlock(&old->lock); | 663 | read_unlock(&old->lock); |
641 | } | 664 | } |
642 | return fs; | 665 | return fs; |
@@ -786,6 +809,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
786 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | 809 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; |
787 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | 810 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; |
788 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; | 811 | sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; |
812 | task_io_accounting_init(&sig->ioac); | ||
789 | sig->sum_sched_runtime = 0; | 813 | sig->sum_sched_runtime = 0; |
790 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | 814 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
791 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | 815 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |
@@ -833,8 +857,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) | |||
833 | 857 | ||
834 | new_flags &= ~PF_SUPERPRIV; | 858 | new_flags &= ~PF_SUPERPRIV; |
835 | new_flags |= PF_FORKNOEXEC; | 859 | new_flags |= PF_FORKNOEXEC; |
836 | if (!(clone_flags & CLONE_PTRACE)) | 860 | new_flags |= PF_STARTING; |
837 | p->ptrace = 0; | ||
838 | p->flags = new_flags; | 861 | p->flags = new_flags; |
839 | clear_freeze_flag(p); | 862 | clear_freeze_flag(p); |
840 | } | 863 | } |
@@ -875,7 +898,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
875 | struct pt_regs *regs, | 898 | struct pt_regs *regs, |
876 | unsigned long stack_size, | 899 | unsigned long stack_size, |
877 | int __user *child_tidptr, | 900 | int __user *child_tidptr, |
878 | struct pid *pid) | 901 | struct pid *pid, |
902 | int trace) | ||
879 | { | 903 | { |
880 | int retval; | 904 | int retval; |
881 | struct task_struct *p; | 905 | struct task_struct *p; |
@@ -968,13 +992,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
968 | p->last_switch_timestamp = 0; | 992 | p->last_switch_timestamp = 0; |
969 | #endif | 993 | #endif |
970 | 994 | ||
971 | #ifdef CONFIG_TASK_XACCT | 995 | task_io_accounting_init(&p->ioac); |
972 | p->rchar = 0; /* I/O counter: bytes read */ | ||
973 | p->wchar = 0; /* I/O counter: bytes written */ | ||
974 | p->syscr = 0; /* I/O counter: read syscalls */ | ||
975 | p->syscw = 0; /* I/O counter: write syscalls */ | ||
976 | #endif | ||
977 | task_io_accounting_init(p); | ||
978 | acct_clear_integrals(p); | 996 | acct_clear_integrals(p); |
979 | 997 | ||
980 | p->it_virt_expires = cputime_zero; | 998 | p->it_virt_expires = cputime_zero; |
@@ -1081,6 +1099,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1081 | if (clone_flags & CLONE_THREAD) | 1099 | if (clone_flags & CLONE_THREAD) |
1082 | p->tgid = current->tgid; | 1100 | p->tgid = current->tgid; |
1083 | 1101 | ||
1102 | if (current->nsproxy != p->nsproxy) { | ||
1103 | retval = ns_cgroup_clone(p, pid); | ||
1104 | if (retval) | ||
1105 | goto bad_fork_free_pid; | ||
1106 | } | ||
1107 | |||
1084 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | 1108 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; |
1085 | /* | 1109 | /* |
1086 | * Clear TID on mm_release()? | 1110 | * Clear TID on mm_release()? |
@@ -1125,8 +1149,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1125 | */ | 1149 | */ |
1126 | p->group_leader = p; | 1150 | p->group_leader = p; |
1127 | INIT_LIST_HEAD(&p->thread_group); | 1151 | INIT_LIST_HEAD(&p->thread_group); |
1128 | INIT_LIST_HEAD(&p->ptrace_entry); | ||
1129 | INIT_LIST_HEAD(&p->ptraced); | ||
1130 | 1152 | ||
1131 | /* Now that the task is set up, run cgroup callbacks if | 1153 | /* Now that the task is set up, run cgroup callbacks if |
1132 | * necessary. We need to run them before the task is visible | 1154 | * necessary. We need to run them before the task is visible |
@@ -1157,7 +1179,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1157 | p->real_parent = current->real_parent; | 1179 | p->real_parent = current->real_parent; |
1158 | else | 1180 | else |
1159 | p->real_parent = current; | 1181 | p->real_parent = current; |
1160 | p->parent = p->real_parent; | ||
1161 | 1182 | ||
1162 | spin_lock(&current->sighand->siglock); | 1183 | spin_lock(&current->sighand->siglock); |
1163 | 1184 | ||
@@ -1199,8 +1220,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1199 | 1220 | ||
1200 | if (likely(p->pid)) { | 1221 | if (likely(p->pid)) { |
1201 | list_add_tail(&p->sibling, &p->real_parent->children); | 1222 | list_add_tail(&p->sibling, &p->real_parent->children); |
1202 | if (unlikely(p->ptrace & PT_PTRACED)) | 1223 | tracehook_finish_clone(p, clone_flags, trace); |
1203 | __ptrace_link(p, current->parent); | ||
1204 | 1224 | ||
1205 | if (thread_group_leader(p)) { | 1225 | if (thread_group_leader(p)) { |
1206 | if (clone_flags & CLONE_NEWPID) | 1226 | if (clone_flags & CLONE_NEWPID) |
@@ -1285,29 +1305,13 @@ struct task_struct * __cpuinit fork_idle(int cpu) | |||
1285 | struct pt_regs regs; | 1305 | struct pt_regs regs; |
1286 | 1306 | ||
1287 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, | 1307 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, |
1288 | &init_struct_pid); | 1308 | &init_struct_pid, 0); |
1289 | if (!IS_ERR(task)) | 1309 | if (!IS_ERR(task)) |
1290 | init_idle(task, cpu); | 1310 | init_idle(task, cpu); |
1291 | 1311 | ||
1292 | return task; | 1312 | return task; |
1293 | } | 1313 | } |
1294 | 1314 | ||
1295 | static int fork_traceflag(unsigned clone_flags) | ||
1296 | { | ||
1297 | if (clone_flags & CLONE_UNTRACED) | ||
1298 | return 0; | ||
1299 | else if (clone_flags & CLONE_VFORK) { | ||
1300 | if (current->ptrace & PT_TRACE_VFORK) | ||
1301 | return PTRACE_EVENT_VFORK; | ||
1302 | } else if ((clone_flags & CSIGNAL) != SIGCHLD) { | ||
1303 | if (current->ptrace & PT_TRACE_CLONE) | ||
1304 | return PTRACE_EVENT_CLONE; | ||
1305 | } else if (current->ptrace & PT_TRACE_FORK) | ||
1306 | return PTRACE_EVENT_FORK; | ||
1307 | |||
1308 | return 0; | ||
1309 | } | ||
1310 | |||
1311 | /* | 1315 | /* |
1312 | * Ok, this is the main fork-routine. | 1316 | * Ok, this is the main fork-routine. |
1313 | * | 1317 | * |
@@ -1342,14 +1346,14 @@ long do_fork(unsigned long clone_flags, | |||
1342 | } | 1346 | } |
1343 | } | 1347 | } |
1344 | 1348 | ||
1345 | if (unlikely(current->ptrace)) { | 1349 | /* |
1346 | trace = fork_traceflag (clone_flags); | 1350 | * When called from kernel_thread, don't do user tracing stuff. |
1347 | if (trace) | 1351 | */ |
1348 | clone_flags |= CLONE_PTRACE; | 1352 | if (likely(user_mode(regs))) |
1349 | } | 1353 | trace = tracehook_prepare_clone(clone_flags); |
1350 | 1354 | ||
1351 | p = copy_process(clone_flags, stack_start, regs, stack_size, | 1355 | p = copy_process(clone_flags, stack_start, regs, stack_size, |
1352 | child_tidptr, NULL); | 1356 | child_tidptr, NULL, trace); |
1353 | /* | 1357 | /* |
1354 | * Do this prior waking up the new thread - the thread pointer | 1358 | * Do this prior waking up the new thread - the thread pointer |
1355 | * might get invalid after that point, if the thread exits quickly. | 1359 | * might get invalid after that point, if the thread exits quickly. |
@@ -1367,32 +1371,35 @@ long do_fork(unsigned long clone_flags, | |||
1367 | init_completion(&vfork); | 1371 | init_completion(&vfork); |
1368 | } | 1372 | } |
1369 | 1373 | ||
1370 | if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { | 1374 | tracehook_report_clone(trace, regs, clone_flags, nr, p); |
1375 | |||
1376 | /* | ||
1377 | * We set PF_STARTING at creation in case tracing wants to | ||
1378 | * use this to distinguish a fully live task from one that | ||
1379 | * hasn't gotten to tracehook_report_clone() yet. Now we | ||
1380 | * clear it and set the child going. | ||
1381 | */ | ||
1382 | p->flags &= ~PF_STARTING; | ||
1383 | |||
1384 | if (unlikely(clone_flags & CLONE_STOPPED)) { | ||
1371 | /* | 1385 | /* |
1372 | * We'll start up with an immediate SIGSTOP. | 1386 | * We'll start up with an immediate SIGSTOP. |
1373 | */ | 1387 | */ |
1374 | sigaddset(&p->pending.signal, SIGSTOP); | 1388 | sigaddset(&p->pending.signal, SIGSTOP); |
1375 | set_tsk_thread_flag(p, TIF_SIGPENDING); | 1389 | set_tsk_thread_flag(p, TIF_SIGPENDING); |
1376 | } | ||
1377 | |||
1378 | if (!(clone_flags & CLONE_STOPPED)) | ||
1379 | wake_up_new_task(p, clone_flags); | ||
1380 | else | ||
1381 | __set_task_state(p, TASK_STOPPED); | 1390 | __set_task_state(p, TASK_STOPPED); |
1382 | 1391 | } else { | |
1383 | if (unlikely (trace)) { | 1392 | wake_up_new_task(p, clone_flags); |
1384 | current->ptrace_message = nr; | ||
1385 | ptrace_notify ((trace << 8) | SIGTRAP); | ||
1386 | } | 1393 | } |
1387 | 1394 | ||
1395 | tracehook_report_clone_complete(trace, regs, | ||
1396 | clone_flags, nr, p); | ||
1397 | |||
1388 | if (clone_flags & CLONE_VFORK) { | 1398 | if (clone_flags & CLONE_VFORK) { |
1389 | freezer_do_not_count(); | 1399 | freezer_do_not_count(); |
1390 | wait_for_completion(&vfork); | 1400 | wait_for_completion(&vfork); |
1391 | freezer_count(); | 1401 | freezer_count(); |
1392 | if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { | 1402 | tracehook_report_vfork_done(p, nr); |
1393 | current->ptrace_message = nr; | ||
1394 | ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); | ||
1395 | } | ||
1396 | } | 1403 | } |
1397 | } else { | 1404 | } else { |
1398 | nr = PTR_ERR(p); | 1405 | nr = PTR_ERR(p); |
@@ -1404,7 +1411,7 @@ long do_fork(unsigned long clone_flags, | |||
1404 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1411 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
1405 | #endif | 1412 | #endif |
1406 | 1413 | ||
1407 | static void sighand_ctor(struct kmem_cache *cachep, void *data) | 1414 | static void sighand_ctor(void *data) |
1408 | { | 1415 | { |
1409 | struct sighand_struct *sighand = data; | 1416 | struct sighand_struct *sighand = data; |
1410 | 1417 | ||