author	David S. Miller <davem@davemloft.net>	2008-07-27 07:40:08 -0400
committer	David S. Miller <davem@davemloft.net>	2008-07-27 07:40:08 -0400
commit	15d3b4a26291c170563e2b25ded5de1324f93959
tree	9bea548a7de5215c58a091d58f4eefdb92349f2c /kernel/fork.c
parent	2c3abab7c95295f319dc8899b74cbd60140fcdfb
parent	8be1a6d6c77ab4532e4476fdb8177030ef48b52c
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--	kernel/fork.c	127
1 file changed, 71 insertions, 56 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f27..5e050c1317c4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,9 +33,11 @@
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
+#include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
@@ -92,6 +94,23 @@ int nr_processes(void)
 static struct kmem_cache *task_struct_cachep;
 #endif
 
+#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
+#else
+	gfp_t mask = GFP_KERNEL;
+#endif
+	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+}
+
+static inline void free_thread_info(struct thread_info *ti)
+{
+	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+}
+#endif
+
 /* SLAB cache for signal_struct structures (tsk->signal) */
 static struct kmem_cache *signal_cachep;
 
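The #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR guard makes this page-based allocator the generic fallback: an architecture that wants different behaviour defines the macro in its <asm/thread_info.h> and supplies its own alloc/free pair. A minimal sketch of such an override, assuming a hypothetical slab-backed architecture (the cache name and its boot-time setup are illustrative, not from this commit):

	#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR

	/* Hypothetical arch-side allocator: carve thread_info out of a
	 * dedicated slab cache instead of raw pages. */
	static struct kmem_cache *thread_info_cache;	/* assumed set up at boot */

	static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
	{
		return kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
	}

	static inline void free_thread_info(struct thread_info *ti)
	{
		kmem_cache_free(thread_info_cache, ti);
	}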
@@ -307,6 +326,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	}
 
 	/*
+	 * Clear hugetlb-related page reserves for children. This only
+	 * affects MAP_PRIVATE mappings. Faults generated by the child
+	 * are not guaranteed to succeed, even if read-only
+	 */
+	if (is_vm_hugetlb_page(tmp))
+		reset_vma_resv_huge_pages(tmp);
+
+	/*
 	 * Link in the new vma and copy the page table entries.
 	 */
 	*pprev = tmp;
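The new comment is the whole story: private hugetlb reservations belong to the mm that created the mapping and are not duplicated for the child at fork. A user-space illustration of the consequence (a sketch only; MAP_HUGETLB is the later shorthand for this, at the time of this commit the mapping would come from a file on a hugetlbfs mount):

	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		size_t len = 2UL << 20;	/* one 2 MiB huge page; size is arch-dependent */
		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
		if (p == MAP_FAILED)
			return 1;
		p[0] = 1;		/* parent's fault is backed by its reserve */
		if (fork() == 0)
			p[0] = 2;	/* child's COW fault is NOT guaranteed to succeed */
		return 0;
	}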
@@ -374,7 +401,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->flags = (current->mm) ? current->mm->flags
 				  : MMF_DUMP_FILTER_DEFAULT;
-	mm->core_waiters = 0;
+	mm->core_state = NULL;
 	mm->nr_ptes = 0;
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
@@ -448,7 +475,7 @@ EXPORT_SYMBOL_GPL(mmput);
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
- * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
+ * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
  * this kernel workthread has transiently adopted a user mm with use_mm,
  * to do its AIO) is not set and if so returns a reference to it, after
  * bumping up the use count.  User must release the mm via mmput()
@@ -461,7 +488,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 	task_lock(task);
 	mm = task->mm;
 	if (mm) {
-		if (task->flags & PF_BORROWED_MM)
+		if (task->flags & PF_KTHREAD)
 			mm = NULL;
 		else
 			atomic_inc(&mm->mm_users);
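The PF_BORROWED_MM test becomes PF_KTHREAD, which is set for every kernel thread, so callers keep the same guarantee: they never see an mm that a kernel thread has only transiently adopted via use_mm(). The calling convention is unchanged; a sketch of typical use:

	struct mm_struct *mm = get_task_mm(task);

	if (mm) {
		/* mm_users was bumped for us; the mm cannot go away here. */
		/* ... inspect or operate on the mm ... */
		mmput(mm);	/* drop the reference taken by get_task_mm() */
	}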
@@ -630,13 +657,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 		path_get(&old->root);
 		fs->pwd = old->pwd;
 		path_get(&old->pwd);
-		if (old->altroot.dentry) {
-			fs->altroot = old->altroot;
-			path_get(&old->altroot);
-		} else {
-			fs->altroot.mnt = NULL;
-			fs->altroot.dentry = NULL;
-		}
 		read_unlock(&old->lock);
 	}
 	return fs;
@@ -786,6 +806,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+#ifdef CONFIG_TASK_XACCT
+	sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	memset(&sig->ioac, 0, sizeof(sig->ioac));
+#endif
 	sig->sum_sched_runtime = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -833,8 +859,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 
 	new_flags &= ~PF_SUPERPRIV;
 	new_flags |= PF_FORKNOEXEC;
-	if (!(clone_flags & CLONE_PTRACE))
-		p->ptrace = 0;
+	new_flags |= PF_STARTING;
 	p->flags = new_flags;
 	clear_freeze_flag(p);
 }
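copy_flags() now marks every new child PF_STARTING instead of conditionally clearing p->ptrace (that clearing moves into tracehook_finish_clone(), below). The flag is removed in do_fork() just before the child is set running, so a check like the following sketch (a hypothetical helper, not from this commit) can tell a fully live task from one still being assembled:

	static inline bool task_fully_started(const struct task_struct *p)
	{
		/* PF_STARTING is set in copy_flags() and cleared in do_fork()
		 * right before the child is woken or stopped. */
		return !(p->flags & PF_STARTING);
	}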
@@ -875,7 +900,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 					struct pt_regs *regs,
 					unsigned long stack_size,
 					int __user *child_tidptr,
-					struct pid *pid)
+					struct pid *pid,
+					int trace)
 {
 	int retval;
 	struct task_struct *p;
@@ -1081,6 +1107,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD)
 		p->tgid = current->tgid;
 
+	if (current->nsproxy != p->nsproxy) {
+		retval = ns_cgroup_clone(p, pid);
+		if (retval)
+			goto bad_fork_free_pid;
+	}
+
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
 	 * Clear TID on mm_release()?
@@ -1125,8 +1157,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
-	INIT_LIST_HEAD(&p->ptrace_entry);
-	INIT_LIST_HEAD(&p->ptraced);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
@@ -1157,7 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		p->real_parent = current->real_parent;
 	else
 		p->real_parent = current;
-	p->parent = p->real_parent;
 
 	spin_lock(&current->sighand->siglock);
 
@@ -1199,8 +1228,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	if (likely(p->pid)) {
 		list_add_tail(&p->sibling, &p->real_parent->children);
-		if (unlikely(p->ptrace & PT_PTRACED))
-			__ptrace_link(p, current->parent);
+		tracehook_finish_clone(p, clone_flags, trace);
 
 		if (thread_group_leader(p)) {
 			if (clone_flags & CLONE_NEWPID)
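This single call absorbs several removals above: the PT_PTRACED/__ptrace_link linkage here, the two INIT_LIST_HEADs and the p->parent assignment dropped from earlier hunks, and the CLONE_PTRACE handling dropped from copy_flags(). In the tracehook scheme they are consolidated roughly as follows (a sketch of the helpers' shape in this era, not part of this diff; the header's exact text may differ):

	static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
	{
		INIT_LIST_HEAD(&child->ptrace_entry);
		INIT_LIST_HEAD(&child->ptraced);
		child->parent = child->real_parent;
		child->ptrace = 0;
		if (unlikely(ptrace)) {
			child->ptrace = current->ptrace;
			__ptrace_link(child, current->parent);
		}
	}

	static inline void tracehook_finish_clone(struct task_struct *child,
						  unsigned long clone_flags, int trace)
	{
		ptrace_init_task(child, (clone_flags & CLONE_PTRACE) || trace);
	}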
@@ -1285,29 +1313,13 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 	struct pt_regs regs;
 
 	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
-			    &init_struct_pid);
+			    &init_struct_pid, 0);
 	if (!IS_ERR(task))
 		init_idle(task, cpu);
 
 	return task;
 }
 
-static int fork_traceflag(unsigned clone_flags)
-{
-	if (clone_flags & CLONE_UNTRACED)
-		return 0;
-	else if (clone_flags & CLONE_VFORK) {
-		if (current->ptrace & PT_TRACE_VFORK)
-			return PTRACE_EVENT_VFORK;
-	} else if ((clone_flags & CSIGNAL) != SIGCHLD) {
-		if (current->ptrace & PT_TRACE_CLONE)
-			return PTRACE_EVENT_CLONE;
-	} else if (current->ptrace & PT_TRACE_FORK)
-		return PTRACE_EVENT_FORK;
-
-	return 0;
-}
-
 /*
  * Ok, this is the main fork-routine.
  *
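fork_traceflag() is not lost: its decision table moves essentially unchanged into tracehook_prepare_clone() in <linux/tracehook.h>, which do_fork() now calls below. Sketched here from the removed code (the header's exact wording may differ):

	static inline int tracehook_prepare_clone(unsigned clone_flags)
	{
		if (clone_flags & CLONE_UNTRACED)
			return 0;

		if (clone_flags & CLONE_VFORK) {
			if (current->ptrace & PT_TRACE_VFORK)
				return PTRACE_EVENT_VFORK;
		} else if ((clone_flags & CSIGNAL) != SIGCHLD) {
			if (current->ptrace & PT_TRACE_CLONE)
				return PTRACE_EVENT_CLONE;
		} else {
			if (current->ptrace & PT_TRACE_FORK)
				return PTRACE_EVENT_FORK;
		}

		return 0;
	}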
@@ -1342,14 +1354,14 @@ long do_fork(unsigned long clone_flags,
 		}
 	}
 
-	if (unlikely(current->ptrace)) {
-		trace = fork_traceflag (clone_flags);
-		if (trace)
-			clone_flags |= CLONE_PTRACE;
-	}
+	/*
+	 * When called from kernel_thread, don't do user tracing stuff.
+	 */
+	if (likely(user_mode(regs)))
+		trace = tracehook_prepare_clone(clone_flags);
 
 	p = copy_process(clone_flags, stack_start, regs, stack_size,
-			 child_tidptr, NULL);
+			 child_tidptr, NULL, trace);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1367,32 +1379,35 @@ long do_fork(unsigned long clone_flags,
 			init_completion(&vfork);
 		}
 
-		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
+		tracehook_report_clone(trace, regs, clone_flags, nr, p);
+
+		/*
+		 * We set PF_STARTING at creation in case tracing wants to
+		 * use this to distinguish a fully live task from one that
+		 * hasn't gotten to tracehook_report_clone() yet.  Now we
+		 * clear it and set the child going.
+		 */
+		p->flags &= ~PF_STARTING;
+
+		if (unlikely(clone_flags & CLONE_STOPPED)) {
 			/*
 			 * We'll start up with an immediate SIGSTOP.
 			 */
 			sigaddset(&p->pending.signal, SIGSTOP);
 			set_tsk_thread_flag(p, TIF_SIGPENDING);
-		}
-
-		if (!(clone_flags & CLONE_STOPPED))
-			wake_up_new_task(p, clone_flags);
-		else
 			__set_task_state(p, TASK_STOPPED);
-
-		if (unlikely (trace)) {
-			current->ptrace_message = nr;
-			ptrace_notify ((trace << 8) | SIGTRAP);
+		} else {
+			wake_up_new_task(p, clone_flags);
 		}
 
+		tracehook_report_clone_complete(trace, regs,
+						clone_flags, nr, p);
+
 		if (clone_flags & CLONE_VFORK) {
 			freezer_do_not_count();
 			wait_for_completion(&vfork);
 			freezer_count();
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
-				current->ptrace_message = nr;
-				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
-			}
+			tracehook_report_vfork_done(p, nr);
 		}
 	} else {
 		nr = PTR_ERR(p);
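The user_mode(regs) test earlier in do_fork() is what keeps all of this tracing machinery away from kernel threads: kernel_thread() enters do_fork() with a kernel-mode pt_regs, so trace stays 0 and none of the tracehook_report_* calls fire. A sketch of that caller (the shape follows the x86-32 version of this era; details vary by architecture):

	int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
	{
		struct pt_regs regs;

		memset(&regs, 0, sizeof(regs));
		/* ... aim regs at the in-kernel thread trampoline; the saved
		 * segment registers are kernel ones, so user_mode(&regs) is
		 * false and no ptrace clone event can be requested ... */
		return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
	}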
@@ -1404,7 +1419,7 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
-static void sighand_ctor(struct kmem_cache *cachep, void *data)
+static void sighand_ctor(void *data)
 {
 	struct sighand_struct *sighand = data;
 
