Diffstat (limited to 'kernel/fork.c')
-rw-r--r--	kernel/fork.c | 124
1 file changed, 76 insertions(+), 48 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 0276c30401a0..e7ceaca89609 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -37,7 +37,6 @@
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
-#include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
 #include <linux/kthread.h>
@@ -81,7 +80,7 @@
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
 unsigned long total_forks;	/* Handle normal Linux uptimes. */
-int nr_threads; 		/* The idle threads do not count.. */
+int nr_threads;			/* The idle threads do not count.. */
 
 int max_threads;		/* tunable limit on nr_threads */
 
@@ -233,7 +232,7 @@ void __init fork_init(unsigned long mempages)
 	/*
 	 * we need to allow at least 20 threads to boot a system
 	 */
-	if(max_threads < 20)
+	if (max_threads < 20)
 		max_threads = 20;
 
 	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
@@ -269,7 +268,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		return NULL;
 	}
 
- 	err = arch_dup_task_struct(tsk, orig);
+	err = arch_dup_task_struct(tsk, orig);
 	if (err)
 		goto out;
 
@@ -289,9 +288,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	tsk->stack_canary = get_random_int();
 #endif
 
-	/* One for us, one for whoever does the "release_task()" (usually parent) */
-	atomic_set(&tsk->usage,2);
-	atomic_set(&tsk->fs_excl, 0);
+	/*
+	 * One for us, one for whoever does the "release_task()" (usually
+	 * parent)
+	 */
+	atomic_set(&tsk->usage, 2);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	tsk->btrace_seq = 0;
 #endif
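
The rewrapped comment in this hunk states the lifetime rule for a freshly duplicated task_struct: the usage count starts at 2, one reference held by the new task itself and one by whoever eventually calls release_task() (usually the parent). As a rough standalone illustration of that rule, not the kernel API, the same idea in userspace C11 looks like this (struct task and put_task are hypothetical stand-ins):

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical stand-in for task_struct's usage count. */
struct task {
	atomic_int usage;
};

/* Drop one reference; the holder of the last one may free the task. */
static void put_task(struct task *t)
{
	if (atomic_fetch_sub(&t->usage, 1) == 1)
		printf("last reference gone: task can be freed\n");
}

int main(void)
{
	struct task t;

	/* One for us, one for whoever does the "release_task()". */
	atomic_init(&t.usage, 2);
	put_task(&t);	/* the task itself exiting */
	put_task(&t);	/* the parent's release_task() */
	return 0;
}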
@@ -439,7 +440,7 @@ fail_nomem:
 	goto out;
 }
 
-static inline int mm_alloc_pgd(struct mm_struct * mm)
+static inline int mm_alloc_pgd(struct mm_struct *mm)
 {
 	mm->pgd = pgd_alloc(mm);
 	if (unlikely(!mm->pgd))
@@ -447,7 +448,7 @@ static inline int mm_alloc_pgd(struct mm_struct * mm)
 	return 0;
 }
 
-static inline void mm_free_pgd(struct mm_struct * mm)
+static inline void mm_free_pgd(struct mm_struct *mm)
 {
 	pgd_free(mm, mm->pgd);
 }
@@ -484,7 +485,7 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
-static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
@@ -515,9 +516,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 /*
  * Allocate and initialize an mm_struct.
  */
-struct mm_struct * mm_alloc(void)
+struct mm_struct *mm_alloc(void)
 {
-	struct mm_struct * mm;
+	struct mm_struct *mm;
 
 	mm = allocate_mm();
 	if (!mm)
@@ -585,7 +586,7 @@ void added_exe_file_vma(struct mm_struct *mm)
 void removed_exe_file_vma(struct mm_struct *mm)
 {
 	mm->num_exe_file_vmas--;
-	if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
+	if ((mm->num_exe_file_vmas == 0) && mm->exe_file) {
 		fput(mm->exe_file);
 		mm->exe_file = NULL;
 	}
@@ -777,9 +778,9 @@ fail_nocontext:
 	return NULL;
 }
 
-static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 {
-	struct mm_struct * mm, *oldmm;
+	struct mm_struct *mm, *oldmm;
 	int retval;
 
 	tsk->min_flt = tsk->maj_flt = 0;
@@ -846,7 +847,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 	return 0;
 }
 
-static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct files_struct *oldf, *newf;
 	int error = 0;
@@ -1013,7 +1014,7 @@ static void rt_mutex_init_task(struct task_struct *p)
 {
 	raw_spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
-	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
+	plist_head_init(&p->pi_waiters);
 	p->pi_blocked_on = NULL;
 #endif
 }
@@ -1168,13 +1169,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
- 	if (IS_ERR(p->mempolicy)) {
- 		retval = PTR_ERR(p->mempolicy);
- 		p->mempolicy = NULL;
- 		goto bad_fork_cleanup_cgroup;
- 	}
+	if (IS_ERR(p->mempolicy)) {
+		retval = PTR_ERR(p->mempolicy);
+		p->mempolicy = NULL;
+		goto bad_fork_cleanup_cgroup;
+	}
 	mpol_fix_fork_child_flag(p);
 #endif
+#ifdef CONFIG_CPUSETS
+	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
+	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+#endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	p->irq_events = 0;
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
@@ -1214,25 +1219,33 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	retval = perf_event_init_task(p);
 	if (retval)
 		goto bad_fork_cleanup_policy;
-
-	if ((retval = audit_alloc(p)))
+	retval = audit_alloc(p);
+	if (retval)
 		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
-	if ((retval = copy_semundo(clone_flags, p)))
+	retval = copy_semundo(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_audit;
-	if ((retval = copy_files(clone_flags, p)))
+	retval = copy_files(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_semundo;
-	if ((retval = copy_fs(clone_flags, p)))
+	retval = copy_fs(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_files;
-	if ((retval = copy_sighand(clone_flags, p)))
+	retval = copy_sighand(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_fs;
-	if ((retval = copy_signal(clone_flags, p)))
+	retval = copy_signal(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_sighand;
-	if ((retval = copy_mm(clone_flags, p)))
+	retval = copy_mm(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_signal;
-	if ((retval = copy_namespaces(clone_flags, p)))
+	retval = copy_namespaces(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_mm;
-	if ((retval = copy_io(clone_flags, p)))
+	retval = copy_io(clone_flags, p);
+	if (retval)
 		goto bad_fork_cleanup_namespaces;
 	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
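
Every error check in this hunk is converted from an assignment buried inside an if condition to an assignment followed by a separate test; the behavior is identical, but checkpatch.pl warns about the old form, and the two-step version keeps the long goto-based cleanup chain uniform. A minimal standalone sketch of the pattern (step_a and step_b are hypothetical stand-ins for the copy_*() helpers):

#include <stdio.h>

static int step_a(void) { return 0; }	/* succeeds */
static int step_b(void) { return -1; }	/* fails, to exercise cleanup */

static int setup(void)
{
	int retval;

	/* Old style: if ((retval = step_a())) goto out; */
	retval = step_a();
	if (retval)
		goto out;
	retval = step_b();
	if (retval)
		goto undo_a;
	return 0;

undo_a:
	/* undo whatever step_a() set up, then fall through */
out:
	return retval;
}

int main(void)
{
	printf("setup() = %d\n", setup());
	return 0;
}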
@@ -1254,7 +1267,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/*
 	 * Clear TID on mm_release()?
 	 */
-	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
 #ifdef CONFIG_BLOCK
 	p->plug = NULL;
 #endif
@@ -1322,7 +1335,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * it's process group.
 	 * A fatal signal pending means that current will exit, so the new
 	 * thread can't slip out of an OOM kill (or normal SIGKILL).
- 	 */
+	 */
 	recalc_sigpending();
 	if (signal_pending(current)) {
 		spin_unlock(&current->sighand->siglock);
@@ -1340,7 +1353,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	}
 
 	if (likely(p->pid)) {
-		tracehook_finish_clone(p, clone_flags, trace);
+		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
 		if (thread_group_leader(p)) {
 			if (is_child_reaper(pid))
@@ -1481,10 +1494,22 @@ long do_fork(unsigned long clone_flags,
 	}
 
 	/*
-	 * When called from kernel_thread, don't do user tracing stuff.
+	 * Determine whether and which event to report to ptracer.  When
+	 * called from kernel_thread or CLONE_UNTRACED is explicitly
+	 * requested, no event is reported; otherwise, report if the event
+	 * for the type of forking is enabled.
 	 */
-	if (likely(user_mode(regs)))
-		trace = tracehook_prepare_clone(clone_flags);
+	if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
+		if (clone_flags & CLONE_VFORK)
+			trace = PTRACE_EVENT_VFORK;
+		else if ((clone_flags & CSIGNAL) != SIGCHLD)
+			trace = PTRACE_EVENT_CLONE;
+		else
+			trace = PTRACE_EVENT_FORK;
+
+		if (likely(!ptrace_event_enabled(current, trace)))
+			trace = 0;
+	}
 
 	p = copy_process(clone_flags, stack_start, regs, stack_size,
 			 child_tidptr, NULL, trace);
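
The block added here open-codes what tracehook_prepare_clone() used to decide: vfork-style clones report PTRACE_EVENT_VFORK, clones whose exit signal is not SIGCHLD (i.e. thread-style clones) report PTRACE_EVENT_CLONE, and everything else reports PTRACE_EVENT_FORK, with CLONE_UNTRACED suppressing reporting entirely. A userspace sketch of the same selection, assuming glibc exposes the PTRACE_EVENT_* and CLONE_* constants; CSIGNAL is kernel-internal and redefined here, and the ptrace_event_enabled() check is omitted since it needs tracer state:

#define _GNU_SOURCE
#include <assert.h>
#include <sched.h>		/* CLONE_VFORK, CLONE_UNTRACED, ... */
#include <signal.h>		/* SIGCHLD */
#include <stdio.h>
#include <sys/ptrace.h>		/* PTRACE_EVENT_FORK/VFORK/CLONE */

#define CSIGNAL 0x000000ff	/* low byte of clone_flags: exit signal */

/* Mirrors the event selection in do_fork(). */
static int clone_trace_event(unsigned long clone_flags)
{
	if (clone_flags & CLONE_UNTRACED)
		return 0;
	if (clone_flags & CLONE_VFORK)
		return PTRACE_EVENT_VFORK;
	if ((clone_flags & CSIGNAL) != SIGCHLD)
		return PTRACE_EVENT_CLONE;	/* thread-style clone */
	return PTRACE_EVENT_FORK;		/* plain fork() */
}

int main(void)
{
	assert(clone_trace_event(SIGCHLD) == PTRACE_EVENT_FORK);
	assert(clone_trace_event(CLONE_VFORK | SIGCHLD) == PTRACE_EVENT_VFORK);
	assert(clone_trace_event(CLONE_VM | CLONE_THREAD) == PTRACE_EVENT_CLONE);
	assert(clone_trace_event(CLONE_UNTRACED | SIGCHLD) == 0);
	puts("event selection matches do_fork()'s rules");
	return 0;
}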
@@ -1508,26 +1533,26 @@ long do_fork(unsigned long clone_flags,
 		}
 
 		audit_finish_fork(p);
-		tracehook_report_clone(regs, clone_flags, nr, p);
 
 		/*
 		 * We set PF_STARTING at creation in case tracing wants to
 		 * use this to distinguish a fully live task from one that
-		 * hasn't gotten to tracehook_report_clone() yet.  Now we
-		 * clear it and set the child going.
+		 * hasn't finished SIGSTOP raising yet.  Now we clear it
+		 * and set the child going.
 		 */
 		p->flags &= ~PF_STARTING;
 
 		wake_up_new_task(p);
 
-		tracehook_report_clone_complete(trace, regs,
-						clone_flags, nr, p);
+		/* forking complete and child started to run, tell ptracer */
+		if (unlikely(trace))
+			ptrace_event(trace, nr);
 
 		if (clone_flags & CLONE_VFORK) {
 			freezer_do_not_count();
 			wait_for_completion(&vfork);
 			freezer_count();
-			tracehook_report_vfork_done(p, nr);
+			ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
 		}
 	} else {
 		nr = PTR_ERR(p);
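
On the tracer side, the ptrace_event(trace, nr) call that replaces tracehook_report_clone_complete() surfaces as a SIGTRAP stop whose wait status encodes the event number, with the new task's pid retrievable via PTRACE_GETEVENTMSG. A minimal sketch of observing PTRACE_EVENT_FORK from userspace (error handling omitted; assumes Linux with glibc's ptrace wrapper):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);			/* let the tracer set options */
		if (fork() == 0)
			_exit(0);		/* grandchild */
		wait(NULL);
		_exit(0);
	}

	int status;
	waitpid(child, &status, 0);		/* initial SIGSTOP */
	ptrace(PTRACE_SETOPTIONS, child, NULL, (void *)PTRACE_O_TRACEFORK);
	ptrace(PTRACE_CONT, child, NULL, NULL);

	waitpid(child, &status, 0);		/* fork event stop */
	if (status >> 8 == (SIGTRAP | (PTRACE_EVENT_FORK << 8))) {
		unsigned long new_pid;

		ptrace(PTRACE_GETEVENTMSG, child, NULL, &new_pid);
		printf("pid %d forked pid %lu\n", child, new_pid);

		/* The new task is auto-attached and stopped; let it go. */
		waitpid((pid_t)new_pid, &status, 0);
		ptrace(PTRACE_DETACH, (pid_t)new_pid, NULL, NULL);
	}
	ptrace(PTRACE_DETACH, child, NULL, NULL);
	wait(NULL);
	return 0;
}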
@@ -1574,6 +1599,7 @@ void __init proc_caches_init(void)
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
 	mmap_init();
+	nsproxy_cache_init();
 }
 
 /*
@@ -1670,12 +1696,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	 */
 	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 		do_sysvsem = 1;
-	if ((err = unshare_fs(unshare_flags, &new_fs)))
+	err = unshare_fs(unshare_flags, &new_fs);
+	if (err)
 		goto bad_unshare_out;
-	if ((err = unshare_fd(unshare_flags, &new_fd)))
+	err = unshare_fd(unshare_flags, &new_fd);
+	if (err)
 		goto bad_unshare_cleanup_fs;
-	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
-			new_fs)))
+	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+	if (err)
 		goto bad_unshare_cleanup_fd;
 
 	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
