about summary refs log tree commit diff stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 239
1 files changed, 198 insertions, 41 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 61667909ce83..d8ae0f1b4148 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -677,7 +677,6 @@ void __mmdrop(struct mm_struct *mm)
677 WARN_ON_ONCE(mm == current->active_mm); 677 WARN_ON_ONCE(mm == current->active_mm);
678 mm_free_pgd(mm); 678 mm_free_pgd(mm);
679 destroy_context(mm); 679 destroy_context(mm);
680 hmm_mm_destroy(mm);
681 mmu_notifier_mm_destroy(mm); 680 mmu_notifier_mm_destroy(mm);
682 check_mm(mm); 681 check_mm(mm);
683 put_user_ns(mm->user_ns); 682 put_user_ns(mm->user_ns);
@@ -898,6 +897,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
898#ifdef CONFIG_STACKPROTECTOR 897#ifdef CONFIG_STACKPROTECTOR
899 tsk->stack_canary = get_random_canary(); 898 tsk->stack_canary = get_random_canary();
900#endif 899#endif
900 if (orig->cpus_ptr == &orig->cpus_mask)
901 tsk->cpus_ptr = &tsk->cpus_mask;
901 902
902 /* 903 /*
903 * One for us, one for whoever does the "release_task()" (usually 904 * One for us, one for whoever does the "release_task()" (usually
@@ -1709,8 +1710,34 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
1709} 1710}
1710#endif 1711#endif
1711 1712
1713/*
1714 * Poll support for process exit notification.
1715 */
1716static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
1717{
1718 struct task_struct *task;
1719 struct pid *pid = file->private_data;
1720 int poll_flags = 0;
1721
1722 poll_wait(file, &pid->wait_pidfd, pts);
1723
1724 rcu_read_lock();
1725 task = pid_task(pid, PIDTYPE_PID);
1726 /*
1727 * Inform pollers only when the whole thread group exits.
1728 * If the thread group leader exits before all other threads in the
1729 * group, then poll(2) should block, similar to the wait(2) family.
1730 */
1731 if (!task || (task->exit_state && thread_group_empty(task)))
1732 poll_flags = POLLIN | POLLRDNORM;
1733 rcu_read_unlock();
1734
1735 return poll_flags;
1736}
1737
1712const struct file_operations pidfd_fops = { 1738const struct file_operations pidfd_fops = {
1713 .release = pidfd_release, 1739 .release = pidfd_release,
1740 .poll = pidfd_poll,
1714#ifdef CONFIG_PROC_FS 1741#ifdef CONFIG_PROC_FS
1715 .show_fdinfo = pidfd_show_fdinfo, 1742 .show_fdinfo = pidfd_show_fdinfo,
1716#endif 1743#endif
@@ -1740,20 +1767,16 @@ static __always_inline void delayed_free_task(struct task_struct *tsk)
1740 * flags). The actual kick-off is left to the caller. 1767 * flags). The actual kick-off is left to the caller.
1741 */ 1768 */
1742static __latent_entropy struct task_struct *copy_process( 1769static __latent_entropy struct task_struct *copy_process(
1743 unsigned long clone_flags,
1744 unsigned long stack_start,
1745 unsigned long stack_size,
1746 int __user *parent_tidptr,
1747 int __user *child_tidptr,
1748 struct pid *pid, 1770 struct pid *pid,
1749 int trace, 1771 int trace,
1750 unsigned long tls, 1772 int node,
1751 int node) 1773 struct kernel_clone_args *args)
1752{ 1774{
1753 int pidfd = -1, retval; 1775 int pidfd = -1, retval;
1754 struct task_struct *p; 1776 struct task_struct *p;
1755 struct multiprocess_signals delayed; 1777 struct multiprocess_signals delayed;
1756 struct file *pidfile = NULL; 1778 struct file *pidfile = NULL;
1779 u64 clone_flags = args->flags;
1757 1780
1758 /* 1781 /*
1759 * Don't allow sharing the root directory with processes in a different 1782 * Don't allow sharing the root directory with processes in a different
@@ -1803,14 +1826,11 @@ static __latent_entropy struct task_struct *copy_process(
1803 1826
1804 if (clone_flags & CLONE_PIDFD) { 1827 if (clone_flags & CLONE_PIDFD) {
1805 /* 1828 /*
1806 * - CLONE_PARENT_SETTID is useless for pidfds and also
1807 * parent_tidptr is used to return pidfds.
1808 * - CLONE_DETACHED is blocked so that we can potentially 1829 * - CLONE_DETACHED is blocked so that we can potentially
1809 * reuse it later for CLONE_PIDFD. 1830 * reuse it later for CLONE_PIDFD.
1810 * - CLONE_THREAD is blocked until someone really needs it. 1831 * - CLONE_THREAD is blocked until someone really needs it.
1811 */ 1832 */
1812 if (clone_flags & 1833 if (clone_flags & (CLONE_DETACHED | CLONE_THREAD))
1813 (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
1814 return ERR_PTR(-EINVAL); 1834 return ERR_PTR(-EINVAL);
1815 } 1835 }
1816 1836
@@ -1843,11 +1863,11 @@ static __latent_entropy struct task_struct *copy_process(
1843 * p->set_child_tid which is (ab)used as a kthread's data pointer for 1863 * p->set_child_tid which is (ab)used as a kthread's data pointer for
1844 * kernel threads (PF_KTHREAD). 1864 * kernel threads (PF_KTHREAD).
1845 */ 1865 */
1846 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1866 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
1847 /* 1867 /*
1848 * Clear TID on mm_release()? 1868 * Clear TID on mm_release()?
1849 */ 1869 */
1850 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; 1870 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
1851 1871
1852 ftrace_graph_init_task(p); 1872 ftrace_graph_init_task(p);
1853 1873
@@ -1952,9 +1972,6 @@ static __latent_entropy struct task_struct *copy_process(
1952 p->pagefault_disabled = 0; 1972 p->pagefault_disabled = 0;
1953 1973
1954#ifdef CONFIG_LOCKDEP 1974#ifdef CONFIG_LOCKDEP
1955 p->lockdep_depth = 0; /* no locks held yet */
1956 p->curr_chain_key = 0;
1957 p->lockdep_recursion = 0;
1958 lockdep_init_task(p); 1975 lockdep_init_task(p);
1959#endif 1976#endif
1960 1977
@@ -2006,7 +2023,8 @@ static __latent_entropy struct task_struct *copy_process(
2006 retval = copy_io(clone_flags, p); 2023 retval = copy_io(clone_flags, p);
2007 if (retval) 2024 if (retval)
2008 goto bad_fork_cleanup_namespaces; 2025 goto bad_fork_cleanup_namespaces;
2009 retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); 2026 retval = copy_thread_tls(clone_flags, args->stack, args->stack_size, p,
2027 args->tls);
2010 if (retval) 2028 if (retval)
2011 goto bad_fork_cleanup_io; 2029 goto bad_fork_cleanup_io;
2012 2030
@@ -2036,11 +2054,12 @@ static __latent_entropy struct task_struct *copy_process(
2036 O_RDWR | O_CLOEXEC); 2054 O_RDWR | O_CLOEXEC);
2037 if (IS_ERR(pidfile)) { 2055 if (IS_ERR(pidfile)) {
2038 put_unused_fd(pidfd); 2056 put_unused_fd(pidfd);
2057 retval = PTR_ERR(pidfile);
2039 goto bad_fork_free_pid; 2058 goto bad_fork_free_pid;
2040 } 2059 }
2041 get_pid(pid); /* held by pidfile now */ 2060 get_pid(pid); /* held by pidfile now */
2042 2061
2043 retval = put_user(pidfd, parent_tidptr); 2062 retval = put_user(pidfd, args->pidfd);
2044 if (retval) 2063 if (retval)
2045 goto bad_fork_put_pidfd; 2064 goto bad_fork_put_pidfd;
2046 } 2065 }
@@ -2083,7 +2102,7 @@ static __latent_entropy struct task_struct *copy_process(
2083 if (clone_flags & CLONE_PARENT) 2102 if (clone_flags & CLONE_PARENT)
2084 p->exit_signal = current->group_leader->exit_signal; 2103 p->exit_signal = current->group_leader->exit_signal;
2085 else 2104 else
2086 p->exit_signal = (clone_flags & CSIGNAL); 2105 p->exit_signal = args->exit_signal;
2087 p->group_leader = p; 2106 p->group_leader = p;
2088 p->tgid = p->pid; 2107 p->tgid = p->pid;
2089 } 2108 }
@@ -2116,7 +2135,7 @@ static __latent_entropy struct task_struct *copy_process(
2116 */ 2135 */
2117 2136
2118 p->start_time = ktime_get_ns(); 2137 p->start_time = ktime_get_ns();
2119 p->real_start_time = ktime_get_boot_ns(); 2138 p->real_start_time = ktime_get_boottime_ns();
2120 2139
2121 /* 2140 /*
2122 * Make it visible to the rest of the system, but dont wake it up yet. 2141 * Make it visible to the rest of the system, but dont wake it up yet.
@@ -2296,8 +2315,11 @@ static inline void init_idle_pids(struct task_struct *idle)
2296struct task_struct *fork_idle(int cpu) 2315struct task_struct *fork_idle(int cpu)
2297{ 2316{
2298 struct task_struct *task; 2317 struct task_struct *task;
2299 task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0, 2318 struct kernel_clone_args args = {
2300 cpu_to_node(cpu)); 2319 .flags = CLONE_VM,
2320 };
2321
2322 task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
2301 if (!IS_ERR(task)) { 2323 if (!IS_ERR(task)) {
2302 init_idle_pids(task); 2324 init_idle_pids(task);
2303 init_idle(task, cpu); 2325 init_idle(task, cpu);
@@ -2317,13 +2339,9 @@ struct mm_struct *copy_init_mm(void)
2317 * It copies the process, and if successful kick-starts 2339 * It copies the process, and if successful kick-starts
2318 * it and waits for it to finish using the VM if required. 2340 * it and waits for it to finish using the VM if required.
2319 */ 2341 */
2320long _do_fork(unsigned long clone_flags, 2342long _do_fork(struct kernel_clone_args *args)
2321 unsigned long stack_start,
2322 unsigned long stack_size,
2323 int __user *parent_tidptr,
2324 int __user *child_tidptr,
2325 unsigned long tls)
2326{ 2343{
2344 u64 clone_flags = args->flags;
2327 struct completion vfork; 2345 struct completion vfork;
2328 struct pid *pid; 2346 struct pid *pid;
2329 struct task_struct *p; 2347 struct task_struct *p;
@@ -2339,7 +2357,7 @@ long _do_fork(unsigned long clone_flags,
2339 if (!(clone_flags & CLONE_UNTRACED)) { 2357 if (!(clone_flags & CLONE_UNTRACED)) {
2340 if (clone_flags & CLONE_VFORK) 2358 if (clone_flags & CLONE_VFORK)
2341 trace = PTRACE_EVENT_VFORK; 2359 trace = PTRACE_EVENT_VFORK;
2342 else if ((clone_flags & CSIGNAL) != SIGCHLD) 2360 else if (args->exit_signal != SIGCHLD)
2343 trace = PTRACE_EVENT_CLONE; 2361 trace = PTRACE_EVENT_CLONE;
2344 else 2362 else
2345 trace = PTRACE_EVENT_FORK; 2363 trace = PTRACE_EVENT_FORK;
@@ -2348,8 +2366,7 @@ long _do_fork(unsigned long clone_flags,
2348 trace = 0; 2366 trace = 0;
2349 } 2367 }
2350 2368
2351 p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr, 2369 p = copy_process(NULL, trace, NUMA_NO_NODE, args);
2352 child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
2353 add_latent_entropy(); 2370 add_latent_entropy();
2354 2371
2355 if (IS_ERR(p)) 2372 if (IS_ERR(p))
@@ -2365,7 +2382,7 @@ long _do_fork(unsigned long clone_flags,
2365 nr = pid_vnr(pid); 2382 nr = pid_vnr(pid);
2366 2383
2367 if (clone_flags & CLONE_PARENT_SETTID) 2384 if (clone_flags & CLONE_PARENT_SETTID)
2368 put_user(nr, parent_tidptr); 2385 put_user(nr, args->parent_tid);
2369 2386
2370 if (clone_flags & CLONE_VFORK) { 2387 if (clone_flags & CLONE_VFORK) {
2371 p->vfork_done = &vfork; 2388 p->vfork_done = &vfork;
@@ -2388,6 +2405,16 @@ long _do_fork(unsigned long clone_flags,
2388 return nr; 2405 return nr;
2389} 2406}
2390 2407
2408bool legacy_clone_args_valid(const struct kernel_clone_args *kargs)
2409{
2410 /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
2411 if ((kargs->flags & CLONE_PIDFD) &&
2412 (kargs->flags & CLONE_PARENT_SETTID))
2413 return false;
2414
2415 return true;
2416}
2417
2391#ifndef CONFIG_HAVE_COPY_THREAD_TLS 2418#ifndef CONFIG_HAVE_COPY_THREAD_TLS
2392/* For compatibility with architectures that call do_fork directly rather than 2419/* For compatibility with architectures that call do_fork directly rather than
2393 * using the syscall entry points below. */ 2420 * using the syscall entry points below. */
@@ -2397,8 +2424,20 @@ long do_fork(unsigned long clone_flags,
2397 int __user *parent_tidptr, 2424 int __user *parent_tidptr,
2398 int __user *child_tidptr) 2425 int __user *child_tidptr)
2399{ 2426{
2400 return _do_fork(clone_flags, stack_start, stack_size, 2427 struct kernel_clone_args args = {
2401 parent_tidptr, child_tidptr, 0); 2428 .flags = (clone_flags & ~CSIGNAL),
2429 .pidfd = parent_tidptr,
2430 .child_tid = child_tidptr,
2431 .parent_tid = parent_tidptr,
2432 .exit_signal = (clone_flags & CSIGNAL),
2433 .stack = stack_start,
2434 .stack_size = stack_size,
2435 };
2436
2437 if (!legacy_clone_args_valid(&args))
2438 return -EINVAL;
2439
2440 return _do_fork(&args);
2402} 2441}
2403#endif 2442#endif
2404 2443
@@ -2407,15 +2446,25 @@ long do_fork(unsigned long clone_flags,
2407 */ 2446 */
2408pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) 2447pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
2409{ 2448{
2410 return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, 2449 struct kernel_clone_args args = {
2411 (unsigned long)arg, NULL, NULL, 0); 2450 .flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
2451 .exit_signal = (flags & CSIGNAL),
2452 .stack = (unsigned long)fn,
2453 .stack_size = (unsigned long)arg,
2454 };
2455
2456 return _do_fork(&args);
2412} 2457}
2413 2458
2414#ifdef __ARCH_WANT_SYS_FORK 2459#ifdef __ARCH_WANT_SYS_FORK
2415SYSCALL_DEFINE0(fork) 2460SYSCALL_DEFINE0(fork)
2416{ 2461{
2417#ifdef CONFIG_MMU 2462#ifdef CONFIG_MMU
2418 return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); 2463 struct kernel_clone_args args = {
2464 .exit_signal = SIGCHLD,
2465 };
2466
2467 return _do_fork(&args);
2419#else 2468#else
2420 /* can not support in nommu mode */ 2469 /* can not support in nommu mode */
2421 return -EINVAL; 2470 return -EINVAL;
@@ -2426,8 +2475,12 @@ SYSCALL_DEFINE0(fork)
2426#ifdef __ARCH_WANT_SYS_VFORK 2475#ifdef __ARCH_WANT_SYS_VFORK
2427SYSCALL_DEFINE0(vfork) 2476SYSCALL_DEFINE0(vfork)
2428{ 2477{
2429 return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 2478 struct kernel_clone_args args = {
2430 0, NULL, NULL, 0); 2479 .flags = CLONE_VFORK | CLONE_VM,
2480 .exit_signal = SIGCHLD,
2481 };
2482
2483 return _do_fork(&args);
2431} 2484}
2432#endif 2485#endif
2433 2486
@@ -2455,7 +2508,111 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
2455 unsigned long, tls) 2508 unsigned long, tls)
2456#endif 2509#endif
2457{ 2510{
2458 return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); 2511 struct kernel_clone_args args = {
2512 .flags = (clone_flags & ~CSIGNAL),
2513 .pidfd = parent_tidptr,
2514 .child_tid = child_tidptr,
2515 .parent_tid = parent_tidptr,
2516 .exit_signal = (clone_flags & CSIGNAL),
2517 .stack = newsp,
2518 .tls = tls,
2519 };
2520
2521 if (!legacy_clone_args_valid(&args))
2522 return -EINVAL;
2523
2524 return _do_fork(&args);
2525}
2526#endif
2527
2528#ifdef __ARCH_WANT_SYS_CLONE3
2529noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
2530 struct clone_args __user *uargs,
2531 size_t size)
2532{
2533 struct clone_args args;
2534
2535 if (unlikely(size > PAGE_SIZE))
2536 return -E2BIG;
2537
2538 if (unlikely(size < sizeof(struct clone_args)))
2539 return -EINVAL;
2540
2541 if (unlikely(!access_ok(uargs, size)))
2542 return -EFAULT;
2543
2544 if (size > sizeof(struct clone_args)) {
2545 unsigned char __user *addr;
2546 unsigned char __user *end;
2547 unsigned char val;
2548
2549 addr = (void __user *)uargs + sizeof(struct clone_args);
2550 end = (void __user *)uargs + size;
2551
2552 for (; addr < end; addr++) {
2553 if (get_user(val, addr))
2554 return -EFAULT;
2555 if (val)
2556 return -E2BIG;
2557 }
2558
2559 size = sizeof(struct clone_args);
2560 }
2561
2562 if (copy_from_user(&args, uargs, size))
2563 return -EFAULT;
2564
2565 *kargs = (struct kernel_clone_args){
2566 .flags = args.flags,
2567 .pidfd = u64_to_user_ptr(args.pidfd),
2568 .child_tid = u64_to_user_ptr(args.child_tid),
2569 .parent_tid = u64_to_user_ptr(args.parent_tid),
2570 .exit_signal = args.exit_signal,
2571 .stack = args.stack,
2572 .stack_size = args.stack_size,
2573 .tls = args.tls,
2574 };
2575
2576 return 0;
2577}
2578
2579static bool clone3_args_valid(const struct kernel_clone_args *kargs)
2580{
2581 /*
2582 * All lower bits of the flag word are taken.
2583 * Verify that no other unknown flags are passed along.
2584 */
2585 if (kargs->flags & ~CLONE_LEGACY_FLAGS)
2586 return false;
2587
2588 /*
2589 * - make the CLONE_DETACHED bit reuseable for clone3
2590 * - make the CSIGNAL bits reuseable for clone3
2591 */
2592 if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
2593 return false;
2594
2595 if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
2596 kargs->exit_signal)
2597 return false;
2598
2599 return true;
2600}
2601
2602SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
2603{
2604 int err;
2605
2606 struct kernel_clone_args kargs;
2607
2608 err = copy_clone_args_from_user(&kargs, uargs, size);
2609 if (err)
2610 return err;
2611
2612 if (!clone3_args_valid(&kargs))
2613 return -EINVAL;
2614
2615 return _do_fork(&kargs);
2459} 2616}
2460#endif 2617#endif
2461 2618