about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 315
1 files changed, 235 insertions, 80 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 737db1828437..8f3e2d97d771 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0-only
1/* 2/*
2 * linux/kernel/fork.c 3 * linux/kernel/fork.c
3 * 4 *
@@ -122,7 +123,7 @@
122unsigned long total_forks; /* Handle normal Linux uptimes. */ 123unsigned long total_forks; /* Handle normal Linux uptimes. */
123int nr_threads; /* The idle threads do not count.. */ 124int nr_threads; /* The idle threads do not count.. */
124 125
125int max_threads; /* tunable limit on nr_threads */ 126static int max_threads; /* tunable limit on nr_threads */
126 127
127DEFINE_PER_CPU(unsigned long, process_counts) = 0; 128DEFINE_PER_CPU(unsigned long, process_counts) = 0;
128 129
@@ -247,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
247 struct page *page = alloc_pages_node(node, THREADINFO_GFP, 248 struct page *page = alloc_pages_node(node, THREADINFO_GFP,
248 THREAD_SIZE_ORDER); 249 THREAD_SIZE_ORDER);
249 250
250 return page ? page_address(page) : NULL; 251 if (likely(page)) {
252 tsk->stack = page_address(page);
253 return tsk->stack;
254 }
255 return NULL;
251#endif 256#endif
252} 257}
253 258
@@ -893,6 +898,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
893#ifdef CONFIG_STACKPROTECTOR 898#ifdef CONFIG_STACKPROTECTOR
894 tsk->stack_canary = get_random_canary(); 899 tsk->stack_canary = get_random_canary();
895#endif 900#endif
901 if (orig->cpus_ptr == &orig->cpus_mask)
902 tsk->cpus_ptr = &tsk->cpus_mask;
896 903
897 /* 904 /*
898 * One for us, one for whoever does the "release_task()" (usually 905 * One for us, one for whoever does the "release_task()" (usually
@@ -955,6 +962,15 @@ static void mm_init_aio(struct mm_struct *mm)
955#endif 962#endif
956} 963}
957 964
965static __always_inline void mm_clear_owner(struct mm_struct *mm,
966 struct task_struct *p)
967{
968#ifdef CONFIG_MEMCG
969 if (mm->owner == p)
970 WRITE_ONCE(mm->owner, NULL);
971#endif
972}
973
958static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) 974static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
959{ 975{
960#ifdef CONFIG_MEMCG 976#ifdef CONFIG_MEMCG
@@ -1343,6 +1359,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
1343free_pt: 1359free_pt:
1344 /* don't put binfmt in mmput, we haven't got module yet */ 1360 /* don't put binfmt in mmput, we haven't got module yet */
1345 mm->binfmt = NULL; 1361 mm->binfmt = NULL;
1362 mm_init_owner(mm, NULL);
1346 mmput(mm); 1363 mmput(mm);
1347 1364
1348fail_nomem: 1365fail_nomem:
@@ -1694,36 +1711,52 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
1694} 1711}
1695#endif 1712#endif
1696 1713
1714/*
1715 * Poll support for process exit notification.
1716 */
1717static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
1718{
1719 struct task_struct *task;
1720 struct pid *pid = file->private_data;
1721 int poll_flags = 0;
1722
1723 poll_wait(file, &pid->wait_pidfd, pts);
1724
1725 rcu_read_lock();
1726 task = pid_task(pid, PIDTYPE_PID);
1727 /*
1728 * Inform pollers only when the whole thread group exits.
1729 * If the thread group leader exits before all other threads in the
1730 * group, then poll(2) should block, similar to the wait(2) family.
1731 */
1732 if (!task || (task->exit_state && thread_group_empty(task)))
1733 poll_flags = POLLIN | POLLRDNORM;
1734 rcu_read_unlock();
1735
1736 return poll_flags;
1737}
1738
1697const struct file_operations pidfd_fops = { 1739const struct file_operations pidfd_fops = {
1698 .release = pidfd_release, 1740 .release = pidfd_release,
1741 .poll = pidfd_poll,
1699#ifdef CONFIG_PROC_FS 1742#ifdef CONFIG_PROC_FS
1700 .show_fdinfo = pidfd_show_fdinfo, 1743 .show_fdinfo = pidfd_show_fdinfo,
1701#endif 1744#endif
1702}; 1745};
1703 1746
1704/** 1747static void __delayed_free_task(struct rcu_head *rhp)
1705 * pidfd_create() - Create a new pid file descriptor.
1706 *
1707 * @pid: struct pid that the pidfd will reference
1708 *
1709 * This creates a new pid file descriptor with the O_CLOEXEC flag set.
1710 *
1711 * Note, that this function can only be called after the fd table has
1712 * been unshared to avoid leaking the pidfd to the new process.
1713 *
1714 * Return: On success, a cloexec pidfd is returned.
1715 * On error, a negative errno number will be returned.
1716 */
1717static int pidfd_create(struct pid *pid)
1718{ 1748{
1719 int fd; 1749 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
1720 1750
1721 fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), 1751 free_task(tsk);
1722 O_RDWR | O_CLOEXEC); 1752}
1723 if (fd < 0)
1724 put_pid(pid);
1725 1753
1726 return fd; 1754static __always_inline void delayed_free_task(struct task_struct *tsk)
1755{
1756 if (IS_ENABLED(CONFIG_MEMCG))
1757 call_rcu(&tsk->rcu, __delayed_free_task);
1758 else
1759 free_task(tsk);
1727} 1760}
1728 1761
1729/* 1762/*
@@ -1735,19 +1768,16 @@ static int pidfd_create(struct pid *pid)
1735 * flags). The actual kick-off is left to the caller. 1768 * flags). The actual kick-off is left to the caller.
1736 */ 1769 */
1737static __latent_entropy struct task_struct *copy_process( 1770static __latent_entropy struct task_struct *copy_process(
1738 unsigned long clone_flags,
1739 unsigned long stack_start,
1740 unsigned long stack_size,
1741 int __user *parent_tidptr,
1742 int __user *child_tidptr,
1743 struct pid *pid, 1771 struct pid *pid,
1744 int trace, 1772 int trace,
1745 unsigned long tls, 1773 int node,
1746 int node) 1774 struct kernel_clone_args *args)
1747{ 1775{
1748 int pidfd = -1, retval; 1776 int pidfd = -1, retval;
1749 struct task_struct *p; 1777 struct task_struct *p;
1750 struct multiprocess_signals delayed; 1778 struct multiprocess_signals delayed;
1779 struct file *pidfile = NULL;
1780 u64 clone_flags = args->flags;
1751 1781
1752 /* 1782 /*
1753 * Don't allow sharing the root directory with processes in a different 1783 * Don't allow sharing the root directory with processes in a different
@@ -1796,27 +1826,12 @@ static __latent_entropy struct task_struct *copy_process(
1796 } 1826 }
1797 1827
1798 if (clone_flags & CLONE_PIDFD) { 1828 if (clone_flags & CLONE_PIDFD) {
1799 int reserved;
1800
1801 /* 1829 /*
1802 * - CLONE_PARENT_SETTID is useless for pidfds and also
1803 * parent_tidptr is used to return pidfds.
1804 * - CLONE_DETACHED is blocked so that we can potentially 1830 * - CLONE_DETACHED is blocked so that we can potentially
1805 * reuse it later for CLONE_PIDFD. 1831 * reuse it later for CLONE_PIDFD.
1806 * - CLONE_THREAD is blocked until someone really needs it. 1832 * - CLONE_THREAD is blocked until someone really needs it.
1807 */ 1833 */
1808 if (clone_flags & 1834 if (clone_flags & (CLONE_DETACHED | CLONE_THREAD))
1809 (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
1810 return ERR_PTR(-EINVAL);
1811
1812 /*
1813 * Verify that parent_tidptr is sane so we can potentially
1814 * reuse it later.
1815 */
1816 if (get_user(reserved, parent_tidptr))
1817 return ERR_PTR(-EFAULT);
1818
1819 if (reserved != 0)
1820 return ERR_PTR(-EINVAL); 1835 return ERR_PTR(-EINVAL);
1821 } 1836 }
1822 1837
@@ -1849,11 +1864,11 @@ static __latent_entropy struct task_struct *copy_process(
1849 * p->set_child_tid which is (ab)used as a kthread's data pointer for 1864 * p->set_child_tid which is (ab)used as a kthread's data pointer for
1850 * kernel threads (PF_KTHREAD). 1865 * kernel threads (PF_KTHREAD).
1851 */ 1866 */
1852 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1867 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
1853 /* 1868 /*
1854 * Clear TID on mm_release()? 1869 * Clear TID on mm_release()?
1855 */ 1870 */
1856 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; 1871 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
1857 1872
1858 ftrace_graph_init_task(p); 1873 ftrace_graph_init_task(p);
1859 1874
@@ -1958,9 +1973,6 @@ static __latent_entropy struct task_struct *copy_process(
1958 p->pagefault_disabled = 0; 1973 p->pagefault_disabled = 0;
1959 1974
1960#ifdef CONFIG_LOCKDEP 1975#ifdef CONFIG_LOCKDEP
1961 p->lockdep_depth = 0; /* no locks held yet */
1962 p->curr_chain_key = 0;
1963 p->lockdep_recursion = 0;
1964 lockdep_init_task(p); 1976 lockdep_init_task(p);
1965#endif 1977#endif
1966 1978
@@ -2012,7 +2024,8 @@ static __latent_entropy struct task_struct *copy_process(
2012 retval = copy_io(clone_flags, p); 2024 retval = copy_io(clone_flags, p);
2013 if (retval) 2025 if (retval)
2014 goto bad_fork_cleanup_namespaces; 2026 goto bad_fork_cleanup_namespaces;
2015 retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); 2027 retval = copy_thread_tls(clone_flags, args->stack, args->stack_size, p,
2028 args->tls);
2016 if (retval) 2029 if (retval)
2017 goto bad_fork_cleanup_io; 2030 goto bad_fork_cleanup_io;
2018 2031
@@ -2032,12 +2045,22 @@ static __latent_entropy struct task_struct *copy_process(
2032 * if the fd table isn't shared). 2045 * if the fd table isn't shared).
2033 */ 2046 */
2034 if (clone_flags & CLONE_PIDFD) { 2047 if (clone_flags & CLONE_PIDFD) {
2035 retval = pidfd_create(pid); 2048 retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
2036 if (retval < 0) 2049 if (retval < 0)
2037 goto bad_fork_free_pid; 2050 goto bad_fork_free_pid;
2038 2051
2039 pidfd = retval; 2052 pidfd = retval;
2040 retval = put_user(pidfd, parent_tidptr); 2053
2054 pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
2055 O_RDWR | O_CLOEXEC);
2056 if (IS_ERR(pidfile)) {
2057 put_unused_fd(pidfd);
2058 retval = PTR_ERR(pidfile);
2059 goto bad_fork_free_pid;
2060 }
2061 get_pid(pid); /* held by pidfile now */
2062
2063 retval = put_user(pidfd, args->pidfd);
2041 if (retval) 2064 if (retval)
2042 goto bad_fork_put_pidfd; 2065 goto bad_fork_put_pidfd;
2043 } 2066 }
@@ -2068,7 +2091,7 @@ static __latent_entropy struct task_struct *copy_process(
2068#ifdef TIF_SYSCALL_EMU 2091#ifdef TIF_SYSCALL_EMU
2069 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 2092 clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
2070#endif 2093#endif
2071 clear_all_latency_tracing(p); 2094 clear_tsk_latency_tracing(p);
2072 2095
2073 /* ok, now we should be set up.. */ 2096 /* ok, now we should be set up.. */
2074 p->pid = pid_nr(pid); 2097 p->pid = pid_nr(pid);
@@ -2080,7 +2103,7 @@ static __latent_entropy struct task_struct *copy_process(
2080 if (clone_flags & CLONE_PARENT) 2103 if (clone_flags & CLONE_PARENT)
2081 p->exit_signal = current->group_leader->exit_signal; 2104 p->exit_signal = current->group_leader->exit_signal;
2082 else 2105 else
2083 p->exit_signal = (clone_flags & CSIGNAL); 2106 p->exit_signal = args->exit_signal;
2084 p->group_leader = p; 2107 p->group_leader = p;
2085 p->tgid = p->pid; 2108 p->tgid = p->pid;
2086 } 2109 }
@@ -2113,7 +2136,7 @@ static __latent_entropy struct task_struct *copy_process(
2113 */ 2136 */
2114 2137
2115 p->start_time = ktime_get_ns(); 2138 p->start_time = ktime_get_ns();
2116 p->real_start_time = ktime_get_boot_ns(); 2139 p->real_start_time = ktime_get_boottime_ns();
2117 2140
2118 /* 2141 /*
2119 * Make it visible to the rest of the system, but dont wake it up yet. 2142 * Make it visible to the rest of the system, but dont wake it up yet.
@@ -2154,6 +2177,9 @@ static __latent_entropy struct task_struct *copy_process(
2154 goto bad_fork_cancel_cgroup; 2177 goto bad_fork_cancel_cgroup;
2155 } 2178 }
2156 2179
2180 /* past the last point of failure */
2181 if (pidfile)
2182 fd_install(pidfd, pidfile);
2157 2183
2158 init_task_pid_links(p); 2184 init_task_pid_links(p);
2159 if (likely(p->pid)) { 2185 if (likely(p->pid)) {
@@ -2220,8 +2246,10 @@ bad_fork_cancel_cgroup:
2220bad_fork_cgroup_threadgroup_change_end: 2246bad_fork_cgroup_threadgroup_change_end:
2221 cgroup_threadgroup_change_end(current); 2247 cgroup_threadgroup_change_end(current);
2222bad_fork_put_pidfd: 2248bad_fork_put_pidfd:
2223 if (clone_flags & CLONE_PIDFD) 2249 if (clone_flags & CLONE_PIDFD) {
2224 ksys_close(pidfd); 2250 fput(pidfile);
2251 put_unused_fd(pidfd);
2252 }
2225bad_fork_free_pid: 2253bad_fork_free_pid:
2226 if (pid != &init_struct_pid) 2254 if (pid != &init_struct_pid)
2227 free_pid(pid); 2255 free_pid(pid);
@@ -2233,8 +2261,10 @@ bad_fork_cleanup_io:
2233bad_fork_cleanup_namespaces: 2261bad_fork_cleanup_namespaces:
2234 exit_task_namespaces(p); 2262 exit_task_namespaces(p);
2235bad_fork_cleanup_mm: 2263bad_fork_cleanup_mm:
2236 if (p->mm) 2264 if (p->mm) {
2265 mm_clear_owner(p->mm, p);
2237 mmput(p->mm); 2266 mmput(p->mm);
2267 }
2238bad_fork_cleanup_signal: 2268bad_fork_cleanup_signal:
2239 if (!(clone_flags & CLONE_THREAD)) 2269 if (!(clone_flags & CLONE_THREAD))
2240 free_signal_struct(p->signal); 2270 free_signal_struct(p->signal);
@@ -2265,7 +2295,7 @@ bad_fork_cleanup_count:
2265bad_fork_free: 2295bad_fork_free:
2266 p->state = TASK_DEAD; 2296 p->state = TASK_DEAD;
2267 put_task_stack(p); 2297 put_task_stack(p);
2268 free_task(p); 2298 delayed_free_task(p);
2269fork_out: 2299fork_out:
2270 spin_lock_irq(&current->sighand->siglock); 2300 spin_lock_irq(&current->sighand->siglock);
2271 hlist_del_init(&delayed.node); 2301 hlist_del_init(&delayed.node);
@@ -2286,8 +2316,11 @@ static inline void init_idle_pids(struct task_struct *idle)
2286struct task_struct *fork_idle(int cpu) 2316struct task_struct *fork_idle(int cpu)
2287{ 2317{
2288 struct task_struct *task; 2318 struct task_struct *task;
2289 task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0, 2319 struct kernel_clone_args args = {
2290 cpu_to_node(cpu)); 2320 .flags = CLONE_VM,
2321 };
2322
2323 task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
2291 if (!IS_ERR(task)) { 2324 if (!IS_ERR(task)) {
2292 init_idle_pids(task); 2325 init_idle_pids(task);
2293 init_idle(task, cpu); 2326 init_idle(task, cpu);
@@ -2307,13 +2340,9 @@ struct mm_struct *copy_init_mm(void)
2307 * It copies the process, and if successful kick-starts 2340 * It copies the process, and if successful kick-starts
2308 * it and waits for it to finish using the VM if required. 2341 * it and waits for it to finish using the VM if required.
2309 */ 2342 */
2310long _do_fork(unsigned long clone_flags, 2343long _do_fork(struct kernel_clone_args *args)
2311 unsigned long stack_start,
2312 unsigned long stack_size,
2313 int __user *parent_tidptr,
2314 int __user *child_tidptr,
2315 unsigned long tls)
2316{ 2344{
2345 u64 clone_flags = args->flags;
2317 struct completion vfork; 2346 struct completion vfork;
2318 struct pid *pid; 2347 struct pid *pid;
2319 struct task_struct *p; 2348 struct task_struct *p;
@@ -2329,7 +2358,7 @@ long _do_fork(unsigned long clone_flags,
2329 if (!(clone_flags & CLONE_UNTRACED)) { 2358 if (!(clone_flags & CLONE_UNTRACED)) {
2330 if (clone_flags & CLONE_VFORK) 2359 if (clone_flags & CLONE_VFORK)
2331 trace = PTRACE_EVENT_VFORK; 2360 trace = PTRACE_EVENT_VFORK;
2332 else if ((clone_flags & CSIGNAL) != SIGCHLD) 2361 else if (args->exit_signal != SIGCHLD)
2333 trace = PTRACE_EVENT_CLONE; 2362 trace = PTRACE_EVENT_CLONE;
2334 else 2363 else
2335 trace = PTRACE_EVENT_FORK; 2364 trace = PTRACE_EVENT_FORK;
@@ -2338,8 +2367,7 @@ long _do_fork(unsigned long clone_flags,
2338 trace = 0; 2367 trace = 0;
2339 } 2368 }
2340 2369
2341 p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr, 2370 p = copy_process(NULL, trace, NUMA_NO_NODE, args);
2342 child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
2343 add_latent_entropy(); 2371 add_latent_entropy();
2344 2372
2345 if (IS_ERR(p)) 2373 if (IS_ERR(p))
@@ -2355,7 +2383,7 @@ long _do_fork(unsigned long clone_flags,
2355 nr = pid_vnr(pid); 2383 nr = pid_vnr(pid);
2356 2384
2357 if (clone_flags & CLONE_PARENT_SETTID) 2385 if (clone_flags & CLONE_PARENT_SETTID)
2358 put_user(nr, parent_tidptr); 2386 put_user(nr, args->parent_tid);
2359 2387
2360 if (clone_flags & CLONE_VFORK) { 2388 if (clone_flags & CLONE_VFORK) {
2361 p->vfork_done = &vfork; 2389 p->vfork_done = &vfork;
@@ -2387,8 +2415,16 @@ long do_fork(unsigned long clone_flags,
2387 int __user *parent_tidptr, 2415 int __user *parent_tidptr,
2388 int __user *child_tidptr) 2416 int __user *child_tidptr)
2389{ 2417{
2390 return _do_fork(clone_flags, stack_start, stack_size, 2418 struct kernel_clone_args args = {
2391 parent_tidptr, child_tidptr, 0); 2419 .flags = (clone_flags & ~CSIGNAL),
2420 .child_tid = child_tidptr,
2421 .parent_tid = parent_tidptr,
2422 .exit_signal = (clone_flags & CSIGNAL),
2423 .stack = stack_start,
2424 .stack_size = stack_size,
2425 };
2426
2427 return _do_fork(&args);
2392} 2428}
2393#endif 2429#endif
2394 2430
@@ -2397,15 +2433,25 @@ long do_fork(unsigned long clone_flags,
2397 */ 2433 */
2398pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) 2434pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
2399{ 2435{
2400 return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, 2436 struct kernel_clone_args args = {
2401 (unsigned long)arg, NULL, NULL, 0); 2437 .flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
2438 .exit_signal = (flags & CSIGNAL),
2439 .stack = (unsigned long)fn,
2440 .stack_size = (unsigned long)arg,
2441 };
2442
2443 return _do_fork(&args);
2402} 2444}
2403 2445
2404#ifdef __ARCH_WANT_SYS_FORK 2446#ifdef __ARCH_WANT_SYS_FORK
2405SYSCALL_DEFINE0(fork) 2447SYSCALL_DEFINE0(fork)
2406{ 2448{
2407#ifdef CONFIG_MMU 2449#ifdef CONFIG_MMU
2408 return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); 2450 struct kernel_clone_args args = {
2451 .exit_signal = SIGCHLD,
2452 };
2453
2454 return _do_fork(&args);
2409#else 2455#else
2410 /* can not support in nommu mode */ 2456 /* can not support in nommu mode */
2411 return -EINVAL; 2457 return -EINVAL;
@@ -2416,8 +2462,12 @@ SYSCALL_DEFINE0(fork)
2416#ifdef __ARCH_WANT_SYS_VFORK 2462#ifdef __ARCH_WANT_SYS_VFORK
2417SYSCALL_DEFINE0(vfork) 2463SYSCALL_DEFINE0(vfork)
2418{ 2464{
2419 return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 2465 struct kernel_clone_args args = {
2420 0, NULL, NULL, 0); 2466 .flags = CLONE_VFORK | CLONE_VM,
2467 .exit_signal = SIGCHLD,
2468 };
2469
2470 return _do_fork(&args);
2421} 2471}
2422#endif 2472#endif
2423 2473
@@ -2445,7 +2495,112 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
2445 unsigned long, tls) 2495 unsigned long, tls)
2446#endif 2496#endif
2447{ 2497{
2448 return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); 2498 struct kernel_clone_args args = {
2499 .flags = (clone_flags & ~CSIGNAL),
2500 .pidfd = parent_tidptr,
2501 .child_tid = child_tidptr,
2502 .parent_tid = parent_tidptr,
2503 .exit_signal = (clone_flags & CSIGNAL),
2504 .stack = newsp,
2505 .tls = tls,
2506 };
2507
2508 /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
2509 if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID))
2510 return -EINVAL;
2511
2512 return _do_fork(&args);
2513}
2514#endif
2515
2516#ifdef __ARCH_WANT_SYS_CLONE3
2517noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
2518 struct clone_args __user *uargs,
2519 size_t size)
2520{
2521 struct clone_args args;
2522
2523 if (unlikely(size > PAGE_SIZE))
2524 return -E2BIG;
2525
2526 if (unlikely(size < sizeof(struct clone_args)))
2527 return -EINVAL;
2528
2529 if (unlikely(!access_ok(uargs, size)))
2530 return -EFAULT;
2531
2532 if (size > sizeof(struct clone_args)) {
2533 unsigned char __user *addr;
2534 unsigned char __user *end;
2535 unsigned char val;
2536
2537 addr = (void __user *)uargs + sizeof(struct clone_args);
2538 end = (void __user *)uargs + size;
2539
2540 for (; addr < end; addr++) {
2541 if (get_user(val, addr))
2542 return -EFAULT;
2543 if (val)
2544 return -E2BIG;
2545 }
2546
2547 size = sizeof(struct clone_args);
2548 }
2549
2550 if (copy_from_user(&args, uargs, size))
2551 return -EFAULT;
2552
2553 *kargs = (struct kernel_clone_args){
2554 .flags = args.flags,
2555 .pidfd = u64_to_user_ptr(args.pidfd),
2556 .child_tid = u64_to_user_ptr(args.child_tid),
2557 .parent_tid = u64_to_user_ptr(args.parent_tid),
2558 .exit_signal = args.exit_signal,
2559 .stack = args.stack,
2560 .stack_size = args.stack_size,
2561 .tls = args.tls,
2562 };
2563
2564 return 0;
2565}
2566
2567static bool clone3_args_valid(const struct kernel_clone_args *kargs)
2568{
2569 /*
2570 * All lower bits of the flag word are taken.
2571 * Verify that no other unknown flags are passed along.
2572 */
2573 if (kargs->flags & ~CLONE_LEGACY_FLAGS)
2574 return false;
2575
2576 /*
2577 * - make the CLONE_DETACHED bit reuseable for clone3
2578 * - make the CSIGNAL bits reuseable for clone3
2579 */
2580 if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
2581 return false;
2582
2583 if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
2584 kargs->exit_signal)
2585 return false;
2586
2587 return true;
2588}
2589
2590SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
2591{
2592 int err;
2593
2594 struct kernel_clone_args kargs;
2595
2596 err = copy_clone_args_from_user(&kargs, uargs, size);
2597 if (err)
2598 return err;
2599
2600 if (!clone3_args_valid(&kargs))
2601 return -EINVAL;
2602
2603 return _do_fork(&kargs);
2449} 2604}
2450#endif 2605#endif
2451 2606