diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 132 |
1 files changed, 122 insertions, 10 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 9dcd18aa210b..8b03d93ba068 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -11,6 +11,7 @@ | |||
11 | * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' | 11 | * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/anon_inodes.h> | ||
14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
15 | #include <linux/sched/autogroup.h> | 16 | #include <linux/sched/autogroup.h> |
16 | #include <linux/sched/mm.h> | 17 | #include <linux/sched/mm.h> |
@@ -21,6 +22,7 @@ | |||
21 | #include <linux/sched/task.h> | 22 | #include <linux/sched/task.h> |
22 | #include <linux/sched/task_stack.h> | 23 | #include <linux/sched/task_stack.h> |
23 | #include <linux/sched/cputime.h> | 24 | #include <linux/sched/cputime.h> |
25 | #include <linux/seq_file.h> | ||
24 | #include <linux/rtmutex.h> | 26 | #include <linux/rtmutex.h> |
25 | #include <linux/init.h> | 27 | #include <linux/init.h> |
26 | #include <linux/unistd.h> | 28 | #include <linux/unistd.h> |
@@ -815,6 +817,7 @@ void __init fork_init(void) | |||
815 | #endif | 817 | #endif |
816 | 818 | ||
817 | lockdep_init_task(&init_task); | 819 | lockdep_init_task(&init_task); |
820 | uprobes_init(); | ||
818 | } | 821 | } |
819 | 822 | ||
820 | int __weak arch_dup_task_struct(struct task_struct *dst, | 823 | int __weak arch_dup_task_struct(struct task_struct *dst, |
@@ -1298,13 +1301,20 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
1298 | complete_vfork_done(tsk); | 1301 | complete_vfork_done(tsk); |
1299 | } | 1302 | } |
1300 | 1303 | ||
1301 | /* | 1304 | /** |
1302 | * Allocate a new mm structure and copy contents from the | 1305 | * dup_mm() - duplicates an existing mm structure |
1303 | * mm structure of the passed in task structure. | 1306 | * @tsk: the task_struct with which the new mm will be associated. |
1307 | * @oldmm: the mm to duplicate. | ||
1308 | * | ||
1309 | * Allocates a new mm structure and duplicates the provided @oldmm structure | ||
1310 | * content into it. | ||
1311 | * | ||
1312 | * Return: the duplicated mm or NULL on failure. | ||
1304 | */ | 1313 | */ |
1305 | static struct mm_struct *dup_mm(struct task_struct *tsk) | 1314 | static struct mm_struct *dup_mm(struct task_struct *tsk, |
1315 | struct mm_struct *oldmm) | ||
1306 | { | 1316 | { |
1307 | struct mm_struct *mm, *oldmm = current->mm; | 1317 | struct mm_struct *mm; |
1308 | int err; | 1318 | int err; |
1309 | 1319 | ||
1310 | mm = allocate_mm(); | 1320 | mm = allocate_mm(); |
@@ -1371,7 +1381,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) | |||
1371 | } | 1381 | } |
1372 | 1382 | ||
1373 | retval = -ENOMEM; | 1383 | retval = -ENOMEM; |
1374 | mm = dup_mm(tsk); | 1384 | mm = dup_mm(tsk, current->mm); |
1375 | if (!mm) | 1385 | if (!mm) |
1376 | goto fail_nomem; | 1386 | goto fail_nomem; |
1377 | 1387 | ||
@@ -1662,6 +1672,58 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
1662 | #endif /* #ifdef CONFIG_TASKS_RCU */ | 1672 | #endif /* #ifdef CONFIG_TASKS_RCU */ |
1663 | } | 1673 | } |
1664 | 1674 | ||
1675 | static int pidfd_release(struct inode *inode, struct file *file) | ||
1676 | { | ||
1677 | struct pid *pid = file->private_data; | ||
1678 | |||
1679 | file->private_data = NULL; | ||
1680 | put_pid(pid); | ||
1681 | return 0; | ||
1682 | } | ||
1683 | |||
1684 | #ifdef CONFIG_PROC_FS | ||
1685 | static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) | ||
1686 | { | ||
1687 | struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); | ||
1688 | struct pid *pid = f->private_data; | ||
1689 | |||
1690 | seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns)); | ||
1691 | seq_putc(m, '\n'); | ||
1692 | } | ||
1693 | #endif | ||
1694 | |||
1695 | const struct file_operations pidfd_fops = { | ||
1696 | .release = pidfd_release, | ||
1697 | #ifdef CONFIG_PROC_FS | ||
1698 | .show_fdinfo = pidfd_show_fdinfo, | ||
1699 | #endif | ||
1700 | }; | ||
1701 | |||
1702 | /** | ||
1703 | * pidfd_create() - Create a new pid file descriptor. | ||
1704 | * | ||
1705 | * @pid: struct pid that the pidfd will reference | ||
1706 | * | ||
1707 | * This creates a new pid file descriptor with the O_CLOEXEC flag set. | ||
1708 | * | ||
1709 | * Note, that this function can only be called after the fd table has | ||
1710 | * been unshared to avoid leaking the pidfd to the new process. | ||
1711 | * | ||
1712 | * Return: On success, a cloexec pidfd is returned. | ||
1713 | * On error, a negative errno number will be returned. | ||
1714 | */ | ||
1715 | static int pidfd_create(struct pid *pid) | ||
1716 | { | ||
1717 | int fd; | ||
1718 | |||
1719 | fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), | ||
1720 | O_RDWR | O_CLOEXEC); | ||
1721 | if (fd < 0) | ||
1722 | put_pid(pid); | ||
1723 | |||
1724 | return fd; | ||
1725 | } | ||
1726 | |||
1665 | /* | 1727 | /* |
1666 | * This creates a new process as a copy of the old one, | 1728 | * This creates a new process as a copy of the old one, |
1667 | * but does not actually start it yet. | 1729 | * but does not actually start it yet. |
@@ -1674,13 +1736,14 @@ static __latent_entropy struct task_struct *copy_process( | |||
1674 | unsigned long clone_flags, | 1736 | unsigned long clone_flags, |
1675 | unsigned long stack_start, | 1737 | unsigned long stack_start, |
1676 | unsigned long stack_size, | 1738 | unsigned long stack_size, |
1739 | int __user *parent_tidptr, | ||
1677 | int __user *child_tidptr, | 1740 | int __user *child_tidptr, |
1678 | struct pid *pid, | 1741 | struct pid *pid, |
1679 | int trace, | 1742 | int trace, |
1680 | unsigned long tls, | 1743 | unsigned long tls, |
1681 | int node) | 1744 | int node) |
1682 | { | 1745 | { |
1683 | int retval; | 1746 | int pidfd = -1, retval; |
1684 | struct task_struct *p; | 1747 | struct task_struct *p; |
1685 | struct multiprocess_signals delayed; | 1748 | struct multiprocess_signals delayed; |
1686 | 1749 | ||
@@ -1730,6 +1793,31 @@ static __latent_entropy struct task_struct *copy_process( | |||
1730 | return ERR_PTR(-EINVAL); | 1793 | return ERR_PTR(-EINVAL); |
1731 | } | 1794 | } |
1732 | 1795 | ||
1796 | if (clone_flags & CLONE_PIDFD) { | ||
1797 | int reserved; | ||
1798 | |||
1799 | /* | ||
1800 | * - CLONE_PARENT_SETTID is useless for pidfds and also | ||
1801 | * parent_tidptr is used to return pidfds. | ||
1802 | * - CLONE_DETACHED is blocked so that we can potentially | ||
1803 | * reuse it later for CLONE_PIDFD. | ||
1804 | * - CLONE_THREAD is blocked until someone really needs it. | ||
1805 | */ | ||
1806 | if (clone_flags & | ||
1807 | (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) | ||
1808 | return ERR_PTR(-EINVAL); | ||
1809 | |||
1810 | /* | ||
1811 | * Verify that parent_tidptr is sane so we can potentially | ||
1812 | * reuse it later. | ||
1813 | */ | ||
1814 | if (get_user(reserved, parent_tidptr)) | ||
1815 | return ERR_PTR(-EFAULT); | ||
1816 | |||
1817 | if (reserved != 0) | ||
1818 | return ERR_PTR(-EINVAL); | ||
1819 | } | ||
1820 | |||
1733 | /* | 1821 | /* |
1734 | * Force any signals received before this point to be delivered | 1822 | * Force any signals received before this point to be delivered |
1735 | * before the fork happens. Collect up signals sent to multiple | 1823 | * before the fork happens. Collect up signals sent to multiple |
@@ -1936,6 +2024,22 @@ static __latent_entropy struct task_struct *copy_process( | |||
1936 | } | 2024 | } |
1937 | } | 2025 | } |
1938 | 2026 | ||
2027 | /* | ||
2028 | * This has to happen after we've potentially unshared the file | ||
2029 | * descriptor table (so that the pidfd doesn't leak into the child | ||
2030 | * if the fd table isn't shared). | ||
2031 | */ | ||
2032 | if (clone_flags & CLONE_PIDFD) { | ||
2033 | retval = pidfd_create(pid); | ||
2034 | if (retval < 0) | ||
2035 | goto bad_fork_free_pid; | ||
2036 | |||
2037 | pidfd = retval; | ||
2038 | retval = put_user(pidfd, parent_tidptr); | ||
2039 | if (retval) | ||
2040 | goto bad_fork_put_pidfd; | ||
2041 | } | ||
2042 | |||
1939 | #ifdef CONFIG_BLOCK | 2043 | #ifdef CONFIG_BLOCK |
1940 | p->plug = NULL; | 2044 | p->plug = NULL; |
1941 | #endif | 2045 | #endif |
@@ -1996,7 +2100,7 @@ static __latent_entropy struct task_struct *copy_process( | |||
1996 | */ | 2100 | */ |
1997 | retval = cgroup_can_fork(p); | 2101 | retval = cgroup_can_fork(p); |
1998 | if (retval) | 2102 | if (retval) |
1999 | goto bad_fork_free_pid; | 2103 | goto bad_fork_put_pidfd; |
2000 | 2104 | ||
2001 | /* | 2105 | /* |
2002 | * From this point on we must avoid any synchronous user-space | 2106 | * From this point on we must avoid any synchronous user-space |
@@ -2111,6 +2215,9 @@ bad_fork_cancel_cgroup: | |||
2111 | spin_unlock(&current->sighand->siglock); | 2215 | spin_unlock(&current->sighand->siglock); |
2112 | write_unlock_irq(&tasklist_lock); | 2216 | write_unlock_irq(&tasklist_lock); |
2113 | cgroup_cancel_fork(p); | 2217 | cgroup_cancel_fork(p); |
2218 | bad_fork_put_pidfd: | ||
2219 | if (clone_flags & CLONE_PIDFD) | ||
2220 | ksys_close(pidfd); | ||
2114 | bad_fork_free_pid: | 2221 | bad_fork_free_pid: |
2115 | cgroup_threadgroup_change_end(current); | 2222 | cgroup_threadgroup_change_end(current); |
2116 | if (pid != &init_struct_pid) | 2223 | if (pid != &init_struct_pid) |
@@ -2176,7 +2283,7 @@ static inline void init_idle_pids(struct task_struct *idle) | |||
2176 | struct task_struct *fork_idle(int cpu) | 2283 | struct task_struct *fork_idle(int cpu) |
2177 | { | 2284 | { |
2178 | struct task_struct *task; | 2285 | struct task_struct *task; |
2179 | task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, | 2286 | task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0, |
2180 | cpu_to_node(cpu)); | 2287 | cpu_to_node(cpu)); |
2181 | if (!IS_ERR(task)) { | 2288 | if (!IS_ERR(task)) { |
2182 | init_idle_pids(task); | 2289 | init_idle_pids(task); |
@@ -2186,6 +2293,11 @@ struct task_struct *fork_idle(int cpu) | |||
2186 | return task; | 2293 | return task; |
2187 | } | 2294 | } |
2188 | 2295 | ||
2296 | struct mm_struct *copy_init_mm(void) | ||
2297 | { | ||
2298 | return dup_mm(NULL, &init_mm); | ||
2299 | } | ||
2300 | |||
2189 | /* | 2301 | /* |
2190 | * Ok, this is the main fork-routine. | 2302 | * Ok, this is the main fork-routine. |
2191 | * | 2303 | * |
@@ -2223,7 +2335,7 @@ long _do_fork(unsigned long clone_flags, | |||
2223 | trace = 0; | 2335 | trace = 0; |
2224 | } | 2336 | } |
2225 | 2337 | ||
2226 | p = copy_process(clone_flags, stack_start, stack_size, | 2338 | p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr, |
2227 | child_tidptr, NULL, trace, tls, NUMA_NO_NODE); | 2339 | child_tidptr, NULL, trace, tls, NUMA_NO_NODE); |
2228 | add_latent_entropy(); | 2340 | add_latent_entropy(); |
2229 | 2341 | ||