diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 107 |
1 files changed, 103 insertions, 4 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index fbe9dfcd8680..8b03d93ba068 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -11,6 +11,7 @@ | |||
11 | * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' | 11 | * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/anon_inodes.h> | ||
14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
15 | #include <linux/sched/autogroup.h> | 16 | #include <linux/sched/autogroup.h> |
16 | #include <linux/sched/mm.h> | 17 | #include <linux/sched/mm.h> |
@@ -21,6 +22,7 @@ | |||
21 | #include <linux/sched/task.h> | 22 | #include <linux/sched/task.h> |
22 | #include <linux/sched/task_stack.h> | 23 | #include <linux/sched/task_stack.h> |
23 | #include <linux/sched/cputime.h> | 24 | #include <linux/sched/cputime.h> |
25 | #include <linux/seq_file.h> | ||
24 | #include <linux/rtmutex.h> | 26 | #include <linux/rtmutex.h> |
25 | #include <linux/init.h> | 27 | #include <linux/init.h> |
26 | #include <linux/unistd.h> | 28 | #include <linux/unistd.h> |
@@ -1670,6 +1672,58 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
1670 | #endif /* #ifdef CONFIG_TASKS_RCU */ | 1672 | #endif /* #ifdef CONFIG_TASKS_RCU */ |
1671 | } | 1673 | } |
1672 | 1674 | ||
1675 | static int pidfd_release(struct inode *inode, struct file *file) | ||
1676 | { | ||
1677 | struct pid *pid = file->private_data; | ||
1678 | |||
1679 | file->private_data = NULL; | ||
1680 | put_pid(pid); | ||
1681 | return 0; | ||
1682 | } | ||
1683 | |||
1684 | #ifdef CONFIG_PROC_FS | ||
1685 | static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) | ||
1686 | { | ||
1687 | struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); | ||
1688 | struct pid *pid = f->private_data; | ||
1689 | |||
1690 | seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns)); | ||
1691 | seq_putc(m, '\n'); | ||
1692 | } | ||
1693 | #endif | ||
1694 | |||
1695 | const struct file_operations pidfd_fops = { | ||
1696 | .release = pidfd_release, | ||
1697 | #ifdef CONFIG_PROC_FS | ||
1698 | .show_fdinfo = pidfd_show_fdinfo, | ||
1699 | #endif | ||
1700 | }; | ||
1701 | |||
1702 | /** | ||
1703 | * pidfd_create() - Create a new pid file descriptor. | ||
1704 | * | ||
1705 | * @pid: struct pid that the pidfd will reference | ||
1706 | * | ||
1707 | * This creates a new pid file descriptor with the O_CLOEXEC flag set. | ||
1708 | * | ||
1709 | * Note, that this function can only be called after the fd table has | ||
1710 | * been unshared to avoid leaking the pidfd to the new process. | ||
1711 | * | ||
1712 | * Return: On success, a cloexec pidfd is returned. | ||
1713 | * On error, a negative errno number will be returned. | ||
1714 | */ | ||
1715 | static int pidfd_create(struct pid *pid) | ||
1716 | { | ||
1717 | int fd; | ||
1718 | |||
1719 | fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), | ||
1720 | O_RDWR | O_CLOEXEC); | ||
1721 | if (fd < 0) | ||
1722 | put_pid(pid); | ||
1723 | |||
1724 | return fd; | ||
1725 | } | ||
1726 | |||
1673 | /* | 1727 | /* |
1674 | * This creates a new process as a copy of the old one, | 1728 | * This creates a new process as a copy of the old one, |
1675 | * but does not actually start it yet. | 1729 | * but does not actually start it yet. |
@@ -1682,13 +1736,14 @@ static __latent_entropy struct task_struct *copy_process( | |||
1682 | unsigned long clone_flags, | 1736 | unsigned long clone_flags, |
1683 | unsigned long stack_start, | 1737 | unsigned long stack_start, |
1684 | unsigned long stack_size, | 1738 | unsigned long stack_size, |
1739 | int __user *parent_tidptr, | ||
1685 | int __user *child_tidptr, | 1740 | int __user *child_tidptr, |
1686 | struct pid *pid, | 1741 | struct pid *pid, |
1687 | int trace, | 1742 | int trace, |
1688 | unsigned long tls, | 1743 | unsigned long tls, |
1689 | int node) | 1744 | int node) |
1690 | { | 1745 | { |
1691 | int retval; | 1746 | int pidfd = -1, retval; |
1692 | struct task_struct *p; | 1747 | struct task_struct *p; |
1693 | struct multiprocess_signals delayed; | 1748 | struct multiprocess_signals delayed; |
1694 | 1749 | ||
@@ -1738,6 +1793,31 @@ static __latent_entropy struct task_struct *copy_process( | |||
1738 | return ERR_PTR(-EINVAL); | 1793 | return ERR_PTR(-EINVAL); |
1739 | } | 1794 | } |
1740 | 1795 | ||
1796 | if (clone_flags & CLONE_PIDFD) { | ||
1797 | int reserved; | ||
1798 | |||
1799 | /* | ||
1800 | * - CLONE_PARENT_SETTID is useless for pidfds and also | ||
1801 | * parent_tidptr is used to return pidfds. | ||
1802 | * - CLONE_DETACHED is blocked so that we can potentially | ||
1803 | * reuse it later for CLONE_PIDFD. | ||
1804 | * - CLONE_THREAD is blocked until someone really needs it. | ||
1805 | */ | ||
1806 | if (clone_flags & | ||
1807 | (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) | ||
1808 | return ERR_PTR(-EINVAL); | ||
1809 | |||
1810 | /* | ||
1811 | * Verify that parent_tidptr is sane so we can potentially | ||
1812 | * reuse it later. | ||
1813 | */ | ||
1814 | if (get_user(reserved, parent_tidptr)) | ||
1815 | return ERR_PTR(-EFAULT); | ||
1816 | |||
1817 | if (reserved != 0) | ||
1818 | return ERR_PTR(-EINVAL); | ||
1819 | } | ||
1820 | |||
1741 | /* | 1821 | /* |
1742 | * Force any signals received before this point to be delivered | 1822 | * Force any signals received before this point to be delivered |
1743 | * before the fork happens. Collect up signals sent to multiple | 1823 | * before the fork happens. Collect up signals sent to multiple |
@@ -1944,6 +2024,22 @@ static __latent_entropy struct task_struct *copy_process( | |||
1944 | } | 2024 | } |
1945 | } | 2025 | } |
1946 | 2026 | ||
2027 | /* | ||
2028 | * This has to happen after we've potentially unshared the file | ||
2029 | * descriptor table (so that the pidfd doesn't leak into the child | ||
2030 | * if the fd table isn't shared). | ||
2031 | */ | ||
2032 | if (clone_flags & CLONE_PIDFD) { | ||
2033 | retval = pidfd_create(pid); | ||
2034 | if (retval < 0) | ||
2035 | goto bad_fork_free_pid; | ||
2036 | |||
2037 | pidfd = retval; | ||
2038 | retval = put_user(pidfd, parent_tidptr); | ||
2039 | if (retval) | ||
2040 | goto bad_fork_put_pidfd; | ||
2041 | } | ||
2042 | |||
1947 | #ifdef CONFIG_BLOCK | 2043 | #ifdef CONFIG_BLOCK |
1948 | p->plug = NULL; | 2044 | p->plug = NULL; |
1949 | #endif | 2045 | #endif |
@@ -2004,7 +2100,7 @@ static __latent_entropy struct task_struct *copy_process( | |||
2004 | */ | 2100 | */ |
2005 | retval = cgroup_can_fork(p); | 2101 | retval = cgroup_can_fork(p); |
2006 | if (retval) | 2102 | if (retval) |
2007 | goto bad_fork_free_pid; | 2103 | goto bad_fork_put_pidfd; |
2008 | 2104 | ||
2009 | /* | 2105 | /* |
2010 | * From this point on we must avoid any synchronous user-space | 2106 | * From this point on we must avoid any synchronous user-space |
@@ -2119,6 +2215,9 @@ bad_fork_cancel_cgroup: | |||
2119 | spin_unlock(&current->sighand->siglock); | 2215 | spin_unlock(&current->sighand->siglock); |
2120 | write_unlock_irq(&tasklist_lock); | 2216 | write_unlock_irq(&tasklist_lock); |
2121 | cgroup_cancel_fork(p); | 2217 | cgroup_cancel_fork(p); |
2218 | bad_fork_put_pidfd: | ||
2219 | if (clone_flags & CLONE_PIDFD) | ||
2220 | ksys_close(pidfd); | ||
2122 | bad_fork_free_pid: | 2221 | bad_fork_free_pid: |
2123 | cgroup_threadgroup_change_end(current); | 2222 | cgroup_threadgroup_change_end(current); |
2124 | if (pid != &init_struct_pid) | 2223 | if (pid != &init_struct_pid) |
@@ -2184,7 +2283,7 @@ static inline void init_idle_pids(struct task_struct *idle) | |||
2184 | struct task_struct *fork_idle(int cpu) | 2283 | struct task_struct *fork_idle(int cpu) |
2185 | { | 2284 | { |
2186 | struct task_struct *task; | 2285 | struct task_struct *task; |
2187 | task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, | 2286 | task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0, |
2188 | cpu_to_node(cpu)); | 2287 | cpu_to_node(cpu)); |
2189 | if (!IS_ERR(task)) { | 2288 | if (!IS_ERR(task)) { |
2190 | init_idle_pids(task); | 2289 | init_idle_pids(task); |
@@ -2236,7 +2335,7 @@ long _do_fork(unsigned long clone_flags, | |||
2236 | trace = 0; | 2335 | trace = 0; |
2237 | } | 2336 | } |
2238 | 2337 | ||
2239 | p = copy_process(clone_flags, stack_start, stack_size, | 2338 | p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr, |
2240 | child_tidptr, NULL, trace, tls, NUMA_NO_NODE); | 2339 | child_tidptr, NULL, trace, tls, NUMA_NO_NODE); |
2241 | add_latent_entropy(); | 2340 | add_latent_entropy(); |
2242 | 2341 | ||