aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c161
1 files changed, 112 insertions, 49 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7d3aa2..65ca6d27f24e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
146static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, 146static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
147 int node) 147 int node)
148{ 148{
149 struct page *page = alloc_pages_node(node, THREADINFO_GFP, 149 struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
150 THREAD_SIZE_ORDER); 150 THREAD_SIZE_ORDER);
151 151
152 return page ? page_address(page) : NULL; 152 return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
154 154
155static inline void free_thread_info(struct thread_info *ti) 155static inline void free_thread_info(struct thread_info *ti)
156{ 156{
157 free_pages((unsigned long)ti, THREAD_SIZE_ORDER); 157 free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
158} 158}
159# else 159# else
160static struct kmem_cache *thread_info_cache; 160static struct kmem_cache *thread_info_cache;
@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
352 unsigned long charge; 352 unsigned long charge;
353 struct mempolicy *pol; 353 struct mempolicy *pol;
354 354
355 uprobe_start_dup_mmap();
355 down_write(&oldmm->mmap_sem); 356 down_write(&oldmm->mmap_sem);
356 flush_cache_dup_mm(oldmm); 357 flush_cache_dup_mm(oldmm);
357 uprobe_dup_mmap(oldmm, mm); 358 uprobe_dup_mmap(oldmm, mm);
@@ -469,6 +470,7 @@ out:
469 up_write(&mm->mmap_sem); 470 up_write(&mm->mmap_sem);
470 flush_tlb_mm(oldmm); 471 flush_tlb_mm(oldmm);
471 up_write(&oldmm->mmap_sem); 472 up_write(&oldmm->mmap_sem);
473 uprobe_end_dup_mmap();
472 return retval; 474 return retval;
473fail_nomem_anon_vma_fork: 475fail_nomem_anon_vma_fork:
474 mpol_put(pol); 476 mpol_put(pol);
@@ -821,6 +823,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
821#ifdef CONFIG_TRANSPARENT_HUGEPAGE 823#ifdef CONFIG_TRANSPARENT_HUGEPAGE
822 mm->pmd_huge_pte = NULL; 824 mm->pmd_huge_pte = NULL;
823#endif 825#endif
826#ifdef CONFIG_NUMA_BALANCING
827 mm->first_nid = NUMA_PTE_SCAN_INIT;
828#endif
824 if (!mm_init(mm, tsk)) 829 if (!mm_init(mm, tsk))
825 goto fail_nomem; 830 goto fail_nomem;
826 831
@@ -1039,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1039 atomic_set(&sig->live, 1); 1044 atomic_set(&sig->live, 1);
1040 atomic_set(&sig->sigcnt, 1); 1045 atomic_set(&sig->sigcnt, 1);
1041 init_waitqueue_head(&sig->wait_chldexit); 1046 init_waitqueue_head(&sig->wait_chldexit);
1042 if (clone_flags & CLONE_NEWPID)
1043 sig->flags |= SIGNAL_UNKILLABLE;
1044 sig->curr_target = tsk; 1047 sig->curr_target = tsk;
1045 init_sigpending(&sig->shared_pending); 1048 init_sigpending(&sig->shared_pending);
1046 INIT_LIST_HEAD(&sig->posix_timers); 1049 INIT_LIST_HEAD(&sig->posix_timers);
@@ -1127,7 +1130,6 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
1127 */ 1130 */
1128static struct task_struct *copy_process(unsigned long clone_flags, 1131static struct task_struct *copy_process(unsigned long clone_flags,
1129 unsigned long stack_start, 1132 unsigned long stack_start,
1130 struct pt_regs *regs,
1131 unsigned long stack_size, 1133 unsigned long stack_size,
1132 int __user *child_tidptr, 1134 int __user *child_tidptr,
1133 struct pid *pid, 1135 struct pid *pid,
@@ -1135,7 +1137,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1135{ 1137{
1136 int retval; 1138 int retval;
1137 struct task_struct *p; 1139 struct task_struct *p;
1138 int cgroup_callbacks_done = 0;
1139 1140
1140 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 1141 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
1141 return ERR_PTR(-EINVAL); 1142 return ERR_PTR(-EINVAL);
@@ -1165,6 +1166,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1165 current->signal->flags & SIGNAL_UNKILLABLE) 1166 current->signal->flags & SIGNAL_UNKILLABLE)
1166 return ERR_PTR(-EINVAL); 1167 return ERR_PTR(-EINVAL);
1167 1168
1169 /*
1170 * If the new process will be in a different pid namespace
1171 * don't allow the creation of threads.
1172 */
1173 if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
1174 (task_active_pid_ns(current) != current->nsproxy->pid_ns))
1175 return ERR_PTR(-EINVAL);
1176
1168 retval = security_task_create(clone_flags); 1177 retval = security_task_create(clone_flags);
1169 if (retval) 1178 if (retval)
1170 goto fork_out; 1179 goto fork_out;
@@ -1222,7 +1231,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1222 p->utime = p->stime = p->gtime = 0; 1231 p->utime = p->stime = p->gtime = 0;
1223 p->utimescaled = p->stimescaled = 0; 1232 p->utimescaled = p->stimescaled = 0;
1224#ifndef CONFIG_VIRT_CPU_ACCOUNTING 1233#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1225 p->prev_utime = p->prev_stime = 0; 1234 p->prev_cputime.utime = p->prev_cputime.stime = 0;
1226#endif 1235#endif
1227#if defined(SPLIT_RSS_COUNTING) 1236#if defined(SPLIT_RSS_COUNTING)
1228 memset(&p->rss_stat, 0, sizeof(p->rss_stat)); 1237 memset(&p->rss_stat, 0, sizeof(p->rss_stat));
@@ -1320,7 +1329,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1320 retval = copy_io(clone_flags, p); 1329 retval = copy_io(clone_flags, p);
1321 if (retval) 1330 if (retval)
1322 goto bad_fork_cleanup_namespaces; 1331 goto bad_fork_cleanup_namespaces;
1323 retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); 1332 retval = copy_thread(clone_flags, stack_start, stack_size, p);
1324 if (retval) 1333 if (retval)
1325 goto bad_fork_cleanup_io; 1334 goto bad_fork_cleanup_io;
1326 1335
@@ -1393,12 +1402,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1393 INIT_LIST_HEAD(&p->thread_group); 1402 INIT_LIST_HEAD(&p->thread_group);
1394 p->task_works = NULL; 1403 p->task_works = NULL;
1395 1404
1396 /* Now that the task is set up, run cgroup callbacks if
1397 * necessary. We need to run them before the task is visible
1398 * on the tasklist. */
1399 cgroup_fork_callbacks(p);
1400 cgroup_callbacks_done = 1;
1401
1402 /* Need tasklist lock for parent etc handling! */ 1405 /* Need tasklist lock for parent etc handling! */
1403 write_lock_irq(&tasklist_lock); 1406 write_lock_irq(&tasklist_lock);
1404 1407
@@ -1441,8 +1444,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1441 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); 1444 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
1442 1445
1443 if (thread_group_leader(p)) { 1446 if (thread_group_leader(p)) {
1444 if (is_child_reaper(pid)) 1447 if (is_child_reaper(pid)) {
1445 p->nsproxy->pid_ns->child_reaper = p; 1448 ns_of_pid(pid)->child_reaper = p;
1449 p->signal->flags |= SIGNAL_UNKILLABLE;
1450 }
1446 1451
1447 p->signal->leader_pid = pid; 1452 p->signal->leader_pid = pid;
1448 p->signal->tty = tty_kref_get(current->signal->tty); 1453 p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1476,8 +1481,6 @@ bad_fork_cleanup_io:
1476 if (p->io_context) 1481 if (p->io_context)
1477 exit_io_context(p); 1482 exit_io_context(p);
1478bad_fork_cleanup_namespaces: 1483bad_fork_cleanup_namespaces:
1479 if (unlikely(clone_flags & CLONE_NEWPID))
1480 pid_ns_release_proc(p->nsproxy->pid_ns);
1481 exit_task_namespaces(p); 1484 exit_task_namespaces(p);
1482bad_fork_cleanup_mm: 1485bad_fork_cleanup_mm:
1483 if (p->mm) 1486 if (p->mm)
@@ -1503,7 +1506,7 @@ bad_fork_cleanup_cgroup:
1503#endif 1506#endif
1504 if (clone_flags & CLONE_THREAD) 1507 if (clone_flags & CLONE_THREAD)
1505 threadgroup_change_end(current); 1508 threadgroup_change_end(current);
1506 cgroup_exit(p, cgroup_callbacks_done); 1509 cgroup_exit(p, 0);
1507 delayacct_tsk_free(p); 1510 delayacct_tsk_free(p);
1508 module_put(task_thread_info(p)->exec_domain->module); 1511 module_put(task_thread_info(p)->exec_domain->module);
1509bad_fork_cleanup_count: 1512bad_fork_cleanup_count:
@@ -1515,12 +1518,6 @@ fork_out:
1515 return ERR_PTR(retval); 1518 return ERR_PTR(retval);
1516} 1519}
1517 1520
1518noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
1519{
1520 memset(regs, 0, sizeof(struct pt_regs));
1521 return regs;
1522}
1523
1524static inline void init_idle_pids(struct pid_link *links) 1521static inline void init_idle_pids(struct pid_link *links)
1525{ 1522{
1526 enum pid_type type; 1523 enum pid_type type;
@@ -1534,10 +1531,7 @@ static inline void init_idle_pids(struct pid_link *links)
1534struct task_struct * __cpuinit fork_idle(int cpu) 1531struct task_struct * __cpuinit fork_idle(int cpu)
1535{ 1532{
1536 struct task_struct *task; 1533 struct task_struct *task;
1537 struct pt_regs regs; 1534 task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
1538
1539 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1540 &init_struct_pid, 0);
1541 if (!IS_ERR(task)) { 1535 if (!IS_ERR(task)) {
1542 init_idle_pids(task->pids); 1536 init_idle_pids(task->pids);
1543 init_idle(task, cpu); 1537 init_idle(task, cpu);
@@ -1554,7 +1548,6 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1554 */ 1548 */
1555long do_fork(unsigned long clone_flags, 1549long do_fork(unsigned long clone_flags,
1556 unsigned long stack_start, 1550 unsigned long stack_start,
1557 struct pt_regs *regs,
1558 unsigned long stack_size, 1551 unsigned long stack_size,
1559 int __user *parent_tidptr, 1552 int __user *parent_tidptr,
1560 int __user *child_tidptr) 1553 int __user *child_tidptr)
@@ -1567,15 +1560,9 @@ long do_fork(unsigned long clone_flags,
1567 * Do some preliminary argument and permissions checking before we 1560 * Do some preliminary argument and permissions checking before we
1568 * actually start allocating stuff 1561 * actually start allocating stuff
1569 */ 1562 */
1570 if (clone_flags & CLONE_NEWUSER) { 1563 if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
1571 if (clone_flags & CLONE_THREAD) 1564 if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
1572 return -EINVAL; 1565 return -EINVAL;
1573 /* hopefully this check will go away when userns support is
1574 * complete
1575 */
1576 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
1577 !capable(CAP_SETGID))
1578 return -EPERM;
1579 } 1566 }
1580 1567
1581 /* 1568 /*
@@ -1584,7 +1571,7 @@ long do_fork(unsigned long clone_flags,
1584 * requested, no event is reported; otherwise, report if the event 1571 * requested, no event is reported; otherwise, report if the event
1585 * for the type of forking is enabled. 1572 * for the type of forking is enabled.
1586 */ 1573 */
1587 if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) { 1574 if (!(clone_flags & CLONE_UNTRACED)) {
1588 if (clone_flags & CLONE_VFORK) 1575 if (clone_flags & CLONE_VFORK)
1589 trace = PTRACE_EVENT_VFORK; 1576 trace = PTRACE_EVENT_VFORK;
1590 else if ((clone_flags & CSIGNAL) != SIGCHLD) 1577 else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1596,7 +1583,7 @@ long do_fork(unsigned long clone_flags,
1596 trace = 0; 1583 trace = 0;
1597 } 1584 }
1598 1585
1599 p = copy_process(clone_flags, stack_start, regs, stack_size, 1586 p = copy_process(clone_flags, stack_start, stack_size,
1600 child_tidptr, NULL, trace); 1587 child_tidptr, NULL, trace);
1601 /* 1588 /*
1602 * Do this prior waking up the new thread - the thread pointer 1589 * Do this prior waking up the new thread - the thread pointer
@@ -1634,15 +1621,56 @@ long do_fork(unsigned long clone_flags,
1634 return nr; 1621 return nr;
1635} 1622}
1636 1623
1637#ifdef CONFIG_GENERIC_KERNEL_THREAD
1638/* 1624/*
1639 * Create a kernel thread. 1625 * Create a kernel thread.
1640 */ 1626 */
1641pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) 1627pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
1642{ 1628{
1643 return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL, 1629 return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
1644 (unsigned long)arg, NULL, NULL); 1630 (unsigned long)arg, NULL, NULL);
1645} 1631}
1632
1633#ifdef __ARCH_WANT_SYS_FORK
1634SYSCALL_DEFINE0(fork)
1635{
1636#ifdef CONFIG_MMU
1637 return do_fork(SIGCHLD, 0, 0, NULL, NULL);
1638#else
1639 /* can not support in nommu mode */
1640 return(-EINVAL);
1641#endif
1642}
1643#endif
1644
1645#ifdef __ARCH_WANT_SYS_VFORK
1646SYSCALL_DEFINE0(vfork)
1647{
1648 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
1649 0, NULL, NULL);
1650}
1651#endif
1652
1653#ifdef __ARCH_WANT_SYS_CLONE
1654#ifdef CONFIG_CLONE_BACKWARDS
1655SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1656 int __user *, parent_tidptr,
1657 int, tls_val,
1658 int __user *, child_tidptr)
1659#elif defined(CONFIG_CLONE_BACKWARDS2)
1660SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
1661 int __user *, parent_tidptr,
1662 int __user *, child_tidptr,
1663 int, tls_val)
1664#else
1665SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1666 int __user *, parent_tidptr,
1667 int __user *, child_tidptr,
1668 int, tls_val)
1669#endif
1670{
1671 return do_fork(clone_flags, newsp, 0,
1672 parent_tidptr, child_tidptr);
1673}
1646#endif 1674#endif
1647 1675
1648#ifndef ARCH_MIN_MMSTRUCT_ALIGN 1676#ifndef ARCH_MIN_MMSTRUCT_ALIGN
@@ -1694,7 +1722,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
1694{ 1722{
1695 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 1723 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1696 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 1724 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1697 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) 1725 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
1726 CLONE_NEWUSER|CLONE_NEWPID))
1698 return -EINVAL; 1727 return -EINVAL;
1699 /* 1728 /*
1700 * Not implemented, but pretend it works if there is nothing to 1729 * Not implemented, but pretend it works if there is nothing to
@@ -1761,19 +1790,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1761{ 1790{
1762 struct fs_struct *fs, *new_fs = NULL; 1791 struct fs_struct *fs, *new_fs = NULL;
1763 struct files_struct *fd, *new_fd = NULL; 1792 struct files_struct *fd, *new_fd = NULL;
1793 struct cred *new_cred = NULL;
1764 struct nsproxy *new_nsproxy = NULL; 1794 struct nsproxy *new_nsproxy = NULL;
1765 int do_sysvsem = 0; 1795 int do_sysvsem = 0;
1766 int err; 1796 int err;
1767 1797
1768 err = check_unshare_flags(unshare_flags); 1798 /*
1769 if (err) 1799 * If unsharing a user namespace must also unshare the thread.
1770 goto bad_unshare_out; 1800 */
1771 1801 if (unshare_flags & CLONE_NEWUSER)
1802 unshare_flags |= CLONE_THREAD;
1803 /*
1804 * If unsharing a pid namespace must also unshare the thread.
1805 */
1806 if (unshare_flags & CLONE_NEWPID)
1807 unshare_flags |= CLONE_THREAD;
1808 /*
1809 * If unsharing a thread from a thread group, must also unshare vm.
1810 */
1811 if (unshare_flags & CLONE_THREAD)
1812 unshare_flags |= CLONE_VM;
1813 /*
1814 * If unsharing vm, must also unshare signal handlers.
1815 */
1816 if (unshare_flags & CLONE_VM)
1817 unshare_flags |= CLONE_SIGHAND;
1772 /* 1818 /*
1773 * If unsharing namespace, must also unshare filesystem information. 1819 * If unsharing namespace, must also unshare filesystem information.
1774 */ 1820 */
1775 if (unshare_flags & CLONE_NEWNS) 1821 if (unshare_flags & CLONE_NEWNS)
1776 unshare_flags |= CLONE_FS; 1822 unshare_flags |= CLONE_FS;
1823
1824 err = check_unshare_flags(unshare_flags);
1825 if (err)
1826 goto bad_unshare_out;
1777 /* 1827 /*
1778 * CLONE_NEWIPC must also detach from the undolist: after switching 1828 * CLONE_NEWIPC must also detach from the undolist: after switching
1779 * to a new ipc namespace, the semaphore arrays from the old 1829 * to a new ipc namespace, the semaphore arrays from the old
@@ -1787,11 +1837,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1787 err = unshare_fd(unshare_flags, &new_fd); 1837 err = unshare_fd(unshare_flags, &new_fd);
1788 if (err) 1838 if (err)
1789 goto bad_unshare_cleanup_fs; 1839 goto bad_unshare_cleanup_fs;
1790 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); 1840 err = unshare_userns(unshare_flags, &new_cred);
1791 if (err) 1841 if (err)
1792 goto bad_unshare_cleanup_fd; 1842 goto bad_unshare_cleanup_fd;
1843 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
1844 new_cred, new_fs);
1845 if (err)
1846 goto bad_unshare_cleanup_cred;
1793 1847
1794 if (new_fs || new_fd || do_sysvsem || new_nsproxy) { 1848 if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
1795 if (do_sysvsem) { 1849 if (do_sysvsem) {
1796 /* 1850 /*
1797 * CLONE_SYSVSEM is equivalent to sys_exit(). 1851 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1824,11 +1878,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1824 } 1878 }
1825 1879
1826 task_unlock(current); 1880 task_unlock(current);
1881
1882 if (new_cred) {
1883 /* Install the new user namespace */
1884 commit_creds(new_cred);
1885 new_cred = NULL;
1886 }
1827 } 1887 }
1828 1888
1829 if (new_nsproxy) 1889 if (new_nsproxy)
1830 put_nsproxy(new_nsproxy); 1890 put_nsproxy(new_nsproxy);
1831 1891
1892bad_unshare_cleanup_cred:
1893 if (new_cred)
1894 put_cred(new_cred);
1832bad_unshare_cleanup_fd: 1895bad_unshare_cleanup_fd:
1833 if (new_fd) 1896 if (new_fd)
1834 put_files_struct(new_fd); 1897 put_files_struct(new_fd);