diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 89 |
1 files changed, 57 insertions, 32 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 389712ffc0ad..a31b823b3c2d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c | |||
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti) | |||
146 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, | 146 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, |
147 | int node) | 147 | int node) |
148 | { | 148 | { |
149 | struct page *page = alloc_pages_node(node, THREADINFO_GFP, | 149 | struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED, |
150 | THREAD_SIZE_ORDER); | 150 | THREAD_SIZE_ORDER); |
151 | 151 | ||
152 | return page ? page_address(page) : NULL; | 152 | return page ? page_address(page) : NULL; |
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, | |||
154 | 154 | ||
155 | static inline void free_thread_info(struct thread_info *ti) | 155 | static inline void free_thread_info(struct thread_info *ti) |
156 | { | 156 | { |
157 | free_pages((unsigned long)ti, THREAD_SIZE_ORDER); | 157 | free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); |
158 | } | 158 | } |
159 | # else | 159 | # else |
160 | static struct kmem_cache *thread_info_cache; | 160 | static struct kmem_cache *thread_info_cache; |
@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
352 | unsigned long charge; | 352 | unsigned long charge; |
353 | struct mempolicy *pol; | 353 | struct mempolicy *pol; |
354 | 354 | ||
355 | uprobe_start_dup_mmap(); | ||
355 | down_write(&oldmm->mmap_sem); | 356 | down_write(&oldmm->mmap_sem); |
356 | flush_cache_dup_mm(oldmm); | 357 | flush_cache_dup_mm(oldmm); |
357 | uprobe_dup_mmap(oldmm, mm); | 358 | uprobe_dup_mmap(oldmm, mm); |
@@ -469,6 +470,7 @@ out: | |||
469 | up_write(&mm->mmap_sem); | 470 | up_write(&mm->mmap_sem); |
470 | flush_tlb_mm(oldmm); | 471 | flush_tlb_mm(oldmm); |
471 | up_write(&oldmm->mmap_sem); | 472 | up_write(&oldmm->mmap_sem); |
473 | uprobe_end_dup_mmap(); | ||
472 | return retval; | 474 | return retval; |
473 | fail_nomem_anon_vma_fork: | 475 | fail_nomem_anon_vma_fork: |
474 | mpol_put(pol); | 476 | mpol_put(pol); |
@@ -821,6 +823,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
821 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 823 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
822 | mm->pmd_huge_pte = NULL; | 824 | mm->pmd_huge_pte = NULL; |
823 | #endif | 825 | #endif |
826 | #ifdef CONFIG_NUMA_BALANCING | ||
827 | mm->first_nid = NUMA_PTE_SCAN_INIT; | ||
828 | #endif | ||
824 | if (!mm_init(mm, tsk)) | 829 | if (!mm_init(mm, tsk)) |
825 | goto fail_nomem; | 830 | goto fail_nomem; |
826 | 831 | ||
@@ -1039,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1039 | atomic_set(&sig->live, 1); | 1044 | atomic_set(&sig->live, 1); |
1040 | atomic_set(&sig->sigcnt, 1); | 1045 | atomic_set(&sig->sigcnt, 1); |
1041 | init_waitqueue_head(&sig->wait_chldexit); | 1046 | init_waitqueue_head(&sig->wait_chldexit); |
1042 | if (clone_flags & CLONE_NEWPID) | ||
1043 | sig->flags |= SIGNAL_UNKILLABLE; | ||
1044 | sig->curr_target = tsk; | 1047 | sig->curr_target = tsk; |
1045 | init_sigpending(&sig->shared_pending); | 1048 | init_sigpending(&sig->shared_pending); |
1046 | INIT_LIST_HEAD(&sig->posix_timers); | 1049 | INIT_LIST_HEAD(&sig->posix_timers); |
@@ -1134,7 +1137,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1134 | { | 1137 | { |
1135 | int retval; | 1138 | int retval; |
1136 | struct task_struct *p; | 1139 | struct task_struct *p; |
1137 | int cgroup_callbacks_done = 0; | ||
1138 | 1140 | ||
1139 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) | 1141 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) |
1140 | return ERR_PTR(-EINVAL); | 1142 | return ERR_PTR(-EINVAL); |
@@ -1221,7 +1223,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1221 | p->utime = p->stime = p->gtime = 0; | 1223 | p->utime = p->stime = p->gtime = 0; |
1222 | p->utimescaled = p->stimescaled = 0; | 1224 | p->utimescaled = p->stimescaled = 0; |
1223 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 1225 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
1224 | p->prev_utime = p->prev_stime = 0; | 1226 | p->prev_cputime.utime = p->prev_cputime.stime = 0; |
1225 | #endif | 1227 | #endif |
1226 | #if defined(SPLIT_RSS_COUNTING) | 1228 | #if defined(SPLIT_RSS_COUNTING) |
1227 | memset(&p->rss_stat, 0, sizeof(p->rss_stat)); | 1229 | memset(&p->rss_stat, 0, sizeof(p->rss_stat)); |
@@ -1392,12 +1394,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1392 | INIT_LIST_HEAD(&p->thread_group); | 1394 | INIT_LIST_HEAD(&p->thread_group); |
1393 | p->task_works = NULL; | 1395 | p->task_works = NULL; |
1394 | 1396 | ||
1395 | /* Now that the task is set up, run cgroup callbacks if | ||
1396 | * necessary. We need to run them before the task is visible | ||
1397 | * on the tasklist. */ | ||
1398 | cgroup_fork_callbacks(p); | ||
1399 | cgroup_callbacks_done = 1; | ||
1400 | |||
1401 | /* Need tasklist lock for parent etc handling! */ | 1397 | /* Need tasklist lock for parent etc handling! */ |
1402 | write_lock_irq(&tasklist_lock); | 1398 | write_lock_irq(&tasklist_lock); |
1403 | 1399 | ||
@@ -1440,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1440 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); | 1436 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); |
1441 | 1437 | ||
1442 | if (thread_group_leader(p)) { | 1438 | if (thread_group_leader(p)) { |
1443 | if (is_child_reaper(pid)) | 1439 | if (is_child_reaper(pid)) { |
1444 | p->nsproxy->pid_ns->child_reaper = p; | 1440 | ns_of_pid(pid)->child_reaper = p; |
1441 | p->signal->flags |= SIGNAL_UNKILLABLE; | ||
1442 | } | ||
1445 | 1443 | ||
1446 | p->signal->leader_pid = pid; | 1444 | p->signal->leader_pid = pid; |
1447 | p->signal->tty = tty_kref_get(current->signal->tty); | 1445 | p->signal->tty = tty_kref_get(current->signal->tty); |
@@ -1475,8 +1473,6 @@ bad_fork_cleanup_io: | |||
1475 | if (p->io_context) | 1473 | if (p->io_context) |
1476 | exit_io_context(p); | 1474 | exit_io_context(p); |
1477 | bad_fork_cleanup_namespaces: | 1475 | bad_fork_cleanup_namespaces: |
1478 | if (unlikely(clone_flags & CLONE_NEWPID)) | ||
1479 | pid_ns_release_proc(p->nsproxy->pid_ns); | ||
1480 | exit_task_namespaces(p); | 1476 | exit_task_namespaces(p); |
1481 | bad_fork_cleanup_mm: | 1477 | bad_fork_cleanup_mm: |
1482 | if (p->mm) | 1478 | if (p->mm) |
@@ -1502,7 +1498,7 @@ bad_fork_cleanup_cgroup: | |||
1502 | #endif | 1498 | #endif |
1503 | if (clone_flags & CLONE_THREAD) | 1499 | if (clone_flags & CLONE_THREAD) |
1504 | threadgroup_change_end(current); | 1500 | threadgroup_change_end(current); |
1505 | cgroup_exit(p, cgroup_callbacks_done); | 1501 | cgroup_exit(p, 0); |
1506 | delayacct_tsk_free(p); | 1502 | delayacct_tsk_free(p); |
1507 | module_put(task_thread_info(p)->exec_domain->module); | 1503 | module_put(task_thread_info(p)->exec_domain->module); |
1508 | bad_fork_cleanup_count: | 1504 | bad_fork_cleanup_count: |
@@ -1556,15 +1552,9 @@ long do_fork(unsigned long clone_flags, | |||
1556 | * Do some preliminary argument and permissions checking before we | 1552 | * Do some preliminary argument and permissions checking before we |
1557 | * actually start allocating stuff | 1553 | * actually start allocating stuff |
1558 | */ | 1554 | */ |
1559 | if (clone_flags & CLONE_NEWUSER) { | 1555 | if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) { |
1560 | if (clone_flags & CLONE_THREAD) | 1556 | if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) |
1561 | return -EINVAL; | 1557 | return -EINVAL; |
1562 | /* hopefully this check will go away when userns support is | ||
1563 | * complete | ||
1564 | */ | ||
1565 | if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || | ||
1566 | !capable(CAP_SETGID)) | ||
1567 | return -EPERM; | ||
1568 | } | 1558 | } |
1569 | 1559 | ||
1570 | /* | 1560 | /* |
@@ -1724,7 +1714,8 @@ static int check_unshare_flags(unsigned long unshare_flags) | |||
1724 | { | 1714 | { |
1725 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1715 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1726 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1716 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1727 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | 1717 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| |
1718 | CLONE_NEWUSER|CLONE_NEWPID)) | ||
1728 | return -EINVAL; | 1719 | return -EINVAL; |
1729 | /* | 1720 | /* |
1730 | * Not implemented, but pretend it works if there is nothing to | 1721 | * Not implemented, but pretend it works if there is nothing to |
@@ -1791,19 +1782,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1791 | { | 1782 | { |
1792 | struct fs_struct *fs, *new_fs = NULL; | 1783 | struct fs_struct *fs, *new_fs = NULL; |
1793 | struct files_struct *fd, *new_fd = NULL; | 1784 | struct files_struct *fd, *new_fd = NULL; |
1785 | struct cred *new_cred = NULL; | ||
1794 | struct nsproxy *new_nsproxy = NULL; | 1786 | struct nsproxy *new_nsproxy = NULL; |
1795 | int do_sysvsem = 0; | 1787 | int do_sysvsem = 0; |
1796 | int err; | 1788 | int err; |
1797 | 1789 | ||
1798 | err = check_unshare_flags(unshare_flags); | 1790 | /* |
1799 | if (err) | 1791 | * If unsharing a user namespace must also unshare the thread. |
1800 | goto bad_unshare_out; | 1792 | */ |
1801 | 1793 | if (unshare_flags & CLONE_NEWUSER) | |
1794 | unshare_flags |= CLONE_THREAD; | ||
1795 | /* | ||
1796 | * If unsharing a pid namespace must also unshare the thread. | ||
1797 | */ | ||
1798 | if (unshare_flags & CLONE_NEWPID) | ||
1799 | unshare_flags |= CLONE_THREAD; | ||
1800 | /* | ||
1801 | * If unsharing a thread from a thread group, must also unshare vm. | ||
1802 | */ | ||
1803 | if (unshare_flags & CLONE_THREAD) | ||
1804 | unshare_flags |= CLONE_VM; | ||
1805 | /* | ||
1806 | * If unsharing vm, must also unshare signal handlers. | ||
1807 | */ | ||
1808 | if (unshare_flags & CLONE_VM) | ||
1809 | unshare_flags |= CLONE_SIGHAND; | ||
1802 | /* | 1810 | /* |
1803 | * If unsharing namespace, must also unshare filesystem information. | 1811 | * If unsharing namespace, must also unshare filesystem information. |
1804 | */ | 1812 | */ |
1805 | if (unshare_flags & CLONE_NEWNS) | 1813 | if (unshare_flags & CLONE_NEWNS) |
1806 | unshare_flags |= CLONE_FS; | 1814 | unshare_flags |= CLONE_FS; |
1815 | |||
1816 | err = check_unshare_flags(unshare_flags); | ||
1817 | if (err) | ||
1818 | goto bad_unshare_out; | ||
1807 | /* | 1819 | /* |
1808 | * CLONE_NEWIPC must also detach from the undolist: after switching | 1820 | * CLONE_NEWIPC must also detach from the undolist: after switching |
1809 | * to a new ipc namespace, the semaphore arrays from the old | 1821 | * to a new ipc namespace, the semaphore arrays from the old |
@@ -1817,11 +1829,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1817 | err = unshare_fd(unshare_flags, &new_fd); | 1829 | err = unshare_fd(unshare_flags, &new_fd); |
1818 | if (err) | 1830 | if (err) |
1819 | goto bad_unshare_cleanup_fs; | 1831 | goto bad_unshare_cleanup_fs; |
1820 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); | 1832 | err = unshare_userns(unshare_flags, &new_cred); |
1821 | if (err) | 1833 | if (err) |
1822 | goto bad_unshare_cleanup_fd; | 1834 | goto bad_unshare_cleanup_fd; |
1835 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | ||
1836 | new_cred, new_fs); | ||
1837 | if (err) | ||
1838 | goto bad_unshare_cleanup_cred; | ||
1823 | 1839 | ||
1824 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { | 1840 | if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { |
1825 | if (do_sysvsem) { | 1841 | if (do_sysvsem) { |
1826 | /* | 1842 | /* |
1827 | * CLONE_SYSVSEM is equivalent to sys_exit(). | 1843 | * CLONE_SYSVSEM is equivalent to sys_exit(). |
@@ -1854,11 +1870,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1854 | } | 1870 | } |
1855 | 1871 | ||
1856 | task_unlock(current); | 1872 | task_unlock(current); |
1873 | |||
1874 | if (new_cred) { | ||
1875 | /* Install the new user namespace */ | ||
1876 | commit_creds(new_cred); | ||
1877 | new_cred = NULL; | ||
1878 | } | ||
1857 | } | 1879 | } |
1858 | 1880 | ||
1859 | if (new_nsproxy) | 1881 | if (new_nsproxy) |
1860 | put_nsproxy(new_nsproxy); | 1882 | put_nsproxy(new_nsproxy); |
1861 | 1883 | ||
1884 | bad_unshare_cleanup_cred: | ||
1885 | if (new_cred) | ||
1886 | put_cred(new_cred); | ||
1862 | bad_unshare_cleanup_fd: | 1887 | bad_unshare_cleanup_fd: |
1863 | if (new_fd) | 1888 | if (new_fd) |
1864 | put_files_struct(new_fd); | 1889 | put_files_struct(new_fd); |