diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 149 |
1 files changed, 103 insertions, 46 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 8b20ab7d3aa2..c36c4e301efe 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
352 | unsigned long charge; | 352 | unsigned long charge; |
353 | struct mempolicy *pol; | 353 | struct mempolicy *pol; |
354 | 354 | ||
355 | uprobe_start_dup_mmap(); | ||
355 | down_write(&oldmm->mmap_sem); | 356 | down_write(&oldmm->mmap_sem); |
356 | flush_cache_dup_mm(oldmm); | 357 | flush_cache_dup_mm(oldmm); |
357 | uprobe_dup_mmap(oldmm, mm); | 358 | uprobe_dup_mmap(oldmm, mm); |
@@ -469,6 +470,7 @@ out: | |||
469 | up_write(&mm->mmap_sem); | 470 | up_write(&mm->mmap_sem); |
470 | flush_tlb_mm(oldmm); | 471 | flush_tlb_mm(oldmm); |
471 | up_write(&oldmm->mmap_sem); | 472 | up_write(&oldmm->mmap_sem); |
473 | uprobe_end_dup_mmap(); | ||
472 | return retval; | 474 | return retval; |
473 | fail_nomem_anon_vma_fork: | 475 | fail_nomem_anon_vma_fork: |
474 | mpol_put(pol); | 476 | mpol_put(pol); |
@@ -821,6 +823,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
821 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 823 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
822 | mm->pmd_huge_pte = NULL; | 824 | mm->pmd_huge_pte = NULL; |
823 | #endif | 825 | #endif |
826 | #ifdef CONFIG_NUMA_BALANCING | ||
827 | mm->first_nid = NUMA_PTE_SCAN_INIT; | ||
828 | #endif | ||
824 | if (!mm_init(mm, tsk)) | 829 | if (!mm_init(mm, tsk)) |
825 | goto fail_nomem; | 830 | goto fail_nomem; |
826 | 831 | ||
@@ -1039,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1039 | atomic_set(&sig->live, 1); | 1044 | atomic_set(&sig->live, 1); |
1040 | atomic_set(&sig->sigcnt, 1); | 1045 | atomic_set(&sig->sigcnt, 1); |
1041 | init_waitqueue_head(&sig->wait_chldexit); | 1046 | init_waitqueue_head(&sig->wait_chldexit); |
1042 | if (clone_flags & CLONE_NEWPID) | ||
1043 | sig->flags |= SIGNAL_UNKILLABLE; | ||
1044 | sig->curr_target = tsk; | 1047 | sig->curr_target = tsk; |
1045 | init_sigpending(&sig->shared_pending); | 1048 | init_sigpending(&sig->shared_pending); |
1046 | INIT_LIST_HEAD(&sig->posix_timers); | 1049 | INIT_LIST_HEAD(&sig->posix_timers); |
@@ -1127,7 +1130,6 @@ static void posix_cpu_timers_init(struct task_struct *tsk) | |||
1127 | */ | 1130 | */ |
1128 | static struct task_struct *copy_process(unsigned long clone_flags, | 1131 | static struct task_struct *copy_process(unsigned long clone_flags, |
1129 | unsigned long stack_start, | 1132 | unsigned long stack_start, |
1130 | struct pt_regs *regs, | ||
1131 | unsigned long stack_size, | 1133 | unsigned long stack_size, |
1132 | int __user *child_tidptr, | 1134 | int __user *child_tidptr, |
1133 | struct pid *pid, | 1135 | struct pid *pid, |
@@ -1135,7 +1137,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1135 | { | 1137 | { |
1136 | int retval; | 1138 | int retval; |
1137 | struct task_struct *p; | 1139 | struct task_struct *p; |
1138 | int cgroup_callbacks_done = 0; | ||
1139 | 1140 | ||
1140 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) | 1141 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) |
1141 | return ERR_PTR(-EINVAL); | 1142 | return ERR_PTR(-EINVAL); |
@@ -1222,7 +1223,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1222 | p->utime = p->stime = p->gtime = 0; | 1223 | p->utime = p->stime = p->gtime = 0; |
1223 | p->utimescaled = p->stimescaled = 0; | 1224 | p->utimescaled = p->stimescaled = 0; |
1224 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 1225 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
1225 | p->prev_utime = p->prev_stime = 0; | 1226 | p->prev_cputime.utime = p->prev_cputime.stime = 0; |
1226 | #endif | 1227 | #endif |
1227 | #if defined(SPLIT_RSS_COUNTING) | 1228 | #if defined(SPLIT_RSS_COUNTING) |
1228 | memset(&p->rss_stat, 0, sizeof(p->rss_stat)); | 1229 | memset(&p->rss_stat, 0, sizeof(p->rss_stat)); |
@@ -1320,7 +1321,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1320 | retval = copy_io(clone_flags, p); | 1321 | retval = copy_io(clone_flags, p); |
1321 | if (retval) | 1322 | if (retval) |
1322 | goto bad_fork_cleanup_namespaces; | 1323 | goto bad_fork_cleanup_namespaces; |
1323 | retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); | 1324 | retval = copy_thread(clone_flags, stack_start, stack_size, p); |
1324 | if (retval) | 1325 | if (retval) |
1325 | goto bad_fork_cleanup_io; | 1326 | goto bad_fork_cleanup_io; |
1326 | 1327 | ||
@@ -1393,12 +1394,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1393 | INIT_LIST_HEAD(&p->thread_group); | 1394 | INIT_LIST_HEAD(&p->thread_group); |
1394 | p->task_works = NULL; | 1395 | p->task_works = NULL; |
1395 | 1396 | ||
1396 | /* Now that the task is set up, run cgroup callbacks if | ||
1397 | * necessary. We need to run them before the task is visible | ||
1398 | * on the tasklist. */ | ||
1399 | cgroup_fork_callbacks(p); | ||
1400 | cgroup_callbacks_done = 1; | ||
1401 | |||
1402 | /* Need tasklist lock for parent etc handling! */ | 1397 | /* Need tasklist lock for parent etc handling! */ |
1403 | write_lock_irq(&tasklist_lock); | 1398 | write_lock_irq(&tasklist_lock); |
1404 | 1399 | ||
@@ -1441,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1441 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); | 1436 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); |
1442 | 1437 | ||
1443 | if (thread_group_leader(p)) { | 1438 | if (thread_group_leader(p)) { |
1444 | if (is_child_reaper(pid)) | 1439 | if (is_child_reaper(pid)) { |
1445 | p->nsproxy->pid_ns->child_reaper = p; | 1440 | ns_of_pid(pid)->child_reaper = p; |
1441 | p->signal->flags |= SIGNAL_UNKILLABLE; | ||
1442 | } | ||
1446 | 1443 | ||
1447 | p->signal->leader_pid = pid; | 1444 | p->signal->leader_pid = pid; |
1448 | p->signal->tty = tty_kref_get(current->signal->tty); | 1445 | p->signal->tty = tty_kref_get(current->signal->tty); |
@@ -1476,8 +1473,6 @@ bad_fork_cleanup_io: | |||
1476 | if (p->io_context) | 1473 | if (p->io_context) |
1477 | exit_io_context(p); | 1474 | exit_io_context(p); |
1478 | bad_fork_cleanup_namespaces: | 1475 | bad_fork_cleanup_namespaces: |
1479 | if (unlikely(clone_flags & CLONE_NEWPID)) | ||
1480 | pid_ns_release_proc(p->nsproxy->pid_ns); | ||
1481 | exit_task_namespaces(p); | 1476 | exit_task_namespaces(p); |
1482 | bad_fork_cleanup_mm: | 1477 | bad_fork_cleanup_mm: |
1483 | if (p->mm) | 1478 | if (p->mm) |
@@ -1503,7 +1498,7 @@ bad_fork_cleanup_cgroup: | |||
1503 | #endif | 1498 | #endif |
1504 | if (clone_flags & CLONE_THREAD) | 1499 | if (clone_flags & CLONE_THREAD) |
1505 | threadgroup_change_end(current); | 1500 | threadgroup_change_end(current); |
1506 | cgroup_exit(p, cgroup_callbacks_done); | 1501 | cgroup_exit(p, 0); |
1507 | delayacct_tsk_free(p); | 1502 | delayacct_tsk_free(p); |
1508 | module_put(task_thread_info(p)->exec_domain->module); | 1503 | module_put(task_thread_info(p)->exec_domain->module); |
1509 | bad_fork_cleanup_count: | 1504 | bad_fork_cleanup_count: |
@@ -1515,12 +1510,6 @@ fork_out: | |||
1515 | return ERR_PTR(retval); | 1510 | return ERR_PTR(retval); |
1516 | } | 1511 | } |
1517 | 1512 | ||
1518 | noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs) | ||
1519 | { | ||
1520 | memset(regs, 0, sizeof(struct pt_regs)); | ||
1521 | return regs; | ||
1522 | } | ||
1523 | |||
1524 | static inline void init_idle_pids(struct pid_link *links) | 1513 | static inline void init_idle_pids(struct pid_link *links) |
1525 | { | 1514 | { |
1526 | enum pid_type type; | 1515 | enum pid_type type; |
@@ -1534,10 +1523,7 @@ static inline void init_idle_pids(struct pid_link *links) | |||
1534 | struct task_struct * __cpuinit fork_idle(int cpu) | 1523 | struct task_struct * __cpuinit fork_idle(int cpu) |
1535 | { | 1524 | { |
1536 | struct task_struct *task; | 1525 | struct task_struct *task; |
1537 | struct pt_regs regs; | 1526 | task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); |
1538 | |||
1539 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, | ||
1540 | &init_struct_pid, 0); | ||
1541 | if (!IS_ERR(task)) { | 1527 | if (!IS_ERR(task)) { |
1542 | init_idle_pids(task->pids); | 1528 | init_idle_pids(task->pids); |
1543 | init_idle(task, cpu); | 1529 | init_idle(task, cpu); |
@@ -1554,7 +1540,6 @@ struct task_struct * __cpuinit fork_idle(int cpu) | |||
1554 | */ | 1540 | */ |
1555 | long do_fork(unsigned long clone_flags, | 1541 | long do_fork(unsigned long clone_flags, |
1556 | unsigned long stack_start, | 1542 | unsigned long stack_start, |
1557 | struct pt_regs *regs, | ||
1558 | unsigned long stack_size, | 1543 | unsigned long stack_size, |
1559 | int __user *parent_tidptr, | 1544 | int __user *parent_tidptr, |
1560 | int __user *child_tidptr) | 1545 | int __user *child_tidptr) |
@@ -1567,15 +1552,9 @@ long do_fork(unsigned long clone_flags, | |||
1567 | * Do some preliminary argument and permissions checking before we | 1552 | * Do some preliminary argument and permissions checking before we |
1568 | * actually start allocating stuff | 1553 | * actually start allocating stuff |
1569 | */ | 1554 | */ |
1570 | if (clone_flags & CLONE_NEWUSER) { | 1555 | if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) { |
1571 | if (clone_flags & CLONE_THREAD) | 1556 | if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) |
1572 | return -EINVAL; | 1557 | return -EINVAL; |
1573 | /* hopefully this check will go away when userns support is | ||
1574 | * complete | ||
1575 | */ | ||
1576 | if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || | ||
1577 | !capable(CAP_SETGID)) | ||
1578 | return -EPERM; | ||
1579 | } | 1558 | } |
1580 | 1559 | ||
1581 | /* | 1560 | /* |
@@ -1584,7 +1563,7 @@ long do_fork(unsigned long clone_flags, | |||
1584 | * requested, no event is reported; otherwise, report if the event | 1563 | * requested, no event is reported; otherwise, report if the event |
1585 | * for the type of forking is enabled. | 1564 | * for the type of forking is enabled. |
1586 | */ | 1565 | */ |
1587 | if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) { | 1566 | if (!(clone_flags & CLONE_UNTRACED)) { |
1588 | if (clone_flags & CLONE_VFORK) | 1567 | if (clone_flags & CLONE_VFORK) |
1589 | trace = PTRACE_EVENT_VFORK; | 1568 | trace = PTRACE_EVENT_VFORK; |
1590 | else if ((clone_flags & CSIGNAL) != SIGCHLD) | 1569 | else if ((clone_flags & CSIGNAL) != SIGCHLD) |
@@ -1596,7 +1575,7 @@ long do_fork(unsigned long clone_flags, | |||
1596 | trace = 0; | 1575 | trace = 0; |
1597 | } | 1576 | } |
1598 | 1577 | ||
1599 | p = copy_process(clone_flags, stack_start, regs, stack_size, | 1578 | p = copy_process(clone_flags, stack_start, stack_size, |
1600 | child_tidptr, NULL, trace); | 1579 | child_tidptr, NULL, trace); |
1601 | /* | 1580 | /* |
1602 | * Do this prior waking up the new thread - the thread pointer | 1581 | * Do this prior waking up the new thread - the thread pointer |
@@ -1640,11 +1619,54 @@ long do_fork(unsigned long clone_flags, | |||
1640 | */ | 1619 | */ |
1641 | pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | 1620 | pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) |
1642 | { | 1621 | { |
1643 | return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL, | 1622 | return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, |
1644 | (unsigned long)arg, NULL, NULL); | 1623 | (unsigned long)arg, NULL, NULL); |
1645 | } | 1624 | } |
1646 | #endif | 1625 | #endif |
1647 | 1626 | ||
1627 | #ifdef __ARCH_WANT_SYS_FORK | ||
1628 | SYSCALL_DEFINE0(fork) | ||
1629 | { | ||
1630 | #ifdef CONFIG_MMU | ||
1631 | return do_fork(SIGCHLD, 0, 0, NULL, NULL); | ||
1632 | #else | ||
1633 | /* can not support in nommu mode */ | ||
1634 | return(-EINVAL); | ||
1635 | #endif | ||
1636 | } | ||
1637 | #endif | ||
1638 | |||
1639 | #ifdef __ARCH_WANT_SYS_VFORK | ||
1640 | SYSCALL_DEFINE0(vfork) | ||
1641 | { | ||
1642 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, | ||
1643 | 0, NULL, NULL); | ||
1644 | } | ||
1645 | #endif | ||
1646 | |||
1647 | #ifdef __ARCH_WANT_SYS_CLONE | ||
1648 | #ifdef CONFIG_CLONE_BACKWARDS | ||
1649 | SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, | ||
1650 | int __user *, parent_tidptr, | ||
1651 | int, tls_val, | ||
1652 | int __user *, child_tidptr) | ||
1653 | #elif defined(CONFIG_CLONE_BACKWARDS2) | ||
1654 | SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, | ||
1655 | int __user *, parent_tidptr, | ||
1656 | int __user *, child_tidptr, | ||
1657 | int, tls_val) | ||
1658 | #else | ||
1659 | SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, | ||
1660 | int __user *, parent_tidptr, | ||
1661 | int __user *, child_tidptr, | ||
1662 | int, tls_val) | ||
1663 | #endif | ||
1664 | { | ||
1665 | return do_fork(clone_flags, newsp, 0, | ||
1666 | parent_tidptr, child_tidptr); | ||
1667 | } | ||
1668 | #endif | ||
1669 | |||
1648 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN | 1670 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN |
1649 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1671 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
1650 | #endif | 1672 | #endif |
@@ -1694,7 +1716,8 @@ static int check_unshare_flags(unsigned long unshare_flags) | |||
1694 | { | 1716 | { |
1695 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1717 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1696 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1718 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1697 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | 1719 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| |
1720 | CLONE_NEWUSER|CLONE_NEWPID)) | ||
1698 | return -EINVAL; | 1721 | return -EINVAL; |
1699 | /* | 1722 | /* |
1700 | * Not implemented, but pretend it works if there is nothing to | 1723 | * Not implemented, but pretend it works if there is nothing to |
@@ -1761,19 +1784,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1761 | { | 1784 | { |
1762 | struct fs_struct *fs, *new_fs = NULL; | 1785 | struct fs_struct *fs, *new_fs = NULL; |
1763 | struct files_struct *fd, *new_fd = NULL; | 1786 | struct files_struct *fd, *new_fd = NULL; |
1787 | struct cred *new_cred = NULL; | ||
1764 | struct nsproxy *new_nsproxy = NULL; | 1788 | struct nsproxy *new_nsproxy = NULL; |
1765 | int do_sysvsem = 0; | 1789 | int do_sysvsem = 0; |
1766 | int err; | 1790 | int err; |
1767 | 1791 | ||
1768 | err = check_unshare_flags(unshare_flags); | 1792 | /* |
1769 | if (err) | 1793 | * If unsharing a user namespace must also unshare the thread. |
1770 | goto bad_unshare_out; | 1794 | */ |
1771 | 1795 | if (unshare_flags & CLONE_NEWUSER) | |
1796 | unshare_flags |= CLONE_THREAD; | ||
1797 | /* | ||
1798 | * If unsharing a pid namespace must also unshare the thread. | ||
1799 | */ | ||
1800 | if (unshare_flags & CLONE_NEWPID) | ||
1801 | unshare_flags |= CLONE_THREAD; | ||
1802 | /* | ||
1803 | * If unsharing a thread from a thread group, must also unshare vm. | ||
1804 | */ | ||
1805 | if (unshare_flags & CLONE_THREAD) | ||
1806 | unshare_flags |= CLONE_VM; | ||
1807 | /* | ||
1808 | * If unsharing vm, must also unshare signal handlers. | ||
1809 | */ | ||
1810 | if (unshare_flags & CLONE_VM) | ||
1811 | unshare_flags |= CLONE_SIGHAND; | ||
1772 | /* | 1812 | /* |
1773 | * If unsharing namespace, must also unshare filesystem information. | 1813 | * If unsharing namespace, must also unshare filesystem information. |
1774 | */ | 1814 | */ |
1775 | if (unshare_flags & CLONE_NEWNS) | 1815 | if (unshare_flags & CLONE_NEWNS) |
1776 | unshare_flags |= CLONE_FS; | 1816 | unshare_flags |= CLONE_FS; |
1817 | |||
1818 | err = check_unshare_flags(unshare_flags); | ||
1819 | if (err) | ||
1820 | goto bad_unshare_out; | ||
1777 | /* | 1821 | /* |
1778 | * CLONE_NEWIPC must also detach from the undolist: after switching | 1822 | * CLONE_NEWIPC must also detach from the undolist: after switching |
1779 | * to a new ipc namespace, the semaphore arrays from the old | 1823 | * to a new ipc namespace, the semaphore arrays from the old |
@@ -1787,11 +1831,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1787 | err = unshare_fd(unshare_flags, &new_fd); | 1831 | err = unshare_fd(unshare_flags, &new_fd); |
1788 | if (err) | 1832 | if (err) |
1789 | goto bad_unshare_cleanup_fs; | 1833 | goto bad_unshare_cleanup_fs; |
1790 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); | 1834 | err = unshare_userns(unshare_flags, &new_cred); |
1791 | if (err) | 1835 | if (err) |
1792 | goto bad_unshare_cleanup_fd; | 1836 | goto bad_unshare_cleanup_fd; |
1837 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | ||
1838 | new_cred, new_fs); | ||
1839 | if (err) | ||
1840 | goto bad_unshare_cleanup_cred; | ||
1793 | 1841 | ||
1794 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { | 1842 | if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { |
1795 | if (do_sysvsem) { | 1843 | if (do_sysvsem) { |
1796 | /* | 1844 | /* |
1797 | * CLONE_SYSVSEM is equivalent to sys_exit(). | 1845 | * CLONE_SYSVSEM is equivalent to sys_exit(). |
@@ -1824,11 +1872,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1824 | } | 1872 | } |
1825 | 1873 | ||
1826 | task_unlock(current); | 1874 | task_unlock(current); |
1875 | |||
1876 | if (new_cred) { | ||
1877 | /* Install the new user namespace */ | ||
1878 | commit_creds(new_cred); | ||
1879 | new_cred = NULL; | ||
1880 | } | ||
1827 | } | 1881 | } |
1828 | 1882 | ||
1829 | if (new_nsproxy) | 1883 | if (new_nsproxy) |
1830 | put_nsproxy(new_nsproxy); | 1884 | put_nsproxy(new_nsproxy); |
1831 | 1885 | ||
1886 | bad_unshare_cleanup_cred: | ||
1887 | if (new_cred) | ||
1888 | put_cred(new_cred); | ||
1832 | bad_unshare_cleanup_fd: | 1889 | bad_unshare_cleanup_fd: |
1833 | if (new_fd) | 1890 | if (new_fd) |
1834 | put_files_struct(new_fd); | 1891 | put_files_struct(new_fd); |