diff options
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 143 |
1 files changed, 38 insertions, 105 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 05b92c457010..f2b494d7c557 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/tracehook.h> | 40 | #include <linux/tracehook.h> |
| 41 | #include <linux/futex.h> | 41 | #include <linux/futex.h> |
| 42 | #include <linux/compat.h> | 42 | #include <linux/compat.h> |
| 43 | #include <linux/kthread.h> | ||
| 43 | #include <linux/task_io_accounting_ops.h> | 44 | #include <linux/task_io_accounting_ops.h> |
| 44 | #include <linux/rcupdate.h> | 45 | #include <linux/rcupdate.h> |
| 45 | #include <linux/ptrace.h> | 46 | #include <linux/ptrace.h> |
| @@ -109,20 +110,25 @@ int nr_processes(void) | |||
| 109 | } | 110 | } |
| 110 | 111 | ||
| 111 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | 112 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR |
| 112 | # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) | 113 | # define alloc_task_struct_node(node) \ |
| 113 | # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) | 114 | kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node) |
| 115 | # define free_task_struct(tsk) \ | ||
| 116 | kmem_cache_free(task_struct_cachep, (tsk)) | ||
| 114 | static struct kmem_cache *task_struct_cachep; | 117 | static struct kmem_cache *task_struct_cachep; |
| 115 | #endif | 118 | #endif |
| 116 | 119 | ||
| 117 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR | 120 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR |
| 118 | static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) | 121 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, |
| 122 | int node) | ||
| 119 | { | 123 | { |
| 120 | #ifdef CONFIG_DEBUG_STACK_USAGE | 124 | #ifdef CONFIG_DEBUG_STACK_USAGE |
| 121 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; | 125 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; |
| 122 | #else | 126 | #else |
| 123 | gfp_t mask = GFP_KERNEL; | 127 | gfp_t mask = GFP_KERNEL; |
| 124 | #endif | 128 | #endif |
| 125 | return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); | 129 | struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER); |
| 130 | |||
| 131 | return page ? page_address(page) : NULL; | ||
| 126 | } | 132 | } |
| 127 | 133 | ||
| 128 | static inline void free_thread_info(struct thread_info *ti) | 134 | static inline void free_thread_info(struct thread_info *ti) |
| @@ -249,16 +255,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 249 | struct task_struct *tsk; | 255 | struct task_struct *tsk; |
| 250 | struct thread_info *ti; | 256 | struct thread_info *ti; |
| 251 | unsigned long *stackend; | 257 | unsigned long *stackend; |
| 252 | 258 | int node = tsk_fork_get_node(orig); | |
| 253 | int err; | 259 | int err; |
| 254 | 260 | ||
| 255 | prepare_to_copy(orig); | 261 | prepare_to_copy(orig); |
| 256 | 262 | ||
| 257 | tsk = alloc_task_struct(); | 263 | tsk = alloc_task_struct_node(node); |
| 258 | if (!tsk) | 264 | if (!tsk) |
| 259 | return NULL; | 265 | return NULL; |
| 260 | 266 | ||
| 261 | ti = alloc_thread_info(tsk); | 267 | ti = alloc_thread_info_node(tsk, node); |
| 262 | if (!ti) { | 268 | if (!ti) { |
| 263 | free_task_struct(tsk); | 269 | free_task_struct(tsk); |
| 264 | return NULL; | 270 | return NULL; |
| @@ -1513,38 +1519,24 @@ void __init proc_caches_init(void) | |||
| 1513 | } | 1519 | } |
| 1514 | 1520 | ||
| 1515 | /* | 1521 | /* |
| 1516 | * Check constraints on flags passed to the unshare system call and | 1522 | * Check constraints on flags passed to the unshare system call. |
| 1517 | * force unsharing of additional process context as appropriate. | ||
| 1518 | */ | 1523 | */ |
| 1519 | static void check_unshare_flags(unsigned long *flags_ptr) | 1524 | static int check_unshare_flags(unsigned long unshare_flags) |
| 1520 | { | 1525 | { |
| 1526 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
| 1527 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | ||
| 1528 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | ||
| 1529 | return -EINVAL; | ||
| 1521 | /* | 1530 | /* |
| 1522 | * If unsharing a thread from a thread group, must also | 1531 | * Not implemented, but pretend it works if there is nothing to |
| 1523 | * unshare vm. | 1532 | * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND |
| 1524 | */ | 1533 | * needs to unshare vm. |
| 1525 | if (*flags_ptr & CLONE_THREAD) | ||
| 1526 | *flags_ptr |= CLONE_VM; | ||
| 1527 | |||
| 1528 | /* | ||
| 1529 | * If unsharing vm, must also unshare signal handlers. | ||
| 1530 | */ | ||
| 1531 | if (*flags_ptr & CLONE_VM) | ||
| 1532 | *flags_ptr |= CLONE_SIGHAND; | ||
| 1533 | |||
| 1534 | /* | ||
| 1535 | * If unsharing namespace, must also unshare filesystem information. | ||
| 1536 | */ | 1534 | */ |
| 1537 | if (*flags_ptr & CLONE_NEWNS) | 1535 | if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { |
| 1538 | *flags_ptr |= CLONE_FS; | 1536 | /* FIXME: get_task_mm() increments ->mm_users */ |
| 1539 | } | 1537 | if (atomic_read(&current->mm->mm_users) > 1) |
| 1540 | 1538 | return -EINVAL; | |
| 1541 | /* | 1539 | } |
| 1542 | * Unsharing of tasks created with CLONE_THREAD is not supported yet | ||
| 1543 | */ | ||
| 1544 | static int unshare_thread(unsigned long unshare_flags) | ||
| 1545 | { | ||
| 1546 | if (unshare_flags & CLONE_THREAD) | ||
| 1547 | return -EINVAL; | ||
| 1548 | 1540 | ||
| 1549 | return 0; | 1541 | return 0; |
| 1550 | } | 1542 | } |
| @@ -1571,34 +1563,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) | |||
| 1571 | } | 1563 | } |
| 1572 | 1564 | ||
| 1573 | /* | 1565 | /* |
| 1574 | * Unsharing of sighand is not supported yet | ||
| 1575 | */ | ||
| 1576 | static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) | ||
| 1577 | { | ||
| 1578 | struct sighand_struct *sigh = current->sighand; | ||
| 1579 | |||
| 1580 | if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1) | ||
| 1581 | return -EINVAL; | ||
| 1582 | else | ||
| 1583 | return 0; | ||
| 1584 | } | ||
| 1585 | |||
| 1586 | /* | ||
| 1587 | * Unshare vm if it is being shared | ||
| 1588 | */ | ||
| 1589 | static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) | ||
| 1590 | { | ||
| 1591 | struct mm_struct *mm = current->mm; | ||
| 1592 | |||
| 1593 | if ((unshare_flags & CLONE_VM) && | ||
| 1594 | (mm && atomic_read(&mm->mm_users) > 1)) { | ||
| 1595 | return -EINVAL; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | return 0; | ||
| 1599 | } | ||
| 1600 | |||
| 1601 | /* | ||
| 1602 | * Unshare file descriptor table if it is being shared | 1566 | * Unshare file descriptor table if it is being shared |
| 1603 | */ | 1567 | */ |
| 1604 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) | 1568 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) |
| @@ -1626,45 +1590,37 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp | |||
| 1626 | */ | 1590 | */ |
| 1627 | SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | 1591 | SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) |
| 1628 | { | 1592 | { |
| 1629 | int err = 0; | ||
| 1630 | struct fs_struct *fs, *new_fs = NULL; | 1593 | struct fs_struct *fs, *new_fs = NULL; |
| 1631 | struct sighand_struct *new_sigh = NULL; | ||
| 1632 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | ||
| 1633 | struct files_struct *fd, *new_fd = NULL; | 1594 | struct files_struct *fd, *new_fd = NULL; |
| 1634 | struct nsproxy *new_nsproxy = NULL; | 1595 | struct nsproxy *new_nsproxy = NULL; |
| 1635 | int do_sysvsem = 0; | 1596 | int do_sysvsem = 0; |
| 1597 | int err; | ||
| 1636 | 1598 | ||
| 1637 | check_unshare_flags(&unshare_flags); | 1599 | err = check_unshare_flags(unshare_flags); |
| 1638 | 1600 | if (err) | |
| 1639 | /* Return -EINVAL for all unsupported flags */ | ||
| 1640 | err = -EINVAL; | ||
| 1641 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
| 1642 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | ||
| 1643 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | ||
| 1644 | goto bad_unshare_out; | 1601 | goto bad_unshare_out; |
| 1645 | 1602 | ||
| 1646 | /* | 1603 | /* |
| 1604 | * If unsharing namespace, must also unshare filesystem information. | ||
| 1605 | */ | ||
| 1606 | if (unshare_flags & CLONE_NEWNS) | ||
| 1607 | unshare_flags |= CLONE_FS; | ||
| 1608 | /* | ||
| 1647 | * CLONE_NEWIPC must also detach from the undolist: after switching | 1609 | * CLONE_NEWIPC must also detach from the undolist: after switching |
| 1648 | * to a new ipc namespace, the semaphore arrays from the old | 1610 | * to a new ipc namespace, the semaphore arrays from the old |
| 1649 | * namespace are unreachable. | 1611 | * namespace are unreachable. |
| 1650 | */ | 1612 | */ |
| 1651 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) | 1613 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) |
| 1652 | do_sysvsem = 1; | 1614 | do_sysvsem = 1; |
| 1653 | if ((err = unshare_thread(unshare_flags))) | ||
| 1654 | goto bad_unshare_out; | ||
| 1655 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1615 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
| 1656 | goto bad_unshare_cleanup_thread; | 1616 | goto bad_unshare_out; |
| 1657 | if ((err = unshare_sighand(unshare_flags, &new_sigh))) | ||
| 1658 | goto bad_unshare_cleanup_fs; | ||
| 1659 | if ((err = unshare_vm(unshare_flags, &new_mm))) | ||
| 1660 | goto bad_unshare_cleanup_sigh; | ||
| 1661 | if ((err = unshare_fd(unshare_flags, &new_fd))) | 1617 | if ((err = unshare_fd(unshare_flags, &new_fd))) |
| 1662 | goto bad_unshare_cleanup_vm; | 1618 | goto bad_unshare_cleanup_fs; |
| 1663 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | 1619 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, |
| 1664 | new_fs))) | 1620 | new_fs))) |
| 1665 | goto bad_unshare_cleanup_fd; | 1621 | goto bad_unshare_cleanup_fd; |
| 1666 | 1622 | ||
| 1667 | if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { | 1623 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { |
| 1668 | if (do_sysvsem) { | 1624 | if (do_sysvsem) { |
| 1669 | /* | 1625 | /* |
| 1670 | * CLONE_SYSVSEM is equivalent to sys_exit(). | 1626 | * CLONE_SYSVSEM is equivalent to sys_exit(). |
| @@ -1690,19 +1646,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
| 1690 | spin_unlock(&fs->lock); | 1646 | spin_unlock(&fs->lock); |
| 1691 | } | 1647 | } |
| 1692 | 1648 | ||
| 1693 | if (new_mm) { | ||
| 1694 | mm = current->mm; | ||
| 1695 | active_mm = current->active_mm; | ||
| 1696 | current->mm = new_mm; | ||
| 1697 | current->active_mm = new_mm; | ||
| 1698 | if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
| 1699 | atomic_dec(&mm->oom_disable_count); | ||
| 1700 | atomic_inc(&new_mm->oom_disable_count); | ||
| 1701 | } | ||
| 1702 | activate_mm(active_mm, new_mm); | ||
| 1703 | new_mm = mm; | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | if (new_fd) { | 1649 | if (new_fd) { |
| 1707 | fd = current->files; | 1650 | fd = current->files; |
| 1708 | current->files = new_fd; | 1651 | current->files = new_fd; |
| @@ -1719,20 +1662,10 @@ bad_unshare_cleanup_fd: | |||
| 1719 | if (new_fd) | 1662 | if (new_fd) |
| 1720 | put_files_struct(new_fd); | 1663 | put_files_struct(new_fd); |
| 1721 | 1664 | ||
| 1722 | bad_unshare_cleanup_vm: | ||
| 1723 | if (new_mm) | ||
| 1724 | mmput(new_mm); | ||
| 1725 | |||
| 1726 | bad_unshare_cleanup_sigh: | ||
| 1727 | if (new_sigh) | ||
| 1728 | if (atomic_dec_and_test(&new_sigh->count)) | ||
| 1729 | kmem_cache_free(sighand_cachep, new_sigh); | ||
| 1730 | |||
| 1731 | bad_unshare_cleanup_fs: | 1665 | bad_unshare_cleanup_fs: |
| 1732 | if (new_fs) | 1666 | if (new_fs) |
| 1733 | free_fs_struct(new_fs); | 1667 | free_fs_struct(new_fs); |
| 1734 | 1668 | ||
| 1735 | bad_unshare_cleanup_thread: | ||
| 1736 | bad_unshare_out: | 1669 | bad_unshare_out: |
| 1737 | return err; | 1670 | return err; |
| 1738 | } | 1671 | } |
