diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 154 |
1 files changed, 42 insertions, 112 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 05b92c457010..e7548dee636b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/tracehook.h> | 40 | #include <linux/tracehook.h> |
41 | #include <linux/futex.h> | 41 | #include <linux/futex.h> |
42 | #include <linux/compat.h> | 42 | #include <linux/compat.h> |
43 | #include <linux/kthread.h> | ||
43 | #include <linux/task_io_accounting_ops.h> | 44 | #include <linux/task_io_accounting_ops.h> |
44 | #include <linux/rcupdate.h> | 45 | #include <linux/rcupdate.h> |
45 | #include <linux/ptrace.h> | 46 | #include <linux/ptrace.h> |
@@ -109,20 +110,25 @@ int nr_processes(void) | |||
109 | } | 110 | } |
110 | 111 | ||
111 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | 112 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR |
112 | # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) | 113 | # define alloc_task_struct_node(node) \ |
113 | # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) | 114 | kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node) |
115 | # define free_task_struct(tsk) \ | ||
116 | kmem_cache_free(task_struct_cachep, (tsk)) | ||
114 | static struct kmem_cache *task_struct_cachep; | 117 | static struct kmem_cache *task_struct_cachep; |
115 | #endif | 118 | #endif |
116 | 119 | ||
117 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR | 120 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR |
118 | static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) | 121 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, |
122 | int node) | ||
119 | { | 123 | { |
120 | #ifdef CONFIG_DEBUG_STACK_USAGE | 124 | #ifdef CONFIG_DEBUG_STACK_USAGE |
121 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; | 125 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; |
122 | #else | 126 | #else |
123 | gfp_t mask = GFP_KERNEL; | 127 | gfp_t mask = GFP_KERNEL; |
124 | #endif | 128 | #endif |
125 | return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); | 129 | struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER); |
130 | |||
131 | return page ? page_address(page) : NULL; | ||
126 | } | 132 | } |
127 | 133 | ||
128 | static inline void free_thread_info(struct thread_info *ti) | 134 | static inline void free_thread_info(struct thread_info *ti) |
@@ -249,16 +255,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
249 | struct task_struct *tsk; | 255 | struct task_struct *tsk; |
250 | struct thread_info *ti; | 256 | struct thread_info *ti; |
251 | unsigned long *stackend; | 257 | unsigned long *stackend; |
252 | 258 | int node = tsk_fork_get_node(orig); | |
253 | int err; | 259 | int err; |
254 | 260 | ||
255 | prepare_to_copy(orig); | 261 | prepare_to_copy(orig); |
256 | 262 | ||
257 | tsk = alloc_task_struct(); | 263 | tsk = alloc_task_struct_node(node); |
258 | if (!tsk) | 264 | if (!tsk) |
259 | return NULL; | 265 | return NULL; |
260 | 266 | ||
261 | ti = alloc_thread_info(tsk); | 267 | ti = alloc_thread_info_node(tsk, node); |
262 | if (!ti) { | 268 | if (!ti) { |
263 | free_task_struct(tsk); | 269 | free_task_struct(tsk); |
264 | return NULL; | 270 | return NULL; |
@@ -1181,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1181 | pid = alloc_pid(p->nsproxy->pid_ns); | 1187 | pid = alloc_pid(p->nsproxy->pid_ns); |
1182 | if (!pid) | 1188 | if (!pid) |
1183 | goto bad_fork_cleanup_io; | 1189 | goto bad_fork_cleanup_io; |
1184 | |||
1185 | if (clone_flags & CLONE_NEWPID) { | ||
1186 | retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); | ||
1187 | if (retval < 0) | ||
1188 | goto bad_fork_free_pid; | ||
1189 | } | ||
1190 | } | 1190 | } |
1191 | 1191 | ||
1192 | p->pid = pid_nr(pid); | 1192 | p->pid = pid_nr(pid); |
@@ -1205,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1205 | * Clear TID on mm_release()? | 1205 | * Clear TID on mm_release()? |
1206 | */ | 1206 | */ |
1207 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; | 1207 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; |
1208 | #ifdef CONFIG_BLOCK | ||
1209 | p->plug = NULL; | ||
1210 | #endif | ||
1208 | #ifdef CONFIG_FUTEX | 1211 | #ifdef CONFIG_FUTEX |
1209 | p->robust_list = NULL; | 1212 | p->robust_list = NULL; |
1210 | #ifdef CONFIG_COMPAT | 1213 | #ifdef CONFIG_COMPAT |
@@ -1290,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1290 | tracehook_finish_clone(p, clone_flags, trace); | 1293 | tracehook_finish_clone(p, clone_flags, trace); |
1291 | 1294 | ||
1292 | if (thread_group_leader(p)) { | 1295 | if (thread_group_leader(p)) { |
1293 | if (clone_flags & CLONE_NEWPID) | 1296 | if (is_child_reaper(pid)) |
1294 | p->nsproxy->pid_ns->child_reaper = p; | 1297 | p->nsproxy->pid_ns->child_reaper = p; |
1295 | 1298 | ||
1296 | p->signal->leader_pid = pid; | 1299 | p->signal->leader_pid = pid; |
@@ -1513,38 +1516,24 @@ void __init proc_caches_init(void) | |||
1513 | } | 1516 | } |
1514 | 1517 | ||
1515 | /* | 1518 | /* |
1516 | * Check constraints on flags passed to the unshare system call and | 1519 | * Check constraints on flags passed to the unshare system call. |
1517 | * force unsharing of additional process context as appropriate. | ||
1518 | */ | 1520 | */ |
1519 | static void check_unshare_flags(unsigned long *flags_ptr) | 1521 | static int check_unshare_flags(unsigned long unshare_flags) |
1520 | { | 1522 | { |
1523 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
1524 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | ||
1525 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | ||
1526 | return -EINVAL; | ||
1521 | /* | 1527 | /* |
1522 | * If unsharing a thread from a thread group, must also | 1528 | * Not implemented, but pretend it works if there is nothing to |
1523 | * unshare vm. | 1529 | * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND |
1524 | */ | 1530 | * needs to unshare vm. |
1525 | if (*flags_ptr & CLONE_THREAD) | ||
1526 | *flags_ptr |= CLONE_VM; | ||
1527 | |||
1528 | /* | ||
1529 | * If unsharing vm, must also unshare signal handlers. | ||
1530 | */ | ||
1531 | if (*flags_ptr & CLONE_VM) | ||
1532 | *flags_ptr |= CLONE_SIGHAND; | ||
1533 | |||
1534 | /* | ||
1535 | * If unsharing namespace, must also unshare filesystem information. | ||
1536 | */ | 1531 | */ |
1537 | if (*flags_ptr & CLONE_NEWNS) | 1532 | if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { |
1538 | *flags_ptr |= CLONE_FS; | 1533 | /* FIXME: get_task_mm() increments ->mm_users */ |
1539 | } | 1534 | if (atomic_read(&current->mm->mm_users) > 1) |
1540 | 1535 | return -EINVAL; | |
1541 | /* | 1536 | } |
1542 | * Unsharing of tasks created with CLONE_THREAD is not supported yet | ||
1543 | */ | ||
1544 | static int unshare_thread(unsigned long unshare_flags) | ||
1545 | { | ||
1546 | if (unshare_flags & CLONE_THREAD) | ||
1547 | return -EINVAL; | ||
1548 | 1537 | ||
1549 | return 0; | 1538 | return 0; |
1550 | } | 1539 | } |
@@ -1571,34 +1560,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) | |||
1571 | } | 1560 | } |
1572 | 1561 | ||
1573 | /* | 1562 | /* |
1574 | * Unsharing of sighand is not supported yet | ||
1575 | */ | ||
1576 | static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) | ||
1577 | { | ||
1578 | struct sighand_struct *sigh = current->sighand; | ||
1579 | |||
1580 | if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1) | ||
1581 | return -EINVAL; | ||
1582 | else | ||
1583 | return 0; | ||
1584 | } | ||
1585 | |||
1586 | /* | ||
1587 | * Unshare vm if it is being shared | ||
1588 | */ | ||
1589 | static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) | ||
1590 | { | ||
1591 | struct mm_struct *mm = current->mm; | ||
1592 | |||
1593 | if ((unshare_flags & CLONE_VM) && | ||
1594 | (mm && atomic_read(&mm->mm_users) > 1)) { | ||
1595 | return -EINVAL; | ||
1596 | } | ||
1597 | |||
1598 | return 0; | ||
1599 | } | ||
1600 | |||
1601 | /* | ||
1602 | * Unshare file descriptor table if it is being shared | 1563 | * Unshare file descriptor table if it is being shared |
1603 | */ | 1564 | */ |
1604 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) | 1565 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) |
@@ -1626,45 +1587,37 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp | |||
1626 | */ | 1587 | */ |
1627 | SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | 1588 | SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) |
1628 | { | 1589 | { |
1629 | int err = 0; | ||
1630 | struct fs_struct *fs, *new_fs = NULL; | 1590 | struct fs_struct *fs, *new_fs = NULL; |
1631 | struct sighand_struct *new_sigh = NULL; | ||
1632 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | ||
1633 | struct files_struct *fd, *new_fd = NULL; | 1591 | struct files_struct *fd, *new_fd = NULL; |
1634 | struct nsproxy *new_nsproxy = NULL; | 1592 | struct nsproxy *new_nsproxy = NULL; |
1635 | int do_sysvsem = 0; | 1593 | int do_sysvsem = 0; |
1594 | int err; | ||
1636 | 1595 | ||
1637 | check_unshare_flags(&unshare_flags); | 1596 | err = check_unshare_flags(unshare_flags); |
1638 | 1597 | if (err) | |
1639 | /* Return -EINVAL for all unsupported flags */ | ||
1640 | err = -EINVAL; | ||
1641 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
1642 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | ||
1643 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | ||
1644 | goto bad_unshare_out; | 1598 | goto bad_unshare_out; |
1645 | 1599 | ||
1646 | /* | 1600 | /* |
1601 | * If unsharing namespace, must also unshare filesystem information. | ||
1602 | */ | ||
1603 | if (unshare_flags & CLONE_NEWNS) | ||
1604 | unshare_flags |= CLONE_FS; | ||
1605 | /* | ||
1647 | * CLONE_NEWIPC must also detach from the undolist: after switching | 1606 | * CLONE_NEWIPC must also detach from the undolist: after switching |
1648 | * to a new ipc namespace, the semaphore arrays from the old | 1607 | * to a new ipc namespace, the semaphore arrays from the old |
1649 | * namespace are unreachable. | 1608 | * namespace are unreachable. |
1650 | */ | 1609 | */ |
1651 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) | 1610 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) |
1652 | do_sysvsem = 1; | 1611 | do_sysvsem = 1; |
1653 | if ((err = unshare_thread(unshare_flags))) | ||
1654 | goto bad_unshare_out; | ||
1655 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1612 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
1656 | goto bad_unshare_cleanup_thread; | 1613 | goto bad_unshare_out; |
1657 | if ((err = unshare_sighand(unshare_flags, &new_sigh))) | ||
1658 | goto bad_unshare_cleanup_fs; | ||
1659 | if ((err = unshare_vm(unshare_flags, &new_mm))) | ||
1660 | goto bad_unshare_cleanup_sigh; | ||
1661 | if ((err = unshare_fd(unshare_flags, &new_fd))) | 1614 | if ((err = unshare_fd(unshare_flags, &new_fd))) |
1662 | goto bad_unshare_cleanup_vm; | 1615 | goto bad_unshare_cleanup_fs; |
1663 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | 1616 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, |
1664 | new_fs))) | 1617 | new_fs))) |
1665 | goto bad_unshare_cleanup_fd; | 1618 | goto bad_unshare_cleanup_fd; |
1666 | 1619 | ||
1667 | if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { | 1620 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { |
1668 | if (do_sysvsem) { | 1621 | if (do_sysvsem) { |
1669 | /* | 1622 | /* |
1670 | * CLONE_SYSVSEM is equivalent to sys_exit(). | 1623 | * CLONE_SYSVSEM is equivalent to sys_exit(). |
@@ -1690,19 +1643,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1690 | spin_unlock(&fs->lock); | 1643 | spin_unlock(&fs->lock); |
1691 | } | 1644 | } |
1692 | 1645 | ||
1693 | if (new_mm) { | ||
1694 | mm = current->mm; | ||
1695 | active_mm = current->active_mm; | ||
1696 | current->mm = new_mm; | ||
1697 | current->active_mm = new_mm; | ||
1698 | if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
1699 | atomic_dec(&mm->oom_disable_count); | ||
1700 | atomic_inc(&new_mm->oom_disable_count); | ||
1701 | } | ||
1702 | activate_mm(active_mm, new_mm); | ||
1703 | new_mm = mm; | ||
1704 | } | ||
1705 | |||
1706 | if (new_fd) { | 1646 | if (new_fd) { |
1707 | fd = current->files; | 1647 | fd = current->files; |
1708 | current->files = new_fd; | 1648 | current->files = new_fd; |
@@ -1719,20 +1659,10 @@ bad_unshare_cleanup_fd: | |||
1719 | if (new_fd) | 1659 | if (new_fd) |
1720 | put_files_struct(new_fd); | 1660 | put_files_struct(new_fd); |
1721 | 1661 | ||
1722 | bad_unshare_cleanup_vm: | ||
1723 | if (new_mm) | ||
1724 | mmput(new_mm); | ||
1725 | |||
1726 | bad_unshare_cleanup_sigh: | ||
1727 | if (new_sigh) | ||
1728 | if (atomic_dec_and_test(&new_sigh->count)) | ||
1729 | kmem_cache_free(sighand_cachep, new_sigh); | ||
1730 | |||
1731 | bad_unshare_cleanup_fs: | 1662 | bad_unshare_cleanup_fs: |
1732 | if (new_fs) | 1663 | if (new_fs) |
1733 | free_fs_struct(new_fs); | 1664 | free_fs_struct(new_fs); |
1734 | 1665 | ||
1735 | bad_unshare_cleanup_thread: | ||
1736 | bad_unshare_out: | 1666 | bad_unshare_out: |
1737 | return err; | 1667 | return err; |
1738 | } | 1668 | } |