diff options
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 114 |
1 files changed, 76 insertions, 38 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index b9372a0bff18..ab5211b9e622 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include <linux/cgroup.h> | 34 | #include <linux/cgroup.h> |
| 35 | #include <linux/security.h> | 35 | #include <linux/security.h> |
| 36 | #include <linux/hugetlb.h> | 36 | #include <linux/hugetlb.h> |
| 37 | #include <linux/seccomp.h> | ||
| 37 | #include <linux/swap.h> | 38 | #include <linux/swap.h> |
| 38 | #include <linux/syscalls.h> | 39 | #include <linux/syscalls.h> |
| 39 | #include <linux/jiffies.h> | 40 | #include <linux/jiffies.h> |
| @@ -47,6 +48,7 @@ | |||
| 47 | #include <linux/audit.h> | 48 | #include <linux/audit.h> |
| 48 | #include <linux/memcontrol.h> | 49 | #include <linux/memcontrol.h> |
| 49 | #include <linux/ftrace.h> | 50 | #include <linux/ftrace.h> |
| 51 | #include <linux/proc_fs.h> | ||
| 50 | #include <linux/profile.h> | 52 | #include <linux/profile.h> |
| 51 | #include <linux/rmap.h> | 53 | #include <linux/rmap.h> |
| 52 | #include <linux/ksm.h> | 54 | #include <linux/ksm.h> |
| @@ -67,6 +69,7 @@ | |||
| 67 | #include <linux/oom.h> | 69 | #include <linux/oom.h> |
| 68 | #include <linux/khugepaged.h> | 70 | #include <linux/khugepaged.h> |
| 69 | #include <linux/signalfd.h> | 71 | #include <linux/signalfd.h> |
| 72 | #include <linux/uprobes.h> | ||
| 70 | 73 | ||
| 71 | #include <asm/pgtable.h> | 74 | #include <asm/pgtable.h> |
| 72 | #include <asm/pgalloc.h> | 75 | #include <asm/pgalloc.h> |
| @@ -111,32 +114,67 @@ int nr_processes(void) | |||
| 111 | return total; | 114 | return total; |
| 112 | } | 115 | } |
| 113 | 116 | ||
| 114 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | 117 | #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR |
| 115 | # define alloc_task_struct_node(node) \ | ||
| 116 | kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node) | ||
| 117 | # define free_task_struct(tsk) \ | ||
| 118 | kmem_cache_free(task_struct_cachep, (tsk)) | ||
| 119 | static struct kmem_cache *task_struct_cachep; | 118 | static struct kmem_cache *task_struct_cachep; |
| 119 | |||
| 120 | static inline struct task_struct *alloc_task_struct_node(int node) | ||
| 121 | { | ||
| 122 | return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); | ||
| 123 | } | ||
| 124 | |||
| 125 | void __weak arch_release_task_struct(struct task_struct *tsk) { } | ||
| 126 | |||
| 127 | static inline void free_task_struct(struct task_struct *tsk) | ||
| 128 | { | ||
| 129 | arch_release_task_struct(tsk); | ||
| 130 | kmem_cache_free(task_struct_cachep, tsk); | ||
| 131 | } | ||
| 120 | #endif | 132 | #endif |
| 121 | 133 | ||
| 122 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR | 134 | #ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR |
| 135 | void __weak arch_release_thread_info(struct thread_info *ti) { } | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a | ||
| 139 | * kmemcache based allocator. | ||
| 140 | */ | ||
| 141 | # if THREAD_SIZE >= PAGE_SIZE | ||
| 123 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, | 142 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, |
| 124 | int node) | 143 | int node) |
| 125 | { | 144 | { |
| 126 | #ifdef CONFIG_DEBUG_STACK_USAGE | 145 | struct page *page = alloc_pages_node(node, THREADINFO_GFP, |
| 127 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; | 146 | THREAD_SIZE_ORDER); |
| 128 | #else | ||
| 129 | gfp_t mask = GFP_KERNEL; | ||
| 130 | #endif | ||
| 131 | struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER); | ||
| 132 | 147 | ||
| 133 | return page ? page_address(page) : NULL; | 148 | return page ? page_address(page) : NULL; |
| 134 | } | 149 | } |
| 135 | 150 | ||
| 136 | static inline void free_thread_info(struct thread_info *ti) | 151 | static inline void free_thread_info(struct thread_info *ti) |
| 137 | { | 152 | { |
| 153 | arch_release_thread_info(ti); | ||
| 138 | free_pages((unsigned long)ti, THREAD_SIZE_ORDER); | 154 | free_pages((unsigned long)ti, THREAD_SIZE_ORDER); |
| 139 | } | 155 | } |
| 156 | # else | ||
| 157 | static struct kmem_cache *thread_info_cache; | ||
| 158 | |||
| 159 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, | ||
| 160 | int node) | ||
| 161 | { | ||
| 162 | return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node); | ||
| 163 | } | ||
| 164 | |||
| 165 | static void free_thread_info(struct thread_info *ti) | ||
| 166 | { | ||
| 167 | arch_release_thread_info(ti); | ||
| 168 | kmem_cache_free(thread_info_cache, ti); | ||
| 169 | } | ||
| 170 | |||
| 171 | void thread_info_cache_init(void) | ||
| 172 | { | ||
| 173 | thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, | ||
| 174 | THREAD_SIZE, 0, NULL); | ||
| 175 | BUG_ON(thread_info_cache == NULL); | ||
| 176 | } | ||
| 177 | # endif | ||
| 140 | #endif | 178 | #endif |
| 141 | 179 | ||
| 142 | /* SLAB cache for signal_struct structures (tsk->signal) */ | 180 | /* SLAB cache for signal_struct structures (tsk->signal) */ |
| @@ -170,6 +208,7 @@ void free_task(struct task_struct *tsk) | |||
| 170 | free_thread_info(tsk->stack); | 208 | free_thread_info(tsk->stack); |
| 171 | rt_mutex_debug_task_free(tsk); | 209 | rt_mutex_debug_task_free(tsk); |
| 172 | ftrace_graph_exit_task(tsk); | 210 | ftrace_graph_exit_task(tsk); |
| 211 | put_seccomp_filter(tsk); | ||
| 173 | free_task_struct(tsk); | 212 | free_task_struct(tsk); |
| 174 | } | 213 | } |
| 175 | EXPORT_SYMBOL(free_task); | 214 | EXPORT_SYMBOL(free_task); |
| @@ -203,17 +242,11 @@ void __put_task_struct(struct task_struct *tsk) | |||
| 203 | } | 242 | } |
| 204 | EXPORT_SYMBOL_GPL(__put_task_struct); | 243 | EXPORT_SYMBOL_GPL(__put_task_struct); |
| 205 | 244 | ||
| 206 | /* | 245 | void __init __weak arch_task_cache_init(void) { } |
| 207 | * macro override instead of weak attribute alias, to workaround | ||
| 208 | * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions. | ||
| 209 | */ | ||
| 210 | #ifndef arch_task_cache_init | ||
| 211 | #define arch_task_cache_init() | ||
| 212 | #endif | ||
| 213 | 246 | ||
| 214 | void __init fork_init(unsigned long mempages) | 247 | void __init fork_init(unsigned long mempages) |
| 215 | { | 248 | { |
| 216 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | 249 | #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR |
| 217 | #ifndef ARCH_MIN_TASKALIGN | 250 | #ifndef ARCH_MIN_TASKALIGN |
| 218 | #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES | 251 | #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES |
| 219 | #endif | 252 | #endif |
| @@ -260,8 +293,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 260 | int node = tsk_fork_get_node(orig); | 293 | int node = tsk_fork_get_node(orig); |
| 261 | int err; | 294 | int err; |
| 262 | 295 | ||
| 263 | prepare_to_copy(orig); | ||
| 264 | |||
| 265 | tsk = alloc_task_struct_node(node); | 296 | tsk = alloc_task_struct_node(node); |
| 266 | if (!tsk) | 297 | if (!tsk) |
| 267 | return NULL; | 298 | return NULL; |
| @@ -355,7 +386,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 355 | } | 386 | } |
| 356 | charge = 0; | 387 | charge = 0; |
| 357 | if (mpnt->vm_flags & VM_ACCOUNT) { | 388 | if (mpnt->vm_flags & VM_ACCOUNT) { |
| 358 | unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; | 389 | unsigned long len; |
| 390 | len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; | ||
| 359 | if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ | 391 | if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ |
| 360 | goto fail_nomem; | 392 | goto fail_nomem; |
| 361 | charge = len; | 393 | charge = len; |
| @@ -421,6 +453,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
| 421 | 453 | ||
| 422 | if (retval) | 454 | if (retval) |
| 423 | goto out; | 455 | goto out; |
| 456 | |||
| 457 | if (file && uprobe_mmap(tmp)) | ||
| 458 | goto out; | ||
| 424 | } | 459 | } |
| 425 | /* a new mm has just been created */ | 460 | /* a new mm has just been created */ |
| 426 | arch_dup_mmap(oldmm, mm); | 461 | arch_dup_mmap(oldmm, mm); |
| @@ -569,6 +604,7 @@ void mmput(struct mm_struct *mm) | |||
| 569 | might_sleep(); | 604 | might_sleep(); |
| 570 | 605 | ||
| 571 | if (atomic_dec_and_test(&mm->mm_users)) { | 606 | if (atomic_dec_and_test(&mm->mm_users)) { |
| 607 | uprobe_clear_state(mm); | ||
| 572 | exit_aio(mm); | 608 | exit_aio(mm); |
| 573 | ksm_exit(mm); | 609 | ksm_exit(mm); |
| 574 | khugepaged_exit(mm); /* must run before exit_mmap */ | 610 | khugepaged_exit(mm); /* must run before exit_mmap */ |
| @@ -579,7 +615,6 @@ void mmput(struct mm_struct *mm) | |||
| 579 | list_del(&mm->mmlist); | 615 | list_del(&mm->mmlist); |
| 580 | spin_unlock(&mmlist_lock); | 616 | spin_unlock(&mmlist_lock); |
| 581 | } | 617 | } |
| 582 | put_swap_token(mm); | ||
| 583 | if (mm->binfmt) | 618 | if (mm->binfmt) |
| 584 | module_put(mm->binfmt->module); | 619 | module_put(mm->binfmt->module); |
| 585 | mmdrop(mm); | 620 | mmdrop(mm); |
| @@ -747,12 +782,11 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
| 747 | exit_pi_state_list(tsk); | 782 | exit_pi_state_list(tsk); |
| 748 | #endif | 783 | #endif |
| 749 | 784 | ||
| 785 | uprobe_free_utask(tsk); | ||
| 786 | |||
| 750 | /* Get rid of any cached register state */ | 787 | /* Get rid of any cached register state */ |
| 751 | deactivate_mm(tsk, mm); | 788 | deactivate_mm(tsk, mm); |
| 752 | 789 | ||
| 753 | if (tsk->vfork_done) | ||
| 754 | complete_vfork_done(tsk); | ||
| 755 | |||
| 756 | /* | 790 | /* |
| 757 | * If we're exiting normally, clear a user-space tid field if | 791 | * If we're exiting normally, clear a user-space tid field if |
| 758 | * requested. We leave this alone when dying by signal, to leave | 792 | * requested. We leave this alone when dying by signal, to leave |
| @@ -773,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
| 773 | } | 807 | } |
| 774 | tsk->clear_child_tid = NULL; | 808 | tsk->clear_child_tid = NULL; |
| 775 | } | 809 | } |
| 810 | |||
| 811 | /* | ||
| 812 | * All done, finally we can wake up parent and return this mm to him. | ||
| 813 | * Also kthread_stop() uses this completion for synchronization. | ||
| 814 | */ | ||
| 815 | if (tsk->vfork_done) | ||
| 816 | complete_vfork_done(tsk); | ||
| 776 | } | 817 | } |
| 777 | 818 | ||
| 778 | /* | 819 | /* |
| @@ -794,13 +835,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
| 794 | memcpy(mm, oldmm, sizeof(*mm)); | 835 | memcpy(mm, oldmm, sizeof(*mm)); |
| 795 | mm_init_cpumask(mm); | 836 | mm_init_cpumask(mm); |
| 796 | 837 | ||
| 797 | /* Initializing for Swap token stuff */ | ||
| 798 | mm->token_priority = 0; | ||
| 799 | mm->last_interval = 0; | ||
| 800 | |||
| 801 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 838 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 802 | mm->pmd_huge_pte = NULL; | 839 | mm->pmd_huge_pte = NULL; |
| 803 | #endif | 840 | #endif |
| 841 | uprobe_reset_state(mm); | ||
| 804 | 842 | ||
| 805 | if (!mm_init(mm, tsk)) | 843 | if (!mm_init(mm, tsk)) |
| 806 | goto fail_nomem; | 844 | goto fail_nomem; |
| @@ -875,10 +913,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) | |||
| 875 | goto fail_nomem; | 913 | goto fail_nomem; |
| 876 | 914 | ||
| 877 | good_mm: | 915 | good_mm: |
| 878 | /* Initializing for Swap token stuff */ | ||
| 879 | mm->token_priority = 0; | ||
| 880 | mm->last_interval = 0; | ||
| 881 | |||
| 882 | tsk->mm = mm; | 916 | tsk->mm = mm; |
| 883 | tsk->active_mm = mm; | 917 | tsk->active_mm = mm; |
| 884 | return 0; | 918 | return 0; |
| @@ -946,9 +980,8 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) | |||
| 946 | * Share io context with parent, if CLONE_IO is set | 980 | * Share io context with parent, if CLONE_IO is set |
| 947 | */ | 981 | */ |
| 948 | if (clone_flags & CLONE_IO) { | 982 | if (clone_flags & CLONE_IO) { |
| 949 | tsk->io_context = ioc_task_link(ioc); | 983 | ioc_task_link(ioc); |
| 950 | if (unlikely(!tsk->io_context)) | 984 | tsk->io_context = ioc; |
| 951 | return -ENOMEM; | ||
| 952 | } else if (ioprio_valid(ioc->ioprio)) { | 985 | } else if (ioprio_valid(ioc->ioprio)) { |
| 953 | new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); | 986 | new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); |
| 954 | if (unlikely(!new_ioc)) | 987 | if (unlikely(!new_ioc)) |
| @@ -1162,6 +1195,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1162 | goto fork_out; | 1195 | goto fork_out; |
| 1163 | 1196 | ||
| 1164 | ftrace_graph_init_task(p); | 1197 | ftrace_graph_init_task(p); |
| 1198 | get_seccomp_filter(p); | ||
| 1165 | 1199 | ||
| 1166 | rt_mutex_init_task(p); | 1200 | rt_mutex_init_task(p); |
| 1167 | 1201 | ||
| @@ -1342,6 +1376,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1342 | INIT_LIST_HEAD(&p->pi_state_list); | 1376 | INIT_LIST_HEAD(&p->pi_state_list); |
| 1343 | p->pi_state_cache = NULL; | 1377 | p->pi_state_cache = NULL; |
| 1344 | #endif | 1378 | #endif |
| 1379 | uprobe_copy_process(p); | ||
| 1345 | /* | 1380 | /* |
| 1346 | * sigaltstack should be cleared when sharing the same VM | 1381 | * sigaltstack should be cleared when sharing the same VM |
| 1347 | */ | 1382 | */ |
| @@ -1380,6 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1380 | */ | 1415 | */ |
| 1381 | p->group_leader = p; | 1416 | p->group_leader = p; |
| 1382 | INIT_LIST_HEAD(&p->thread_group); | 1417 | INIT_LIST_HEAD(&p->thread_group); |
| 1418 | INIT_HLIST_HEAD(&p->task_works); | ||
| 1383 | 1419 | ||
| 1384 | /* Now that the task is set up, run cgroup callbacks if | 1420 | /* Now that the task is set up, run cgroup callbacks if |
| 1385 | * necessary. We need to run them before the task is visible | 1421 | * necessary. We need to run them before the task is visible |
| @@ -1464,6 +1500,8 @@ bad_fork_cleanup_io: | |||
| 1464 | if (p->io_context) | 1500 | if (p->io_context) |
| 1465 | exit_io_context(p); | 1501 | exit_io_context(p); |
| 1466 | bad_fork_cleanup_namespaces: | 1502 | bad_fork_cleanup_namespaces: |
| 1503 | if (unlikely(clone_flags & CLONE_NEWPID)) | ||
| 1504 | pid_ns_release_proc(p->nsproxy->pid_ns); | ||
| 1467 | exit_task_namespaces(p); | 1505 | exit_task_namespaces(p); |
| 1468 | bad_fork_cleanup_mm: | 1506 | bad_fork_cleanup_mm: |
| 1469 | if (p->mm) | 1507 | if (p->mm) |
