Diffstat (limited to 'kernel/fork.c'):

 -rw-r--r--  kernel/fork.c | 125
 1 file changed, 84 insertions(+), 41 deletions(-)
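In aggregate, the 84 insertions and 41 deletions below fall into a few independent themes: seccomp filter refcounting across fork, uprobes lifecycle hooks for the mm and the task, a rework of the task_struct/thread_info allocators behind Kconfig symbols with __weak arch hooks, removal of the old swap-token accounting, a simplified CLONE_IO path, and several error-path fixes (deferred arch_dup_task_struct() error handling, vfork completion ordering, pid-namespace proc release on failed fork). Short notes follow the relevant hunks.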
diff --git a/kernel/fork.c b/kernel/fork.c
index b9372a0bff18..f00e319d8376 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -34,6 +34,7 @@
 #include <linux/cgroup.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/seccomp.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -47,6 +48,7 @@
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
 #include <linux/ftrace.h>
+#include <linux/proc_fs.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/ksm.h>
@@ -67,6 +69,7 @@
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
 #include <linux/signalfd.h>
+#include <linux/uprobes.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -111,32 +114,67 @@ int nr_processes(void)
         return total;
 }
 
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct_node(node)           \
-                kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
-# define free_task_struct(tsk)                  \
-                kmem_cache_free(task_struct_cachep, (tsk))
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 static struct kmem_cache *task_struct_cachep;
+
+static inline struct task_struct *alloc_task_struct_node(int node)
+{
+        return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
+}
+
+void __weak arch_release_task_struct(struct task_struct *tsk) { }
+
+static inline void free_task_struct(struct task_struct *tsk)
+{
+        arch_release_task_struct(tsk);
+        kmem_cache_free(task_struct_cachep, tsk);
+}
 #endif
 
-#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+void __weak arch_release_thread_info(struct thread_info *ti) { }
+
+/*
+ * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
+ * kmemcache based allocator.
+ */
+# if THREAD_SIZE >= PAGE_SIZE
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
                                                   int node)
 {
-#ifdef CONFIG_DEBUG_STACK_USAGE
-        gfp_t mask = GFP_KERNEL | __GFP_ZERO;
-#else
-        gfp_t mask = GFP_KERNEL;
-#endif
-        struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+        struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+                                             THREAD_SIZE_ORDER);
 
         return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
 {
+        arch_release_thread_info(ti);
         free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
+# else
+static struct kmem_cache *thread_info_cache;
+
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+                                                  int node)
+{
+        return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+}
+
+static void free_thread_info(struct thread_info *ti)
+{
+        arch_release_thread_info(ti);
+        kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
+        thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+                                              THREAD_SIZE, 0, NULL);
+        BUG_ON(thread_info_cache == NULL);
+}
+# endif
 #endif
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
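The hunk above replaces the __HAVE_ARCH_TASK_STRUCT_ALLOCATOR/__HAVE_ARCH_THREAD_INFO_ALLOCATOR guards with Kconfig symbols, converts the alloc/free macros into inline functions, and gives architectures __weak release hooks (arch_release_task_struct(), arch_release_thread_info()) to override instead of redefining whole allocators. thread_info allocation also now splits on THREAD_SIZE: at a page or more, stacks come straight from the page allocator; below that, a dedicated kmem_cache stops each task from burning a full page. The deleted CONFIG_DEBUG_STACK_USAGE mask logic is not lost, it moves behind THREADINFO_GFP; as a sketch, the definition in this kernel's <linux/thread_info.h> is plausibly along these lines (the exact flag set is an assumption):

        #ifdef CONFIG_DEBUG_STACK_USAGE
        # define THREADINFO_GFP         (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
        #else
        # define THREADINFO_GFP         (GFP_KERNEL | __GFP_NOTRACK)
        #endif

Folding the mask into one symbol also lets the new kmem_cache path pick up the same debug behaviour as the page-allocator path without duplicating the #ifdef.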
@@ -170,6 +208,7 @@ void free_task(struct task_struct *tsk)
         free_thread_info(tsk->stack);
         rt_mutex_debug_task_free(tsk);
         ftrace_graph_exit_task(tsk);
+        put_seccomp_filter(tsk);
         free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
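put_seccomp_filter() is the release half of the new seccomp filter refcounting: children share their parent's filter chain by reference (the matching get_seccomp_filter() call is added to copy_process() further down), and the filter is freed only when the last task using it is freed. A minimal userspace analogue of the get/put pattern, illustrative only and not the kernel's actual seccomp structures:

        #include <stdatomic.h>
        #include <stdlib.h>

        struct filter {
                atomic_int usage;
                /* ... the BPF program would live here ... */
        };

        static struct filter *filter_get(struct filter *f)
        {
                if (f)
                        atomic_fetch_add(&f->usage, 1); /* fork: one more user */
                return f;
        }

        static void filter_put(struct filter *f)
        {
                /* free_task: drop our reference, free on the last one */
                if (f && atomic_fetch_sub(&f->usage, 1) == 1)
                        free(f);
        }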
@@ -203,17 +242,11 @@ void __put_task_struct(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(__put_task_struct);
 
-/*
- * macro override instead of weak attribute alias, to workaround
- * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
- */
-#ifndef arch_task_cache_init
-#define arch_task_cache_init()
-#endif
+void __init __weak arch_task_cache_init(void) { }
 
 void __init fork_init(unsigned long mempages)
 {
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN      L1_CACHE_BYTES
 #endif
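The macro-override dance existed only to dodge weak-attribute bugs in gcc 4.1.0/4.1.1; with those compilers out of the support range, a plain __weak empty function is the cleaner idiom. An architecture that needs a real task cache just provides a strong definition and the linker prefers it. The mechanism in a standalone, compilable form (hypothetical names, GCC/Clang attribute syntax):

        /* core.c: generic code supplies a weak default */
        #include <stdio.h>

        void __attribute__((weak)) arch_hook(void)
        {
                printf("generic no-op hook\n");
        }

        int main(void)
        {
                arch_hook();    /* runs the override if one is linked in */
                return 0;
        }

        /*
         * arch.c: an "architecture" may ship a strong override; when both
         * files are linked (cc core.c arch.c), the strong symbol wins:
         *
         *      void arch_hook(void)
         *      {
         *              printf("arch-specific setup ran\n");
         *      }
         */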
@@ -260,8 +293,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
         int node = tsk_fork_get_node(orig);
         int err;
 
-        prepare_to_copy(orig);
-
         tsk = alloc_task_struct_node(node);
         if (!tsk)
                 return NULL;
@@ -273,12 +304,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
         }
 
         err = arch_dup_task_struct(tsk, orig);
-        if (err)
-                goto out;
 
+        /*
+         * We defer looking at err, because we will need this setup
+         * for the clean up path to work correctly.
+         */
         tsk->stack = ti;
-
         setup_thread_stack(tsk, orig);
+
+        if (err)
+                goto out;
+
         clear_user_return_notifier(tsk);
         clear_tsk_need_resched(tsk);
         stackend = end_of_stack(tsk);
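arch_dup_task_struct()'s return value is now examined only after tsk->stack is assigned and setup_thread_stack() has run. Per the added comment, the cleanup path depends on that setup, presumably because the teardown at the out: label runs through the new arch release hooks and expects a fully wired-up task. For context, that label (not visible in the hunk) is essentially:

        out:
                free_thread_info(ti);
                free_task_struct(tsk);
                return NULL;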
@@ -355,7 +391,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                 }
                 charge = 0;
                 if (mpnt->vm_flags & VM_ACCOUNT) {
-                        unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+                        unsigned long len;
+                        len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
                         if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
                                 goto fail_nomem;
                         charge = len;
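The type change matters on 64-bit: a single VMA of 16 TiB or more spans at least 2^32 pages (with 4 KiB pages), so the page count no longer fits in unsigned int and the VM_ACCOUNT charge would be silently truncated. A standalone demonstration for an LP64 target (made-up addresses, PAGE_SHIFT of 12 assumed):

        #include <stdio.h>

        #define PAGE_SHIFT 12

        int main(void)
        {
                unsigned long vm_start = 0;
                unsigned long vm_end   = 1UL << 45;     /* a 32 TiB mapping */

                unsigned int  len32 = (vm_end - vm_start) >> PAGE_SHIFT;
                unsigned long len64 = (vm_end - vm_start) >> PAGE_SHIFT;

                printf("unsigned int:  %u pages\n", len32);   /* truncated to 0 */
                printf("unsigned long: %lu pages\n", len64);  /* 8589934592 */
                return 0;
        }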
@@ -421,6 +458,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
                 if (retval)
                         goto out;
+
+                if (file && uprobe_mmap(tmp))
+                        goto out;
         }
         /* a new mm has just been created */
         arch_dup_mmap(oldmm, mm);
@@ -569,6 +609,7 @@ void mmput(struct mm_struct *mm)
         might_sleep();
 
         if (atomic_dec_and_test(&mm->mm_users)) {
+                uprobe_clear_state(mm);
                 exit_aio(mm);
                 ksm_exit(mm);
                 khugepaged_exit(mm); /* must run before exit_mmap */
@@ -579,7 +620,6 @@ void mmput(struct mm_struct *mm)
                         list_del(&mm->mmlist);
                         spin_unlock(&mmlist_lock);
                 }
-                put_swap_token(mm);
                 if (mm->binfmt)
                         module_put(mm->binfmt->module);
                 mmdrop(mm);
@@ -747,12 +787,11 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
                 exit_pi_state_list(tsk);
 #endif
 
+        uprobe_free_utask(tsk);
+
         /* Get rid of any cached register state */
         deactivate_mm(tsk, mm);
 
-        if (tsk->vfork_done)
-                complete_vfork_done(tsk);
-
         /*
          * If we're exiting normally, clear a user-space tid field if
          * requested.  We leave this alone when dying by signal, to leave
@@ -773,6 +812,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
                 }
                 tsk->clear_child_tid = NULL;
         }
+
+        /*
+         * All done, finally we can wake up parent and return this mm to him.
+         * Also kthread_stop() uses this completion for synchronization.
+         */
+        if (tsk->vfork_done)
+                complete_vfork_done(tsk);
 }
 
 /*
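complete_vfork_done() moves from before the clear_child_tid handling (removed in the earlier mm_release() hunk) to the very end of the function. A parent blocked in vfork() shares this mm with the child, so waking it any sooner would let it resume while the child could still be writing the tid clearing into the shared address space; the completion has to be the last thing the child does with the mm. The added comment also records the second user of the same completion: kthread_stop() waits on it to synchronize against the exiting kernel thread.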
@@ -794,13 +840,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
         memcpy(mm, oldmm, sizeof(*mm));
         mm_init_cpumask(mm);
 
-        /* Initializing for Swap token stuff */
-        mm->token_priority = 0;
-        mm->last_interval = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
         mm->pmd_huge_pte = NULL;
 #endif
+        uprobe_reset_state(mm);
 
         if (!mm_init(mm, tsk))
                 goto fail_nomem;
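This is one of several uprobes lifecycle hooks threaded through the patch (together with the new <linux/uprobes.h> include at the top). Taken together they track the mm and the task from creation to death: uprobe_mmap() in dup_mmap() keeps probes armed in the child's copies of file-backed mappings, uprobe_reset_state() here ensures a freshly duplicated mm does not inherit the parent's per-mm uprobes bookkeeping, uprobe_clear_state() in mmput() drops that state with the last mm reference, and uprobe_free_utask()/uprobe_copy_process() manage the per-task side in mm_release() and copy_process().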
@@ -875,10 +918,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
                 goto fail_nomem;
 
 good_mm:
-        /* Initializing for Swap token stuff */
-        mm->token_priority = 0;
-        mm->last_interval = 0;
-
         tsk->mm = mm;
         tsk->active_mm = mm;
         return 0;
@@ -946,9 +985,8 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
          * Share io context with parent, if CLONE_IO is set
          */
         if (clone_flags & CLONE_IO) {
-                tsk->io_context = ioc_task_link(ioc);
-                if (unlikely(!tsk->io_context))
-                        return -ENOMEM;
+                ioc_task_link(ioc);
+                tsk->io_context = ioc;
         } else if (ioprio_valid(ioc->ioprio)) {
                 new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
                 if (unlikely(!new_ioc))
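The old ioc_task_link() tried to take a reference that could in principle fail, forcing the -ENOMEM return here. But the forking task already holds a reference on its own io_context, so the count cannot be zero, a plain unconditional increment suffices, and the helper presumably returns void now, removing this error path from copy_io(). The two refcount idioms side by side, as a compilable userspace sketch (illustrative, not the block layer's actual helpers):

        #include <stdatomic.h>
        #include <stdbool.h>

        /* Taking a reference on an object someone else might be freeing:
         * must refuse if the count has already hit zero. */
        static bool ref_get_not_zero(atomic_long *refs)
        {
                long old = atomic_load(refs);

                while (old != 0)
                        if (atomic_compare_exchange_weak(refs, &old, old + 1))
                                return true;
                return false;
        }

        /* Taking a reference while already holding one (the CLONE_IO
         * case): the count cannot be zero, so this cannot fail. */
        static void ref_get(atomic_long *refs)
        {
                atomic_fetch_add(refs, 1);
        }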
@@ -1162,6 +1200,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                 goto fork_out;
 
         ftrace_graph_init_task(p);
+        get_seccomp_filter(p);
 
         rt_mutex_init_task(p);
 
@@ -1342,6 +1381,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         INIT_LIST_HEAD(&p->pi_state_list);
         p->pi_state_cache = NULL;
 #endif
+        uprobe_copy_process(p);
         /*
          * sigaltstack should be cleared when sharing the same VM
          */
@@ -1380,6 +1420,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
          */
         p->group_leader = p;
         INIT_LIST_HEAD(&p->thread_group);
+        INIT_HLIST_HEAD(&p->task_works);
 
         /* Now that the task is set up, run cgroup callbacks if
          * necessary. We need to run them before the task is visible
@@ -1464,6 +1505,8 @@ bad_fork_cleanup_io:
         if (p->io_context)
                 exit_io_context(p);
 bad_fork_cleanup_namespaces:
+        if (unlikely(clone_flags & CLONE_NEWPID))
+                pid_ns_release_proc(p->nsproxy->pid_ns);
         exit_task_namespaces(p);
 bad_fork_cleanup_mm:
         if (p->mm)
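The new error-path cleanup pairs with the <linux/proc_fs.h> include at the top of the file: when CLONE_NEWPID created a fresh pid namespace but the fork subsequently fails, the namespace's proc mount has to be dropped via pid_ns_release_proc() before the namespaces themselves are torn down, closing a reference leak on this unwind path.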