path: root/kernel/fork.c
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  125
1 files changed, 84 insertions, 41 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index b9372a0bff18..f00e319d8376 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -34,6 +34,7 @@
 #include <linux/cgroup.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/seccomp.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -47,6 +48,7 @@
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
 #include <linux/ftrace.h>
+#include <linux/proc_fs.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/ksm.h>
@@ -67,6 +69,7 @@
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
 #include <linux/signalfd.h>
+#include <linux/uprobes.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -111,32 +114,67 @@ int nr_processes(void)
 	return total;
 }
 
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct_node(node)		\
-		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
-# define free_task_struct(tsk)			\
-		kmem_cache_free(task_struct_cachep, (tsk))
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 static struct kmem_cache *task_struct_cachep;
+
+static inline struct task_struct *alloc_task_struct_node(int node)
+{
+	return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
+}
+
+void __weak arch_release_task_struct(struct task_struct *tsk) { }
+
+static inline void free_task_struct(struct task_struct *tsk)
+{
+	arch_release_task_struct(tsk);
+	kmem_cache_free(task_struct_cachep, tsk);
+}
 #endif
 
-#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+void __weak arch_release_thread_info(struct thread_info *ti) { }
+
+/*
+ * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
+ * kmemcache based allocator.
+ */
+# if THREAD_SIZE >= PAGE_SIZE
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-#ifdef CONFIG_DEBUG_STACK_USAGE
-	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
-#else
-	gfp_t mask = GFP_KERNEL;
-#endif
-	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
 {
+	arch_release_thread_info(ti);
 	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
+# else
+static struct kmem_cache *thread_info_cache;
+
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+						  int node)
+{
+	return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+}
+
+static void free_thread_info(struct thread_info *ti)
+{
+	arch_release_thread_info(ti);
+	kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
+	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+					      THREAD_SIZE, 0, NULL);
+	BUG_ON(thread_info_cache == NULL);
+}
+# endif
 #endif
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
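Note on the hunk above: the new "# if THREAD_SIZE >= PAGE_SIZE" split takes whole pages from the page allocator when a thread stack fills at least one page, and falls back to a slab-style cache otherwise. A minimal userspace sketch of the same compile-time dispatch; STACK_SIZE, stack_alloc and the aligned_alloc stand-in are illustrative, not kernel API:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE  4096
#define STACK_SIZE 8192			/* stands in for THREAD_SIZE */

#if STACK_SIZE >= PAGE_SIZE
/* whole pages: aligned_alloc() stands in for alloc_pages_node() */
static void *stack_alloc(void)
{
	return aligned_alloc(PAGE_SIZE, STACK_SIZE);
}
#else
/* sub-page stacks would come from a slab-style cache instead */
static void *stack_alloc(void)
{
	return malloc(STACK_SIZE);
}
#endif

int main(void)
{
	void *stack = stack_alloc();

	printf("stack at %p\n", stack);
	free(stack);
	return 0;
}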
@@ -170,6 +208,7 @@ void free_task(struct task_struct *tsk)
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
+	put_seccomp_filter(tsk);
 	free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -203,17 +242,11 @@ void __put_task_struct(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(__put_task_struct);
 
-/*
- * macro override instead of weak attribute alias, to workaround
- * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
- */
-#ifndef arch_task_cache_init
-#define arch_task_cache_init()
-#endif
+void __init __weak arch_task_cache_init(void) { }
 
 void __init fork_init(unsigned long mempages)
 {
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
 #endif
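The hunk above drops the old macro-override workaround for gcc 4.1.0/4.1.1 weak-attribute bugs and uses a plain weak default instead. A toy illustration of how a weak default gets overridden, compiled outside the kernel; the function name here is made up:

#include <stdio.h>

/* weak default: used only when no strong definition exists elsewhere */
void __attribute__((weak)) arch_cache_init(void)
{
	puts("generic no-op init");
}

/*
 * An architecture needing real work would simply define the strong symbol:
 *	void arch_cache_init(void) { ... }
 * and the linker would pick it over this weak default.
 */

int main(void)
{
	arch_cache_init();
	return 0;
}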
@@ -260,8 +293,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	int node = tsk_fork_get_node(orig);
 	int err;
 
-	prepare_to_copy(orig);
-
 	tsk = alloc_task_struct_node(node);
 	if (!tsk)
 		return NULL;
@@ -273,12 +304,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	}
 
 	err = arch_dup_task_struct(tsk, orig);
-	if (err)
-		goto out;
 
+	/*
+	 * We defer looking at err, because we will need this setup
+	 * for the clean up path to work correctly.
+	 */
 	tsk->stack = ti;
-
 	setup_thread_stack(tsk, orig);
+
+	if (err)
+		goto out;
+
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
 	stackend = end_of_stack(tsk);
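The reordering above wires up tsk->stack and the thread stack before the error is acted on, so the out: path can release everything through the normal teardown helpers. A hedged userspace sketch of the same idea; make_obj, obj_destroy and the failure flag are invented for illustration:

#include <stdlib.h>

struct obj {
	void *stack;
};

/* the cleanup path unconditionally uses obj->stack, like free_task() does */
static void obj_destroy(struct obj *o)
{
	free(o->stack);
	free(o);
}

static struct obj *make_obj(int simulate_failure)
{
	struct obj *o = malloc(sizeof(*o));
	void *stack = malloc(8192);
	int err;

	if (!o || !stack) {
		free(stack);
		free(o);
		return NULL;
	}

	err = simulate_failure ? -1 : 0;  /* stands in for arch_dup_task_struct() */

	/* link state first; only then act on err, so obj_destroy() is safe */
	o->stack = stack;
	if (err) {
		obj_destroy(o);
		return NULL;
	}
	return o;
}

int main(void)
{
	struct obj *o = make_obj(0);

	if (o)
		obj_destroy(o);
	return 0;
}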
@@ -355,7 +391,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		}
 		charge = 0;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
-			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+			unsigned long len;
+			len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
 				goto fail_nomem;
 			charge = len;
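The unsigned int to unsigned long change above matters on 64-bit: a single VMA larger than 2^44 bytes holds more than UINT_MAX 4 KiB pages, so a 32-bit page count silently truncates. A small demonstration, assuming a 64-bit unsigned long and the usual PAGE_SHIFT of 12:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long vm_start = 0;
	unsigned long vm_end = 1UL << 45;	/* a hypothetical 32 TB mapping */

	unsigned int len32 = (unsigned int)((vm_end - vm_start) >> PAGE_SHIFT);
	unsigned long len = (vm_end - vm_start) >> PAGE_SHIFT;

	/* the 32-bit count wraps to 0; the unsigned long count does not */
	printf("unsigned int: %u pages, unsigned long: %lu pages\n", len32, len);
	return 0;
}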
@@ -421,6 +458,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
 		if (retval)
 			goto out;
+
+		if (file && uprobe_mmap(tmp))
+			goto out;
 	}
 	/* a new mm has just been created */
 	arch_dup_mmap(oldmm, mm);
@@ -569,6 +609,7 @@ void mmput(struct mm_struct *mm)
 	might_sleep();
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
+		uprobe_clear_state(mm);
 		exit_aio(mm);
 		ksm_exit(mm);
 		khugepaged_exit(mm); /* must run before exit_mmap */
@@ -579,7 +620,6 @@ void mmput(struct mm_struct *mm)
 		list_del(&mm->mmlist);
 		spin_unlock(&mmlist_lock);
 	}
-	put_swap_token(mm);
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
 	mmdrop(mm);
@@ -747,12 +787,11 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		exit_pi_state_list(tsk);
 #endif
 
+	uprobe_free_utask(tsk);
+
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	if (tsk->vfork_done)
-		complete_vfork_done(tsk);
-
 	/*
 	 * If we're exiting normally, clear a user-space tid field if
 	 * requested. We leave this alone when dying by signal, to leave
@@ -773,6 +812,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		}
 		tsk->clear_child_tid = NULL;
 	}
+
+	/*
+	 * All done, finally we can wake up parent and return this mm to him.
+	 * Also kthread_stop() uses this completion for synchronization.
+	 */
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 }
 
 /*
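The move above delays waking the vfork parent until the child has finished its mm-side cleanup (including the clear_child_tid handling), since the parent may tear down shared state the moment the completion fires. A rough pthreads analogue of a completion, purely illustrative:

#include <pthread.h>
#include <stdio.h>

/* a poor man's struct completion */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int done;

static void *child(void *arg)
{
	(void)arg;
	/* ...finish all child-side cleanup (clear_child_tid etc.) first... */
	pthread_mutex_lock(&lock);
	done = 1;			/* only then wake the waiting parent */
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, child, NULL);

	pthread_mutex_lock(&lock);
	while (!done)			/* the parent's wait_for_completion() */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	puts("parent resumed after child cleanup");
	return 0;
}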
@@ -794,13 +840,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	memcpy(mm, oldmm, sizeof(*mm));
 	mm_init_cpumask(mm);
 
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
 #endif
+	uprobe_reset_state(mm);
 
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
@@ -875,10 +918,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 		goto fail_nomem;
 
 good_mm:
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
@@ -946,9 +985,8 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 	 * Share io context with parent, if CLONE_IO is set
 	 */
 	if (clone_flags & CLONE_IO) {
-		tsk->io_context = ioc_task_link(ioc);
-		if (unlikely(!tsk->io_context))
-			return -ENOMEM;
+		ioc_task_link(ioc);
+		tsk->io_context = ioc;
 	} else if (ioprio_valid(ioc->ioprio)) {
 		new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
 		if (unlikely(!new_ioc))
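In the hunk above, taking a reference on an io_context the parent already holds can no longer fail, so the -ENOMEM branch disappears and the code becomes take-reference-then-assign. A minimal sketch of that pattern with C11 atomics; the struct and helpers are stand-ins, not the block layer's real types:

#include <stdatomic.h>

struct ioctx {
	atomic_int refcount;
};

/* bumping a refcount on an object we already hold cannot fail */
static void ioctx_get(struct ioctx *ioc)
{
	atomic_fetch_add_explicit(&ioc->refcount, 1, memory_order_relaxed);
}

struct task {
	struct ioctx *io_context;
};

static void share_io_context(struct task *child, struct ioctx *parent_ioc)
{
	ioctx_get(parent_ioc);			/* take the reference first... */
	child->io_context = parent_ioc;		/* ...then publish the pointer */
}

int main(void)
{
	struct ioctx ioc = { .refcount = 1 };
	struct task child = { 0 };

	share_io_context(&child, &ioc);
	return 0;
}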
@@ -1162,6 +1200,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	ftrace_graph_init_task(p);
+	get_seccomp_filter(p);
 
 	rt_mutex_init_task(p);
 
@@ -1342,6 +1381,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	INIT_LIST_HEAD(&p->pi_state_list);
 	p->pi_state_cache = NULL;
 #endif
+	uprobe_copy_process(p);
 	/*
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
@@ -1380,6 +1420,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
+	INIT_HLIST_HEAD(&p->task_works);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
@@ -1464,6 +1505,8 @@ bad_fork_cleanup_io:
 	if (p->io_context)
 		exit_io_context(p);
 bad_fork_cleanup_namespaces:
+	if (unlikely(clone_flags & CLONE_NEWPID))
+		pid_ns_release_proc(p->nsproxy->pid_ns);
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm)
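The new CLONE_NEWPID release slots into copy_process()'s label ladder, where each bad_fork_cleanup_* label undoes one setup step and falls through to the next. The miniature sketch below shows that unwind style with invented resource names:

#include <stdlib.h>

/* a miniature version of the bad_fork_cleanup_* unwind ladder */
static int setup_everything(void)
{
	void *io, *ns, *mm;

	io = malloc(16);
	if (!io)
		goto out;
	ns = malloc(16);
	if (!ns)
		goto cleanup_io;
	mm = malloc(16);
	if (!mm)
		goto cleanup_ns;

	/* success: in copy_process() the task would now go live */
	free(mm);
	free(ns);
	free(io);
	return 0;

cleanup_ns:		/* later failures fall through the earlier labels */
	free(ns);
cleanup_io:
	free(io);
out:
	return -1;
}

int main(void)
{
	return setup_everything() ? 1 : 0;
}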