Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 1274 |
1 files changed, 1274 insertions, 0 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
new file mode 100644
index 000000000000..f42a17f88699
--- /dev/null
+++ b/kernel/fork.c
@@ -0,0 +1,1274 @@
1 | /* | ||
2 | * linux/kernel/fork.c | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * 'fork.c' contains the help-routines for the 'fork' system call | ||
9 | * (see also entry.S and others). | ||
10 | * Fork is rather simple, once you get the hang of it, but the memory | ||
11 | * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/unistd.h> | ||
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/vmalloc.h> | ||
21 | #include <linux/completion.h> | ||
22 | #include <linux/namespace.h> | ||
23 | #include <linux/personality.h> | ||
24 | #include <linux/mempolicy.h> | ||
25 | #include <linux/sem.h> | ||
26 | #include <linux/file.h> | ||
27 | #include <linux/key.h> | ||
28 | #include <linux/binfmts.h> | ||
29 | #include <linux/mman.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/cpu.h> | ||
32 | #include <linux/cpuset.h> | ||
33 | #include <linux/security.h> | ||
34 | #include <linux/swap.h> | ||
35 | #include <linux/syscalls.h> | ||
36 | #include <linux/jiffies.h> | ||
37 | #include <linux/futex.h> | ||
38 | #include <linux/ptrace.h> | ||
39 | #include <linux/mount.h> | ||
40 | #include <linux/audit.h> | ||
41 | #include <linux/profile.h> | ||
42 | #include <linux/rmap.h> | ||
43 | #include <linux/acct.h> | ||
44 | |||
45 | #include <asm/pgtable.h> | ||
46 | #include <asm/pgalloc.h> | ||
47 | #include <asm/uaccess.h> | ||
48 | #include <asm/mmu_context.h> | ||
49 | #include <asm/cacheflush.h> | ||
50 | #include <asm/tlbflush.h> | ||
51 | |||
52 | /* | ||
53 | * Protected counters by write_lock_irq(&tasklist_lock) | ||
54 | */ | ||
55 | unsigned long total_forks; /* Handle normal Linux uptimes. */ | ||
56 | int nr_threads; /* The idle threads do not count.. */ | ||
57 | |||
58 | int max_threads; /* tunable limit on nr_threads */ | ||
59 | |||
60 | DEFINE_PER_CPU(unsigned long, process_counts) = 0; | ||
61 | |||
62 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ | ||
63 | |||
64 | EXPORT_SYMBOL(tasklist_lock); | ||
65 | |||
66 | int nr_processes(void) | ||
67 | { | ||
68 | int cpu; | ||
69 | int total = 0; | ||
70 | |||
71 | for_each_online_cpu(cpu) | ||
72 | total += per_cpu(process_counts, cpu); | ||
73 | |||
74 | return total; | ||
75 | } | ||
76 | |||
77 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | ||
78 | # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) | ||
79 | # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) | ||
80 | static kmem_cache_t *task_struct_cachep; | ||
81 | #endif | ||
82 | |||
83 | /* SLAB cache for signal_struct structures (tsk->signal) */ | ||
84 | kmem_cache_t *signal_cachep; | ||
85 | |||
86 | /* SLAB cache for sighand_struct structures (tsk->sighand) */ | ||
87 | kmem_cache_t *sighand_cachep; | ||
88 | |||
89 | /* SLAB cache for files_struct structures (tsk->files) */ | ||
90 | kmem_cache_t *files_cachep; | ||
91 | |||
92 | /* SLAB cache for fs_struct structures (tsk->fs) */ | ||
93 | kmem_cache_t *fs_cachep; | ||
94 | |||
95 | /* SLAB cache for vm_area_struct structures */ | ||
96 | kmem_cache_t *vm_area_cachep; | ||
97 | |||
98 | /* SLAB cache for mm_struct structures (tsk->mm) */ | ||
99 | static kmem_cache_t *mm_cachep; | ||
100 | |||
101 | void free_task(struct task_struct *tsk) | ||
102 | { | ||
103 | free_thread_info(tsk->thread_info); | ||
104 | free_task_struct(tsk); | ||
105 | } | ||
106 | EXPORT_SYMBOL(free_task); | ||
107 | |||
108 | void __put_task_struct(struct task_struct *tsk) | ||
109 | { | ||
110 | WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); | ||
111 | WARN_ON(atomic_read(&tsk->usage)); | ||
112 | WARN_ON(tsk == current); | ||
113 | |||
114 | if (unlikely(tsk->audit_context)) | ||
115 | audit_free(tsk); | ||
116 | security_task_free(tsk); | ||
117 | free_uid(tsk->user); | ||
118 | put_group_info(tsk->group_info); | ||
119 | |||
120 | if (!profile_handoff_task(tsk)) | ||
121 | free_task(tsk); | ||
122 | } | ||
123 | |||
124 | void __init fork_init(unsigned long mempages) | ||
125 | { | ||
126 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | ||
127 | #ifndef ARCH_MIN_TASKALIGN | ||
128 | #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES | ||
129 | #endif | ||
130 | /* create a slab on which task_structs can be allocated */ | ||
131 | task_struct_cachep = | ||
132 | kmem_cache_create("task_struct", sizeof(struct task_struct), | ||
133 | ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL); | ||
134 | #endif | ||
135 | |||
136 | /* | ||
137 | * The default maximum number of threads is set to a safe | ||
138 | * value: the thread structures can take up at most one eighth | ||
139 | * of memory. | ||
140 | */ | ||
141 | max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); | ||
142 | |||
143 | /* | ||
144 | * we need to allow at least 20 threads to boot a system | ||
145 | */ | ||
146 | if(max_threads < 20) | ||
147 | max_threads = 20; | ||
148 | |||
149 | init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; | ||
150 | init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; | ||
151 | init_task.signal->rlim[RLIMIT_SIGPENDING] = | ||
152 | init_task.signal->rlim[RLIMIT_NPROC]; | ||
153 | } | ||
154 | |||
155 | static struct task_struct *dup_task_struct(struct task_struct *orig) | ||
156 | { | ||
157 | struct task_struct *tsk; | ||
158 | struct thread_info *ti; | ||
159 | |||
160 | prepare_to_copy(orig); | ||
161 | |||
162 | tsk = alloc_task_struct(); | ||
163 | if (!tsk) | ||
164 | return NULL; | ||
165 | |||
166 | ti = alloc_thread_info(tsk); | ||
167 | if (!ti) { | ||
168 | free_task_struct(tsk); | ||
169 | return NULL; | ||
170 | } | ||
171 | |||
172 | *ti = *orig->thread_info; | ||
173 | *tsk = *orig; | ||
174 | tsk->thread_info = ti; | ||
175 | ti->task = tsk; | ||
176 | |||
177 | /* One for us, one for whoever does the "release_task()" (usually parent) */ | ||
178 | atomic_set(&tsk->usage,2); | ||
179 | return tsk; | ||
180 | } | ||
181 | |||
182 | #ifdef CONFIG_MMU | ||
183 | static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) | ||
184 | { | ||
185 | struct vm_area_struct * mpnt, *tmp, **pprev; | ||
186 | struct rb_node **rb_link, *rb_parent; | ||
187 | int retval; | ||
188 | unsigned long charge; | ||
189 | struct mempolicy *pol; | ||
190 | |||
191 | down_write(&oldmm->mmap_sem); | ||
192 | flush_cache_mm(current->mm); | ||
193 | mm->locked_vm = 0; | ||
194 | mm->mmap = NULL; | ||
195 | mm->mmap_cache = NULL; | ||
196 | mm->free_area_cache = oldmm->mmap_base; | ||
197 | mm->map_count = 0; | ||
198 | set_mm_counter(mm, rss, 0); | ||
199 | set_mm_counter(mm, anon_rss, 0); | ||
200 | cpus_clear(mm->cpu_vm_mask); | ||
201 | mm->mm_rb = RB_ROOT; | ||
202 | rb_link = &mm->mm_rb.rb_node; | ||
203 | rb_parent = NULL; | ||
204 | pprev = &mm->mmap; | ||
205 | |||
206 | for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { | ||
207 | struct file *file; | ||
208 | |||
209 | if (mpnt->vm_flags & VM_DONTCOPY) { | ||
210 | __vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, | ||
211 | -vma_pages(mpnt)); | ||
212 | continue; | ||
213 | } | ||
214 | charge = 0; | ||
215 | if (mpnt->vm_flags & VM_ACCOUNT) { | ||
216 | unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; | ||
217 | if (security_vm_enough_memory(len)) | ||
218 | goto fail_nomem; | ||
219 | charge = len; | ||
220 | } | ||
221 | tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); | ||
222 | if (!tmp) | ||
223 | goto fail_nomem; | ||
224 | *tmp = *mpnt; | ||
225 | pol = mpol_copy(vma_policy(mpnt)); | ||
226 | retval = PTR_ERR(pol); | ||
227 | if (IS_ERR(pol)) | ||
228 | goto fail_nomem_policy; | ||
229 | vma_set_policy(tmp, pol); | ||
230 | tmp->vm_flags &= ~VM_LOCKED; | ||
231 | tmp->vm_mm = mm; | ||
232 | tmp->vm_next = NULL; | ||
233 | anon_vma_link(tmp); | ||
234 | file = tmp->vm_file; | ||
235 | if (file) { | ||
236 | struct inode *inode = file->f_dentry->d_inode; | ||
237 | get_file(file); | ||
238 | if (tmp->vm_flags & VM_DENYWRITE) | ||
239 | atomic_dec(&inode->i_writecount); | ||
240 | |||
241 | /* insert tmp into the share list, just after mpnt */ | ||
242 | spin_lock(&file->f_mapping->i_mmap_lock); | ||
243 | tmp->vm_truncate_count = mpnt->vm_truncate_count; | ||
244 | flush_dcache_mmap_lock(file->f_mapping); | ||
245 | vma_prio_tree_add(tmp, mpnt); | ||
246 | flush_dcache_mmap_unlock(file->f_mapping); | ||
247 | spin_unlock(&file->f_mapping->i_mmap_lock); | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Link in the new vma and copy the page table entries: | ||
252 | * link in first so that swapoff can see swap entries, | ||
253 | * and try_to_unmap_one's find_vma find the new vma. | ||
254 | */ | ||
255 | spin_lock(&mm->page_table_lock); | ||
256 | *pprev = tmp; | ||
257 | pprev = &tmp->vm_next; | ||
258 | |||
259 | __vma_link_rb(mm, tmp, rb_link, rb_parent); | ||
260 | rb_link = &tmp->vm_rb.rb_right; | ||
261 | rb_parent = &tmp->vm_rb; | ||
262 | |||
263 | mm->map_count++; | ||
264 | retval = copy_page_range(mm, current->mm, tmp); | ||
265 | spin_unlock(&mm->page_table_lock); | ||
266 | |||
267 | if (tmp->vm_ops && tmp->vm_ops->open) | ||
268 | tmp->vm_ops->open(tmp); | ||
269 | |||
270 | if (retval) | ||
271 | goto out; | ||
272 | } | ||
273 | retval = 0; | ||
274 | |||
275 | out: | ||
276 | flush_tlb_mm(current->mm); | ||
277 | up_write(&oldmm->mmap_sem); | ||
278 | return retval; | ||
279 | fail_nomem_policy: | ||
280 | kmem_cache_free(vm_area_cachep, tmp); | ||
281 | fail_nomem: | ||
282 | retval = -ENOMEM; | ||
283 | vm_unacct_memory(charge); | ||
284 | goto out; | ||
285 | } | ||
286 | |||
287 | static inline int mm_alloc_pgd(struct mm_struct * mm) | ||
288 | { | ||
289 | mm->pgd = pgd_alloc(mm); | ||
290 | if (unlikely(!mm->pgd)) | ||
291 | return -ENOMEM; | ||
292 | return 0; | ||
293 | } | ||
294 | |||
295 | static inline void mm_free_pgd(struct mm_struct * mm) | ||
296 | { | ||
297 | pgd_free(mm->pgd); | ||
298 | } | ||
299 | #else | ||
300 | #define dup_mmap(mm, oldmm) (0) | ||
301 | #define mm_alloc_pgd(mm) (0) | ||
302 | #define mm_free_pgd(mm) | ||
303 | #endif /* CONFIG_MMU */ | ||
304 | |||
305 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); | ||
306 | |||
307 | #define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) | ||
308 | #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) | ||
309 | |||
310 | #include <linux/init_task.h> | ||
311 | |||
312 | static struct mm_struct * mm_init(struct mm_struct * mm) | ||
313 | { | ||
314 | atomic_set(&mm->mm_users, 1); | ||
315 | atomic_set(&mm->mm_count, 1); | ||
316 | init_rwsem(&mm->mmap_sem); | ||
317 | INIT_LIST_HEAD(&mm->mmlist); | ||
318 | mm->core_waiters = 0; | ||
319 | mm->nr_ptes = 0; | ||
320 | spin_lock_init(&mm->page_table_lock); | ||
321 | rwlock_init(&mm->ioctx_list_lock); | ||
322 | mm->ioctx_list = NULL; | ||
323 | mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); | ||
324 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
325 | |||
326 | if (likely(!mm_alloc_pgd(mm))) { | ||
327 | mm->def_flags = 0; | ||
328 | return mm; | ||
329 | } | ||
330 | free_mm(mm); | ||
331 | return NULL; | ||
332 | } | ||
333 | |||
334 | /* | ||
335 | * Allocate and initialize an mm_struct. | ||
336 | */ | ||
337 | struct mm_struct * mm_alloc(void) | ||
338 | { | ||
339 | struct mm_struct * mm; | ||
340 | |||
341 | mm = allocate_mm(); | ||
342 | if (mm) { | ||
343 | memset(mm, 0, sizeof(*mm)); | ||
344 | mm = mm_init(mm); | ||
345 | } | ||
346 | return mm; | ||
347 | } | ||
348 | |||
349 | /* | ||
350 | * Called when the last reference to the mm | ||
351 | * is dropped: either by a lazy thread or by | ||
352 | * mmput. Free the page directory and the mm. | ||
353 | */ | ||
354 | void fastcall __mmdrop(struct mm_struct *mm) | ||
355 | { | ||
356 | BUG_ON(mm == &init_mm); | ||
357 | mm_free_pgd(mm); | ||
358 | destroy_context(mm); | ||
359 | free_mm(mm); | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * Decrement the use count and release all resources for an mm. | ||
364 | */ | ||
365 | void mmput(struct mm_struct *mm) | ||
366 | { | ||
367 | if (atomic_dec_and_test(&mm->mm_users)) { | ||
368 | exit_aio(mm); | ||
369 | exit_mmap(mm); | ||
370 | if (!list_empty(&mm->mmlist)) { | ||
371 | spin_lock(&mmlist_lock); | ||
372 | list_del(&mm->mmlist); | ||
373 | spin_unlock(&mmlist_lock); | ||
374 | } | ||
375 | put_swap_token(mm); | ||
376 | mmdrop(mm); | ||
377 | } | ||
378 | } | ||
379 | EXPORT_SYMBOL_GPL(mmput); | ||
380 | |||
381 | /** | ||
382 | * get_task_mm - acquire a reference to the task's mm | ||
383 | * | ||
384 | * Returns %NULL if the task has no mm, or if PF_BORROWED_MM is set | ||
385 | * (meaning this kernel worker thread has transiently adopted a user mm | ||
386 | * with use_mm, e.g. to do its AIO). Otherwise returns the mm after | ||
387 | * bumping up its use count. The caller must release the mm via mmput() | ||
388 | * after use. Typically used by /proc and ptrace. | ||
389 | */ | ||
390 | struct mm_struct *get_task_mm(struct task_struct *task) | ||
391 | { | ||
392 | struct mm_struct *mm; | ||
393 | |||
394 | task_lock(task); | ||
395 | mm = task->mm; | ||
396 | if (mm) { | ||
397 | if (task->flags & PF_BORROWED_MM) | ||
398 | mm = NULL; | ||
399 | else | ||
400 | atomic_inc(&mm->mm_users); | ||
401 | } | ||
402 | task_unlock(task); | ||
403 | return mm; | ||
404 | } | ||
405 | EXPORT_SYMBOL_GPL(get_task_mm); | ||
406 | |||
407 | /* Please note the differences between mmput and mm_release. | ||
408 | * mmput is called whenever we stop holding onto a mm_struct, | ||
409 | * whether we succeeded or failed. | ||
410 | * | ||
411 | * mm_release is called after a mm_struct has been removed | ||
412 | * from the current process. | ||
413 | * | ||
414 | * This difference is important for error handling: when we have | ||
415 | * only half set up a mm_struct for a new process and need to restore | ||
416 | * the old one, we mmput the new mm_struct before | ||
417 | * restoring the old one. | ||
418 | * Eric Biederman 10 January 1998 | ||
419 | */ | ||
420 | void mm_release(struct task_struct *tsk, struct mm_struct *mm) | ||
421 | { | ||
422 | struct completion *vfork_done = tsk->vfork_done; | ||
423 | |||
424 | /* Get rid of any cached register state */ | ||
425 | deactivate_mm(tsk, mm); | ||
426 | |||
427 | /* notify parent sleeping on vfork() */ | ||
428 | if (vfork_done) { | ||
429 | tsk->vfork_done = NULL; | ||
430 | complete(vfork_done); | ||
431 | } | ||
432 | if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) { | ||
433 | u32 __user * tidptr = tsk->clear_child_tid; | ||
434 | tsk->clear_child_tid = NULL; | ||
435 | |||
436 | /* | ||
437 | * We don't check the error code - if userspace has | ||
438 | * not set up a proper pointer then tough luck. | ||
439 | */ | ||
440 | put_user(0, tidptr); | ||
441 | sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0); | ||
442 | } | ||
443 | } | ||
444 | |||
445 | static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | ||
446 | { | ||
447 | struct mm_struct * mm, *oldmm; | ||
448 | int retval; | ||
449 | |||
450 | tsk->min_flt = tsk->maj_flt = 0; | ||
451 | tsk->nvcsw = tsk->nivcsw = 0; | ||
452 | |||
453 | tsk->mm = NULL; | ||
454 | tsk->active_mm = NULL; | ||
455 | |||
456 | /* | ||
457 | * Are we cloning a kernel thread? | ||
458 | * | ||
459 | * We need to steal an active VM for that. | ||
460 | */ | ||
461 | oldmm = current->mm; | ||
462 | if (!oldmm) | ||
463 | return 0; | ||
464 | |||
465 | if (clone_flags & CLONE_VM) { | ||
466 | atomic_inc(&oldmm->mm_users); | ||
467 | mm = oldmm; | ||
468 | /* | ||
469 | * There are cases where the PTL is held to ensure no | ||
470 | * new threads start up in user mode using an mm, which | ||
471 | * allows optimizing out ipis; the tlb_gather_mmu code | ||
472 | * is an example. | ||
473 | */ | ||
474 | spin_unlock_wait(&oldmm->page_table_lock); | ||
475 | goto good_mm; | ||
476 | } | ||
477 | |||
478 | retval = -ENOMEM; | ||
479 | mm = allocate_mm(); | ||
480 | if (!mm) | ||
481 | goto fail_nomem; | ||
482 | |||
483 | /* Copy the current MM stuff.. */ | ||
484 | memcpy(mm, oldmm, sizeof(*mm)); | ||
485 | if (!mm_init(mm)) | ||
486 | goto fail_nomem; | ||
487 | |||
488 | if (init_new_context(tsk,mm)) | ||
489 | goto fail_nocontext; | ||
490 | |||
491 | retval = dup_mmap(mm, oldmm); | ||
492 | if (retval) | ||
493 | goto free_pt; | ||
494 | |||
495 | mm->hiwater_rss = get_mm_counter(mm,rss); | ||
496 | mm->hiwater_vm = mm->total_vm; | ||
497 | |||
498 | good_mm: | ||
499 | tsk->mm = mm; | ||
500 | tsk->active_mm = mm; | ||
501 | return 0; | ||
502 | |||
503 | free_pt: | ||
504 | mmput(mm); | ||
505 | fail_nomem: | ||
506 | return retval; | ||
507 | |||
508 | fail_nocontext: | ||
509 | /* | ||
510 | * If init_new_context() failed, we cannot use mmput() to free the mm | ||
511 | * because it calls destroy_context() | ||
512 | */ | ||
513 | mm_free_pgd(mm); | ||
514 | free_mm(mm); | ||
515 | return retval; | ||
516 | } | ||
517 | |||
518 | static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) | ||
519 | { | ||
520 | struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); | ||
521 | /* We don't need to lock fs - think why ;-) */ | ||
522 | if (fs) { | ||
523 | atomic_set(&fs->count, 1); | ||
524 | rwlock_init(&fs->lock); | ||
525 | fs->umask = old->umask; | ||
526 | read_lock(&old->lock); | ||
527 | fs->rootmnt = mntget(old->rootmnt); | ||
528 | fs->root = dget(old->root); | ||
529 | fs->pwdmnt = mntget(old->pwdmnt); | ||
530 | fs->pwd = dget(old->pwd); | ||
531 | if (old->altroot) { | ||
532 | fs->altrootmnt = mntget(old->altrootmnt); | ||
533 | fs->altroot = dget(old->altroot); | ||
534 | } else { | ||
535 | fs->altrootmnt = NULL; | ||
536 | fs->altroot = NULL; | ||
537 | } | ||
538 | read_unlock(&old->lock); | ||
539 | } | ||
540 | return fs; | ||
541 | } | ||
542 | |||
543 | struct fs_struct *copy_fs_struct(struct fs_struct *old) | ||
544 | { | ||
545 | return __copy_fs_struct(old); | ||
546 | } | ||
547 | |||
548 | EXPORT_SYMBOL_GPL(copy_fs_struct); | ||
549 | |||
550 | static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) | ||
551 | { | ||
552 | if (clone_flags & CLONE_FS) { | ||
553 | atomic_inc(¤t->fs->count); | ||
554 | return 0; | ||
555 | } | ||
556 | tsk->fs = __copy_fs_struct(current->fs); | ||
557 | if (!tsk->fs) | ||
558 | return -ENOMEM; | ||
559 | return 0; | ||
560 | } | ||
561 | |||
562 | static int count_open_files(struct files_struct *files, int size) | ||
563 | { | ||
564 | int i; | ||
565 | |||
566 | /* Find the last open fd */ | ||
567 | for (i = size/(8*sizeof(long)); i > 0; ) { | ||
568 | if (files->open_fds->fds_bits[--i]) | ||
569 | break; | ||
570 | } | ||
571 | i = (i+1) * 8 * sizeof(long); | ||
572 | return i; | ||
573 | } | ||
574 | |||
575 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | ||
576 | { | ||
577 | struct files_struct *oldf, *newf; | ||
578 | struct file **old_fds, **new_fds; | ||
579 | int open_files, size, i, error = 0, expand; | ||
580 | |||
581 | /* | ||
582 | * A background process may not have any files ... | ||
583 | */ | ||
584 | oldf = current->files; | ||
585 | if (!oldf) | ||
586 | goto out; | ||
587 | |||
588 | if (clone_flags & CLONE_FILES) { | ||
589 | atomic_inc(&oldf->count); | ||
590 | goto out; | ||
591 | } | ||
592 | |||
593 | /* | ||
594 | * Note: we may be using current for both targets (See exec.c) | ||
595 | * This works because we cache current->files (old) as oldf. Don't | ||
596 | * break this. | ||
597 | */ | ||
598 | tsk->files = NULL; | ||
599 | error = -ENOMEM; | ||
600 | newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); | ||
601 | if (!newf) | ||
602 | goto out; | ||
603 | |||
604 | atomic_set(&newf->count, 1); | ||
605 | |||
606 | spin_lock_init(&newf->file_lock); | ||
607 | newf->next_fd = 0; | ||
608 | newf->max_fds = NR_OPEN_DEFAULT; | ||
609 | newf->max_fdset = __FD_SETSIZE; | ||
610 | newf->close_on_exec = &newf->close_on_exec_init; | ||
611 | newf->open_fds = &newf->open_fds_init; | ||
612 | newf->fd = &newf->fd_array[0]; | ||
613 | |||
614 | spin_lock(&oldf->file_lock); | ||
615 | |||
616 | open_files = count_open_files(oldf, oldf->max_fdset); | ||
617 | expand = 0; | ||
618 | |||
619 | /* | ||
620 | * Check whether we need to allocate a larger fd array or fd set. | ||
621 | * Note: we're not a clone task, so the open count won't change. | ||
622 | */ | ||
623 | if (open_files > newf->max_fdset) { | ||
624 | newf->max_fdset = 0; | ||
625 | expand = 1; | ||
626 | } | ||
627 | if (open_files > newf->max_fds) { | ||
628 | newf->max_fds = 0; | ||
629 | expand = 1; | ||
630 | } | ||
631 | |||
632 | /* if the old fdset gets grown now, we'll only copy up to "size" fds */ | ||
633 | if (expand) { | ||
634 | spin_unlock(&oldf->file_lock); | ||
635 | spin_lock(&newf->file_lock); | ||
636 | error = expand_files(newf, open_files-1); | ||
637 | spin_unlock(&newf->file_lock); | ||
638 | if (error < 0) | ||
639 | goto out_release; | ||
640 | spin_lock(&oldf->file_lock); | ||
641 | } | ||
642 | |||
643 | old_fds = oldf->fd; | ||
644 | new_fds = newf->fd; | ||
645 | |||
646 | memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); | ||
647 | memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); | ||
648 | |||
649 | for (i = open_files; i != 0; i--) { | ||
650 | struct file *f = *old_fds++; | ||
651 | if (f) { | ||
652 | get_file(f); | ||
653 | } else { | ||
654 | /* | ||
655 | * The fd may be claimed in the fd bitmap but not yet | ||
656 | * instantiated in the files array if a sibling thread | ||
657 | * is partway through open(). So make sure that this | ||
658 | * fd is available to the new process. | ||
659 | */ | ||
660 | FD_CLR(open_files - i, newf->open_fds); | ||
661 | } | ||
662 | *new_fds++ = f; | ||
663 | } | ||
664 | spin_unlock(&oldf->file_lock); | ||
665 | |||
666 | /* compute the remainder to be cleared */ | ||
667 | size = (newf->max_fds - open_files) * sizeof(struct file *); | ||
668 | |||
669 | /* This is long-word aligned, so we could use an optimized version */ | ||
670 | memset(new_fds, 0, size); | ||
671 | |||
672 | if (newf->max_fdset > open_files) { | ||
673 | int left = (newf->max_fdset-open_files)/8; | ||
674 | int start = open_files / (8 * sizeof(unsigned long)); | ||
675 | |||
676 | memset(&newf->open_fds->fds_bits[start], 0, left); | ||
677 | memset(&newf->close_on_exec->fds_bits[start], 0, left); | ||
678 | } | ||
679 | |||
680 | tsk->files = newf; | ||
681 | error = 0; | ||
682 | out: | ||
683 | return error; | ||
684 | |||
685 | out_release: | ||
686 | free_fdset (newf->close_on_exec, newf->max_fdset); | ||
687 | free_fdset (newf->open_fds, newf->max_fdset); | ||
688 | free_fd_array(newf->fd, newf->max_fds); | ||
689 | kmem_cache_free(files_cachep, newf); | ||
690 | goto out; | ||
691 | } | ||
692 | |||
693 | /* | ||
694 | * Helper to unshare the files of the current task. | ||
695 | * We don't want to expose copy_files internals to | ||
696 | * the exec layer of the kernel. | ||
697 | */ | ||
698 | |||
699 | int unshare_files(void) | ||
700 | { | ||
701 | struct files_struct *files = current->files; | ||
702 | int rc; | ||
703 | |||
704 | if(!files) | ||
705 | BUG(); | ||
706 | |||
707 | /* This can race, but the race only causes us to copy when we | ||
708 | don't need to, and then drop the copy. */ | ||
709 | if(atomic_read(&files->count) == 1) | ||
710 | { | ||
711 | atomic_inc(&files->count); | ||
712 | return 0; | ||
713 | } | ||
714 | rc = copy_files(0, current); | ||
715 | if(rc) | ||
716 | current->files = files; | ||
717 | return rc; | ||
718 | } | ||
719 | |||
720 | EXPORT_SYMBOL(unshare_files); | ||
721 | |||
722 | static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) | ||
723 | { | ||
724 | struct sighand_struct *sig; | ||
725 | |||
726 | if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { | ||
727 | atomic_inc(¤t->sighand->count); | ||
728 | return 0; | ||
729 | } | ||
730 | sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); | ||
731 | tsk->sighand = sig; | ||
732 | if (!sig) | ||
733 | return -ENOMEM; | ||
734 | spin_lock_init(&sig->siglock); | ||
735 | atomic_set(&sig->count, 1); | ||
736 | memcpy(sig->action, current->sighand->action, sizeof(sig->action)); | ||
737 | return 0; | ||
738 | } | ||
739 | |||
740 | static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) | ||
741 | { | ||
742 | struct signal_struct *sig; | ||
743 | int ret; | ||
744 | |||
745 | if (clone_flags & CLONE_THREAD) { | ||
746 | atomic_inc(¤t->signal->count); | ||
747 | atomic_inc(¤t->signal->live); | ||
748 | return 0; | ||
749 | } | ||
750 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | ||
751 | tsk->signal = sig; | ||
752 | if (!sig) | ||
753 | return -ENOMEM; | ||
754 | |||
755 | ret = copy_thread_group_keys(tsk); | ||
756 | if (ret < 0) { | ||
757 | kmem_cache_free(signal_cachep, sig); | ||
758 | return ret; | ||
759 | } | ||
760 | |||
761 | atomic_set(&sig->count, 1); | ||
762 | atomic_set(&sig->live, 1); | ||
763 | init_waitqueue_head(&sig->wait_chldexit); | ||
764 | sig->flags = 0; | ||
765 | sig->group_exit_code = 0; | ||
766 | sig->group_exit_task = NULL; | ||
767 | sig->group_stop_count = 0; | ||
768 | sig->curr_target = NULL; | ||
769 | init_sigpending(&sig->shared_pending); | ||
770 | INIT_LIST_HEAD(&sig->posix_timers); | ||
771 | |||
772 | sig->it_real_value = sig->it_real_incr = 0; | ||
773 | sig->real_timer.function = it_real_fn; | ||
774 | sig->real_timer.data = (unsigned long) tsk; | ||
775 | init_timer(&sig->real_timer); | ||
776 | |||
777 | sig->it_virt_expires = cputime_zero; | ||
778 | sig->it_virt_incr = cputime_zero; | ||
779 | sig->it_prof_expires = cputime_zero; | ||
780 | sig->it_prof_incr = cputime_zero; | ||
781 | |||
782 | sig->tty = current->signal->tty; | ||
783 | sig->pgrp = process_group(current); | ||
784 | sig->session = current->signal->session; | ||
785 | sig->leader = 0; /* session leadership doesn't inherit */ | ||
786 | sig->tty_old_pgrp = 0; | ||
787 | |||
788 | sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; | ||
789 | sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; | ||
790 | sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; | ||
791 | sig->sched_time = 0; | ||
792 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | ||
793 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | ||
794 | INIT_LIST_HEAD(&sig->cpu_timers[2]); | ||
795 | |||
796 | task_lock(current->group_leader); | ||
797 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | ||
798 | task_unlock(current->group_leader); | ||
799 | |||
800 | if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { | ||
801 | /* | ||
802 | * New sole thread in the process gets an expiry time | ||
803 | * of the whole CPU time limit. | ||
804 | */ | ||
805 | tsk->it_prof_expires = | ||
806 | secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); | ||
807 | } | ||
808 | |||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) | ||
813 | { | ||
814 | unsigned long new_flags = p->flags; | ||
815 | |||
816 | new_flags &= ~PF_SUPERPRIV; | ||
817 | new_flags |= PF_FORKNOEXEC; | ||
818 | if (!(clone_flags & CLONE_PTRACE)) | ||
819 | p->ptrace = 0; | ||
820 | p->flags = new_flags; | ||
821 | } | ||
822 | |||
823 | asmlinkage long sys_set_tid_address(int __user *tidptr) | ||
824 | { | ||
825 | current->clear_child_tid = tidptr; | ||
826 | |||
827 | return current->pid; | ||
828 | } | ||
829 | |||
830 | /* | ||
831 | * This creates a new process as a copy of the old one, | ||
832 | * but does not actually start it yet. | ||
833 | * | ||
834 | * It copies the registers, and all the appropriate | ||
835 | * parts of the process environment (as per the clone | ||
836 | * flags). The actual kick-off is left to the caller. | ||
837 | */ | ||
838 | static task_t *copy_process(unsigned long clone_flags, | ||
839 | unsigned long stack_start, | ||
840 | struct pt_regs *regs, | ||
841 | unsigned long stack_size, | ||
842 | int __user *parent_tidptr, | ||
843 | int __user *child_tidptr, | ||
844 | int pid) | ||
845 | { | ||
846 | int retval; | ||
847 | struct task_struct *p = NULL; | ||
848 | |||
849 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) | ||
850 | return ERR_PTR(-EINVAL); | ||
851 | |||
852 | /* | ||
853 | * Thread groups must share signals as well, and detached threads | ||
854 | * can only be started up within the thread group. | ||
855 | */ | ||
856 | if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) | ||
857 | return ERR_PTR(-EINVAL); | ||
858 | |||
859 | /* | ||
860 | * Shared signal handlers imply shared VM. By way of the above, | ||
861 | * thread groups also imply shared VM. Blocking this case allows | ||
862 | * for various simplifications in other code. | ||
863 | */ | ||
864 | if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) | ||
865 | return ERR_PTR(-EINVAL); | ||
866 | |||
867 | retval = security_task_create(clone_flags); | ||
868 | if (retval) | ||
869 | goto fork_out; | ||
870 | |||
871 | retval = -ENOMEM; | ||
872 | p = dup_task_struct(current); | ||
873 | if (!p) | ||
874 | goto fork_out; | ||
875 | |||
876 | retval = -EAGAIN; | ||
877 | if (atomic_read(&p->user->processes) >= | ||
878 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { | ||
879 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && | ||
880 | p->user != &root_user) | ||
881 | goto bad_fork_free; | ||
882 | } | ||
883 | |||
884 | atomic_inc(&p->user->__count); | ||
885 | atomic_inc(&p->user->processes); | ||
886 | get_group_info(p->group_info); | ||
887 | |||
888 | /* | ||
889 | * If multiple threads are within copy_process(), then this check | ||
890 | * triggers too late. This doesn't hurt, the check is only there | ||
891 | * to stop root fork bombs. | ||
892 | */ | ||
893 | if (nr_threads >= max_threads) | ||
894 | goto bad_fork_cleanup_count; | ||
895 | |||
896 | if (!try_module_get(p->thread_info->exec_domain->module)) | ||
897 | goto bad_fork_cleanup_count; | ||
898 | |||
899 | if (p->binfmt && !try_module_get(p->binfmt->module)) | ||
900 | goto bad_fork_cleanup_put_domain; | ||
901 | |||
902 | p->did_exec = 0; | ||
903 | copy_flags(clone_flags, p); | ||
904 | p->pid = pid; | ||
905 | retval = -EFAULT; | ||
906 | if (clone_flags & CLONE_PARENT_SETTID) | ||
907 | if (put_user(p->pid, parent_tidptr)) | ||
908 | goto bad_fork_cleanup; | ||
909 | |||
910 | p->proc_dentry = NULL; | ||
911 | |||
912 | INIT_LIST_HEAD(&p->children); | ||
913 | INIT_LIST_HEAD(&p->sibling); | ||
914 | p->vfork_done = NULL; | ||
915 | spin_lock_init(&p->alloc_lock); | ||
916 | spin_lock_init(&p->proc_lock); | ||
917 | |||
918 | clear_tsk_thread_flag(p, TIF_SIGPENDING); | ||
919 | init_sigpending(&p->pending); | ||
920 | |||
921 | p->utime = cputime_zero; | ||
922 | p->stime = cputime_zero; | ||
923 | p->sched_time = 0; | ||
924 | p->rchar = 0; /* I/O counter: bytes read */ | ||
925 | p->wchar = 0; /* I/O counter: bytes written */ | ||
926 | p->syscr = 0; /* I/O counter: read syscalls */ | ||
927 | p->syscw = 0; /* I/O counter: write syscalls */ | ||
928 | acct_clear_integrals(p); | ||
929 | |||
930 | p->it_virt_expires = cputime_zero; | ||
931 | p->it_prof_expires = cputime_zero; | ||
932 | p->it_sched_expires = 0; | ||
933 | INIT_LIST_HEAD(&p->cpu_timers[0]); | ||
934 | INIT_LIST_HEAD(&p->cpu_timers[1]); | ||
935 | INIT_LIST_HEAD(&p->cpu_timers[2]); | ||
936 | |||
937 | p->lock_depth = -1; /* -1 = no lock */ | ||
938 | do_posix_clock_monotonic_gettime(&p->start_time); | ||
939 | p->security = NULL; | ||
940 | p->io_context = NULL; | ||
941 | p->io_wait = NULL; | ||
942 | p->audit_context = NULL; | ||
943 | #ifdef CONFIG_NUMA | ||
944 | p->mempolicy = mpol_copy(p->mempolicy); | ||
945 | if (IS_ERR(p->mempolicy)) { | ||
946 | retval = PTR_ERR(p->mempolicy); | ||
947 | p->mempolicy = NULL; | ||
948 | goto bad_fork_cleanup; | ||
949 | } | ||
950 | #endif | ||
951 | |||
952 | p->tgid = p->pid; | ||
953 | if (clone_flags & CLONE_THREAD) | ||
954 | p->tgid = current->tgid; | ||
955 | |||
956 | if ((retval = security_task_alloc(p))) | ||
957 | goto bad_fork_cleanup_policy; | ||
958 | if ((retval = audit_alloc(p))) | ||
959 | goto bad_fork_cleanup_security; | ||
960 | /* copy all the process information */ | ||
961 | if ((retval = copy_semundo(clone_flags, p))) | ||
962 | goto bad_fork_cleanup_audit; | ||
963 | if ((retval = copy_files(clone_flags, p))) | ||
964 | goto bad_fork_cleanup_semundo; | ||
965 | if ((retval = copy_fs(clone_flags, p))) | ||
966 | goto bad_fork_cleanup_files; | ||
967 | if ((retval = copy_sighand(clone_flags, p))) | ||
968 | goto bad_fork_cleanup_fs; | ||
969 | if ((retval = copy_signal(clone_flags, p))) | ||
970 | goto bad_fork_cleanup_sighand; | ||
971 | if ((retval = copy_mm(clone_flags, p))) | ||
972 | goto bad_fork_cleanup_signal; | ||
973 | if ((retval = copy_keys(clone_flags, p))) | ||
974 | goto bad_fork_cleanup_mm; | ||
975 | if ((retval = copy_namespace(clone_flags, p))) | ||
976 | goto bad_fork_cleanup_keys; | ||
977 | retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); | ||
978 | if (retval) | ||
979 | goto bad_fork_cleanup_namespace; | ||
980 | |||
981 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | ||
982 | /* | ||
983 | * Clear TID on mm_release()? | ||
984 | */ | ||
985 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; | ||
986 | |||
987 | /* | ||
988 | * Syscall tracing should be turned off in the child regardless | ||
989 | * of CLONE_PTRACE. | ||
990 | */ | ||
991 | clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); | ||
992 | |||
993 | /* Our parent's execution domain becomes our current domain. | ||
994 | These must match for thread signalling to apply. */ | ||
995 | |||
996 | p->parent_exec_id = p->self_exec_id; | ||
997 | |||
998 | /* ok, now we should be set up.. */ | ||
999 | p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); | ||
1000 | p->pdeath_signal = 0; | ||
1001 | p->exit_state = 0; | ||
1002 | |||
1003 | /* Perform scheduler related setup */ | ||
1004 | sched_fork(p); | ||
1005 | |||
1006 | /* | ||
1007 | * Ok, make it visible to the rest of the system. | ||
1008 | * We don't wake it up yet. | ||
1009 | */ | ||
1010 | p->group_leader = p; | ||
1011 | INIT_LIST_HEAD(&p->ptrace_children); | ||
1012 | INIT_LIST_HEAD(&p->ptrace_list); | ||
1013 | |||
1014 | /* Need tasklist lock for parent etc handling! */ | ||
1015 | write_lock_irq(&tasklist_lock); | ||
1016 | |||
1017 | /* | ||
1018 | * The task hasn't been attached yet, so cpus_allowed mask cannot | ||
1019 | * have changed. The cpus_allowed mask of the parent may have | ||
1020 | * changed after it was copied the first time, and it may then move to | ||
1021 | * another CPU - so we re-copy it here and set the child's CPU to | ||
1022 | * the parent's CPU. This avoids a lot of nasty races. | ||
1023 | */ | ||
1024 | p->cpus_allowed = current->cpus_allowed; | ||
1025 | set_task_cpu(p, smp_processor_id()); | ||
1026 | |||
1027 | /* | ||
1028 | * Check for pending SIGKILL! The new thread should not be allowed | ||
1029 | * to slip out of an OOM kill. (or normal SIGKILL.) | ||
1030 | */ | ||
1031 | if (sigismember(¤t->pending.signal, SIGKILL)) { | ||
1032 | write_unlock_irq(&tasklist_lock); | ||
1033 | retval = -EINTR; | ||
1034 | goto bad_fork_cleanup_namespace; | ||
1035 | } | ||
1036 | |||
1037 | /* CLONE_PARENT re-uses the old parent */ | ||
1038 | if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) | ||
1039 | p->real_parent = current->real_parent; | ||
1040 | else | ||
1041 | p->real_parent = current; | ||
1042 | p->parent = p->real_parent; | ||
1043 | |||
1044 | if (clone_flags & CLONE_THREAD) { | ||
1045 | spin_lock(¤t->sighand->siglock); | ||
1046 | /* | ||
1047 | * Important: if an exit-all has been started then | ||
1048 | * do not create this new thread - the whole thread | ||
1049 | * group is supposed to exit anyway. | ||
1050 | */ | ||
1051 | if (current->signal->flags & SIGNAL_GROUP_EXIT) { | ||
1052 | spin_unlock(¤t->sighand->siglock); | ||
1053 | write_unlock_irq(&tasklist_lock); | ||
1054 | retval = -EAGAIN; | ||
1055 | goto bad_fork_cleanup_namespace; | ||
1056 | } | ||
1057 | p->group_leader = current->group_leader; | ||
1058 | |||
1059 | if (current->signal->group_stop_count > 0) { | ||
1060 | /* | ||
1061 | * There is an all-stop in progress for the group. | ||
1062 | * We ourselves will stop as soon as we check signals. | ||
1063 | * Make the new thread part of that group stop too. | ||
1064 | */ | ||
1065 | current->signal->group_stop_count++; | ||
1066 | set_tsk_thread_flag(p, TIF_SIGPENDING); | ||
1067 | } | ||
1068 | |||
1069 | if (!cputime_eq(current->signal->it_virt_expires, | ||
1070 | cputime_zero) || | ||
1071 | !cputime_eq(current->signal->it_prof_expires, | ||
1072 | cputime_zero) || | ||
1073 | current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || | ||
1074 | !list_empty(¤t->signal->cpu_timers[0]) || | ||
1075 | !list_empty(¤t->signal->cpu_timers[1]) || | ||
1076 | !list_empty(¤t->signal->cpu_timers[2])) { | ||
1077 | /* | ||
1078 | * Have child wake up on its first tick to check | ||
1079 | * for process CPU timers. | ||
1080 | */ | ||
1081 | p->it_prof_expires = jiffies_to_cputime(1); | ||
1082 | } | ||
1083 | |||
1084 | spin_unlock(¤t->sighand->siglock); | ||
1085 | } | ||
1086 | |||
1087 | SET_LINKS(p); | ||
1088 | if (unlikely(p->ptrace & PT_PTRACED)) | ||
1089 | __ptrace_link(p, current->parent); | ||
1090 | |||
1091 | cpuset_fork(p); | ||
1092 | |||
1093 | attach_pid(p, PIDTYPE_PID, p->pid); | ||
1094 | attach_pid(p, PIDTYPE_TGID, p->tgid); | ||
1095 | if (thread_group_leader(p)) { | ||
1096 | attach_pid(p, PIDTYPE_PGID, process_group(p)); | ||
1097 | attach_pid(p, PIDTYPE_SID, p->signal->session); | ||
1098 | if (p->pid) | ||
1099 | __get_cpu_var(process_counts)++; | ||
1100 | } | ||
1101 | |||
1102 | nr_threads++; | ||
1103 | total_forks++; | ||
1104 | write_unlock_irq(&tasklist_lock); | ||
1105 | retval = 0; | ||
1106 | |||
1107 | fork_out: | ||
1108 | if (retval) | ||
1109 | return ERR_PTR(retval); | ||
1110 | return p; | ||
1111 | |||
1112 | bad_fork_cleanup_namespace: | ||
1113 | exit_namespace(p); | ||
1114 | bad_fork_cleanup_keys: | ||
1115 | exit_keys(p); | ||
1116 | bad_fork_cleanup_mm: | ||
1117 | if (p->mm) | ||
1118 | mmput(p->mm); | ||
1119 | bad_fork_cleanup_signal: | ||
1120 | exit_signal(p); | ||
1121 | bad_fork_cleanup_sighand: | ||
1122 | exit_sighand(p); | ||
1123 | bad_fork_cleanup_fs: | ||
1124 | exit_fs(p); /* blocking */ | ||
1125 | bad_fork_cleanup_files: | ||
1126 | exit_files(p); /* blocking */ | ||
1127 | bad_fork_cleanup_semundo: | ||
1128 | exit_sem(p); | ||
1129 | bad_fork_cleanup_audit: | ||
1130 | audit_free(p); | ||
1131 | bad_fork_cleanup_security: | ||
1132 | security_task_free(p); | ||
1133 | bad_fork_cleanup_policy: | ||
1134 | #ifdef CONFIG_NUMA | ||
1135 | mpol_free(p->mempolicy); | ||
1136 | #endif | ||
1137 | bad_fork_cleanup: | ||
1138 | if (p->binfmt) | ||
1139 | module_put(p->binfmt->module); | ||
1140 | bad_fork_cleanup_put_domain: | ||
1141 | module_put(p->thread_info->exec_domain->module); | ||
1142 | bad_fork_cleanup_count: | ||
1143 | put_group_info(p->group_info); | ||
1144 | atomic_dec(&p->user->processes); | ||
1145 | free_uid(p->user); | ||
1146 | bad_fork_free: | ||
1147 | free_task(p); | ||
1148 | goto fork_out; | ||
1149 | } | ||
1150 | |||
1151 | struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) | ||
1152 | { | ||
1153 | memset(regs, 0, sizeof(struct pt_regs)); | ||
1154 | return regs; | ||
1155 | } | ||
1156 | |||
1157 | task_t * __devinit fork_idle(int cpu) | ||
1158 | { | ||
1159 | task_t *task; | ||
1160 | struct pt_regs regs; | ||
1161 | |||
1162 | task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0); | ||
1163 | if (!task) | ||
1164 | return ERR_PTR(-ENOMEM); | ||
1165 | init_idle(task, cpu); | ||
1166 | unhash_process(task); | ||
1167 | return task; | ||
1168 | } | ||
1169 | |||
1170 | static inline int fork_traceflag (unsigned clone_flags) | ||
1171 | { | ||
1172 | if (clone_flags & CLONE_UNTRACED) | ||
1173 | return 0; | ||
1174 | else if (clone_flags & CLONE_VFORK) { | ||
1175 | if (current->ptrace & PT_TRACE_VFORK) | ||
1176 | return PTRACE_EVENT_VFORK; | ||
1177 | } else if ((clone_flags & CSIGNAL) != SIGCHLD) { | ||
1178 | if (current->ptrace & PT_TRACE_CLONE) | ||
1179 | return PTRACE_EVENT_CLONE; | ||
1180 | } else if (current->ptrace & PT_TRACE_FORK) | ||
1181 | return PTRACE_EVENT_FORK; | ||
1182 | |||
1183 | return 0; | ||
1184 | } | ||
1185 | |||
1186 | /* | ||
1187 | * Ok, this is the main fork-routine. | ||
1188 | * | ||
1189 | * It copies the process, and if successful kick-starts | ||
1190 | * it and waits for it to finish using the VM if required. | ||
1191 | */ | ||
1192 | long do_fork(unsigned long clone_flags, | ||
1193 | unsigned long stack_start, | ||
1194 | struct pt_regs *regs, | ||
1195 | unsigned long stack_size, | ||
1196 | int __user *parent_tidptr, | ||
1197 | int __user *child_tidptr) | ||
1198 | { | ||
1199 | struct task_struct *p; | ||
1200 | int trace = 0; | ||
1201 | long pid = alloc_pidmap(); | ||
1202 | |||
1203 | if (pid < 0) | ||
1204 | return -EAGAIN; | ||
1205 | if (unlikely(current->ptrace)) { | ||
1206 | trace = fork_traceflag (clone_flags); | ||
1207 | if (trace) | ||
1208 | clone_flags |= CLONE_PTRACE; | ||
1209 | } | ||
1210 | |||
1211 | p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); | ||
1212 | /* | ||
1213 | * Do this prior to waking up the new thread - the thread pointer | ||
1214 | * might become invalid after that point, if the thread exits quickly. | ||
1215 | */ | ||
1216 | if (!IS_ERR(p)) { | ||
1217 | struct completion vfork; | ||
1218 | |||
1219 | if (clone_flags & CLONE_VFORK) { | ||
1220 | p->vfork_done = &vfork; | ||
1221 | init_completion(&vfork); | ||
1222 | } | ||
1223 | |||
1224 | if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { | ||
1225 | /* | ||
1226 | * We'll start up with an immediate SIGSTOP. | ||
1227 | */ | ||
1228 | sigaddset(&p->pending.signal, SIGSTOP); | ||
1229 | set_tsk_thread_flag(p, TIF_SIGPENDING); | ||
1230 | } | ||
1231 | |||
1232 | if (!(clone_flags & CLONE_STOPPED)) | ||
1233 | wake_up_new_task(p, clone_flags); | ||
1234 | else | ||
1235 | p->state = TASK_STOPPED; | ||
1236 | |||
1237 | if (unlikely (trace)) { | ||
1238 | current->ptrace_message = pid; | ||
1239 | ptrace_notify ((trace << 8) | SIGTRAP); | ||
1240 | } | ||
1241 | |||
1242 | if (clone_flags & CLONE_VFORK) { | ||
1243 | wait_for_completion(&vfork); | ||
1244 | if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) | ||
1245 | ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); | ||
1246 | } | ||
1247 | } else { | ||
1248 | free_pidmap(pid); | ||
1249 | pid = PTR_ERR(p); | ||
1250 | } | ||
1251 | return pid; | ||
1252 | } | ||
1253 | |||
1254 | void __init proc_caches_init(void) | ||
1255 | { | ||
1256 | sighand_cachep = kmem_cache_create("sighand_cache", | ||
1257 | sizeof(struct sighand_struct), 0, | ||
1258 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | ||
1259 | signal_cachep = kmem_cache_create("signal_cache", | ||
1260 | sizeof(struct signal_struct), 0, | ||
1261 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | ||
1262 | files_cachep = kmem_cache_create("files_cache", | ||
1263 | sizeof(struct files_struct), 0, | ||
1264 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | ||
1265 | fs_cachep = kmem_cache_create("fs_cache", | ||
1266 | sizeof(struct fs_struct), 0, | ||
1267 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | ||
1268 | vm_area_cachep = kmem_cache_create("vm_area_struct", | ||
1269 | sizeof(struct vm_area_struct), 0, | ||
1270 | SLAB_PANIC, NULL, NULL); | ||
1271 | mm_cachep = kmem_cache_create("mm_struct", | ||
1272 | sizeof(struct mm_struct), 0, | ||
1273 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | ||
1274 | } | ||
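
For context, and not part of the kernel/fork.c listing above: a minimal userspace sketch of how this code is reached. glibc's fork() ends up in do_fork() with SIGCHLD as the exit signal, vfork() adds CLONE_VFORK | CLONE_VM so the parent sleeps on the vfork_done completion that mm_release() signals, and pthread_create() passes CLONE_VM | CLONE_THREAD | CLONE_SIGHAND (among other flags), the combination that copy_process() validates at its top. The file name fork_demo.c is illustrative.

/* fork_demo.c - illustrative only, not part of this commit. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();	/* reaches do_fork(SIGCHLD, ...) in the kernel */

	if (child < 0) {
		/* e.g. -EAGAIN when the RLIMIT_NPROC check in copy_process() fails */
		perror("fork");
		exit(EXIT_FAILURE);
	}
	if (child == 0) {
		/* The child runs on a copy-on-write duplicate of the parent's
		   address space, built by copy_mm()/dup_mmap() above. */
		printf("child: pid=%d ppid=%d\n", (int)getpid(), (int)getppid());
		_exit(0);
	}
	/* Reap the child so release_task() can drop the second task_struct
	   reference taken in dup_task_struct(). */
	waitpid(child, NULL, 0);
	printf("parent: reaped pid=%d\n", (int)child);
	return 0;
}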