diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 141 |
1 files changed, 107 insertions, 34 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index f2c1e7352298..03c1eaaa6ef5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -74,6 +74,7 @@ | |||
74 | #include <linux/uprobes.h> | 74 | #include <linux/uprobes.h> |
75 | #include <linux/aio.h> | 75 | #include <linux/aio.h> |
76 | #include <linux/compiler.h> | 76 | #include <linux/compiler.h> |
77 | #include <linux/sysctl.h> | ||
77 | 78 | ||
78 | #include <asm/pgtable.h> | 79 | #include <asm/pgtable.h> |
79 | #include <asm/pgalloc.h> | 80 | #include <asm/pgalloc.h> |
@@ -88,6 +89,16 @@ | |||
88 | #include <trace/events/task.h> | 89 | #include <trace/events/task.h> |
89 | 90 | ||
90 | /* | 91 | /* |
92 | * Minimum number of threads to boot the kernel | ||
93 | */ | ||
94 | #define MIN_THREADS 20 | ||
95 | |||
96 | /* | ||
97 | * Maximum number of threads | ||
98 | */ | ||
99 | #define MAX_THREADS FUTEX_TID_MASK | ||
100 | |||
101 | /* | ||
91 | * Protected counters by write_lock_irq(&tasklist_lock) | 102 | * Protected counters by write_lock_irq(&tasklist_lock) |
92 | */ | 103 | */ |
93 | unsigned long total_forks; /* Handle normal Linux uptimes. */ | 104 | unsigned long total_forks; /* Handle normal Linux uptimes. */ |
@@ -253,7 +264,30 @@ EXPORT_SYMBOL_GPL(__put_task_struct); | |||
253 | 264 | ||
254 | void __init __weak arch_task_cache_init(void) { } | 265 | void __init __weak arch_task_cache_init(void) { } |
255 | 266 | ||
256 | void __init fork_init(unsigned long mempages) | 267 | /* |
268 | * set_max_threads | ||
269 | */ | ||
270 | static void set_max_threads(unsigned int max_threads_suggested) | ||
271 | { | ||
272 | u64 threads; | ||
273 | |||
274 | /* | ||
275 | * The number of threads shall be limited such that the thread | ||
276 | * structures may only consume a small part of the available memory. | ||
277 | */ | ||
278 | if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) | ||
279 | threads = MAX_THREADS; | ||
280 | else | ||
281 | threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, | ||
282 | (u64) THREAD_SIZE * 8UL); | ||
283 | |||
284 | if (threads > max_threads_suggested) | ||
285 | threads = max_threads_suggested; | ||
286 | |||
287 | max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); | ||
288 | } | ||
289 | |||
290 | void __init fork_init(void) | ||
257 | { | 291 | { |
258 | #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR | 292 | #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR |
259 | #ifndef ARCH_MIN_TASKALIGN | 293 | #ifndef ARCH_MIN_TASKALIGN |
@@ -268,18 +302,7 @@ void __init fork_init(unsigned long mempages) | |||
268 | /* do the arch specific task caches init */ | 302 | /* do the arch specific task caches init */ |
269 | arch_task_cache_init(); | 303 | arch_task_cache_init(); |
270 | 304 | ||
271 | /* | 305 | set_max_threads(MAX_THREADS); |
272 | * The default maximum number of threads is set to a safe | ||
273 | * value: the thread structures can take up at most half | ||
274 | * of memory. | ||
275 | */ | ||
276 | max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); | ||
277 | |||
278 | /* | ||
279 | * we need to allow at least 20 threads to boot a system | ||
280 | */ | ||
281 | if (max_threads < 20) | ||
282 | max_threads = 20; | ||
283 | 306 | ||
284 | init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; | 307 | init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; |
285 | init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; | 308 | init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; |
@@ -380,6 +403,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
380 | */ | 403 | */ |
381 | down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); | 404 | down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); |
382 | 405 | ||
406 | /* No ordering required: file already has been exposed. */ | ||
407 | RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); | ||
408 | |||
383 | mm->total_vm = oldmm->total_vm; | 409 | mm->total_vm = oldmm->total_vm; |
384 | mm->shared_vm = oldmm->shared_vm; | 410 | mm->shared_vm = oldmm->shared_vm; |
385 | mm->exec_vm = oldmm->exec_vm; | 411 | mm->exec_vm = oldmm->exec_vm; |
@@ -505,7 +531,13 @@ static inline void mm_free_pgd(struct mm_struct *mm) | |||
505 | pgd_free(mm, mm->pgd); | 531 | pgd_free(mm, mm->pgd); |
506 | } | 532 | } |
507 | #else | 533 | #else |
508 | #define dup_mmap(mm, oldmm) (0) | 534 | static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) |
535 | { | ||
536 | down_write(&oldmm->mmap_sem); | ||
537 | RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); | ||
538 | up_write(&oldmm->mmap_sem); | ||
539 | return 0; | ||
540 | } | ||
509 | #define mm_alloc_pgd(mm) (0) | 541 | #define mm_alloc_pgd(mm) (0) |
510 | #define mm_free_pgd(mm) | 542 | #define mm_free_pgd(mm) |
511 | #endif /* CONFIG_MMU */ | 543 | #endif /* CONFIG_MMU */ |
@@ -674,34 +706,53 @@ void mmput(struct mm_struct *mm) | |||
674 | } | 706 | } |
675 | EXPORT_SYMBOL_GPL(mmput); | 707 | EXPORT_SYMBOL_GPL(mmput); |
676 | 708 | ||
709 | /** | ||
710 | * set_mm_exe_file - change a reference to the mm's executable file | ||
711 | * | ||
712 | * This changes mm's executable file (shown as symlink /proc/[pid]/exe). | ||
713 | * | ||
714 | * Main users are mmput() and sys_execve(). Callers prevent concurrent | ||
715 | * invocations: in mmput() no users are left alive, in execve the task is | ||
716 | * single threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the | ||
717 | * mm->exe_file, but does so without using set_mm_exe_file() in order | ||
718 | * to avoid the need for any locks. | ||
719 | */ | ||
677 | void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) | 720 | void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) |
678 | { | 721 | { |
722 | struct file *old_exe_file; | ||
723 | |||
724 | /* | ||
725 | * It is safe to dereference the exe_file without RCU as | ||
726 | * this function is only called if nobody else can access | ||
727 | * this mm -- see comment above for justification. | ||
728 | */ | ||
729 | old_exe_file = rcu_dereference_raw(mm->exe_file); | ||
730 | |||
679 | if (new_exe_file) | 731 | if (new_exe_file) |
680 | get_file(new_exe_file); | 732 | get_file(new_exe_file); |
681 | if (mm->exe_file) | 733 | rcu_assign_pointer(mm->exe_file, new_exe_file); |
682 | fput(mm->exe_file); | 734 | if (old_exe_file) |
683 | mm->exe_file = new_exe_file; | 735 | fput(old_exe_file); |
684 | } | 736 | } |
685 | 737 | ||
738 | /** | ||
739 | * get_mm_exe_file - acquire a reference to the mm's executable file | ||
740 | * | ||
741 | * Returns %NULL if mm has no associated executable file. | ||
742 | * User must release file via fput(). | ||
743 | */ | ||
686 | struct file *get_mm_exe_file(struct mm_struct *mm) | 744 | struct file *get_mm_exe_file(struct mm_struct *mm) |
687 | { | 745 | { |
688 | struct file *exe_file; | 746 | struct file *exe_file; |
689 | 747 | ||
690 | /* We need mmap_sem to protect against races with removal of exe_file */ | 748 | rcu_read_lock(); |
691 | down_read(&mm->mmap_sem); | 749 | exe_file = rcu_dereference(mm->exe_file); |
692 | exe_file = mm->exe_file; | 750 | if (exe_file && !get_file_rcu(exe_file)) |
693 | if (exe_file) | 751 | exe_file = NULL; |
694 | get_file(exe_file); | 752 | rcu_read_unlock(); |
695 | up_read(&mm->mmap_sem); | ||
696 | return exe_file; | 753 | return exe_file; |
697 | } | 754 | } |
698 | 755 | EXPORT_SYMBOL(get_mm_exe_file); | |
699 | static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) | ||
700 | { | ||
701 | /* It's safe to write the exe_file pointer without exe_file_lock because | ||
702 | * this is called during fork when the task is not yet in /proc */ | ||
703 | newmm->exe_file = get_mm_exe_file(oldmm); | ||
704 | } | ||
705 | 756 | ||
706 | /** | 757 | /** |
707 | * get_task_mm - acquire a reference to the task's mm | 758 | * get_task_mm - acquire a reference to the task's mm |
@@ -864,8 +915,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) | |||
864 | if (!mm_init(mm, tsk)) | 915 | if (!mm_init(mm, tsk)) |
865 | goto fail_nomem; | 916 | goto fail_nomem; |
866 | 917 | ||
867 | dup_mm_exe_file(oldmm, mm); | ||
868 | |||
869 | err = dup_mmap(mm, oldmm); | 918 | err = dup_mmap(mm, oldmm); |
870 | if (err) | 919 | if (err) |
871 | goto free_pt; | 920 | goto free_pt; |
@@ -1403,10 +1452,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1403 | goto bad_fork_cleanup_io; | 1452 | goto bad_fork_cleanup_io; |
1404 | 1453 | ||
1405 | if (pid != &init_struct_pid) { | 1454 | if (pid != &init_struct_pid) { |
1406 | retval = -ENOMEM; | ||
1407 | pid = alloc_pid(p->nsproxy->pid_ns_for_children); | 1455 | pid = alloc_pid(p->nsproxy->pid_ns_for_children); |
1408 | if (!pid) | 1456 | if (IS_ERR(pid)) { |
1457 | retval = PTR_ERR(pid); | ||
1409 | goto bad_fork_cleanup_io; | 1458 | goto bad_fork_cleanup_io; |
1459 | } | ||
1410 | } | 1460 | } |
1411 | 1461 | ||
1412 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | 1462 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; |
@@ -2000,3 +2050,26 @@ int unshare_files(struct files_struct **displaced) | |||
2000 | task_unlock(task); | 2050 | task_unlock(task); |
2001 | return 0; | 2051 | return 0; |
2002 | } | 2052 | } |
2053 | |||
2054 | int sysctl_max_threads(struct ctl_table *table, int write, | ||
2055 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2056 | { | ||
2057 | struct ctl_table t; | ||
2058 | int ret; | ||
2059 | int threads = max_threads; | ||
2060 | int min = MIN_THREADS; | ||
2061 | int max = MAX_THREADS; | ||
2062 | |||
2063 | t = *table; | ||
2064 | t.data = &threads; | ||
2065 | t.extra1 = &min; | ||
2066 | t.extra2 = &max; | ||
2067 | |||
2068 | ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); | ||
2069 | if (ret || !write) | ||
2070 | return ret; | ||
2071 | |||
2072 | set_max_threads(threads); | ||
2073 | |||
2074 | return 0; | ||
2075 | } | ||