Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c | 175
1 file changed, 154 insertions(+), 21 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index beb31725f7e2..c060c7e7c247 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -158,19 +158,83 @@ void __weak arch_release_thread_stack(unsigned long *stack)
  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
  * kmemcache based allocator.
  */
-# if THREAD_SIZE >= PAGE_SIZE
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
-						  int node)
+# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+
+#ifdef CONFIG_VMAP_STACK
+/*
+ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
+ * flush. Try to minimize the number of calls by caching stacks.
+ */
+#define NR_CACHED_STACKS 2
+static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+#endif
+
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
+#ifdef CONFIG_VMAP_STACK
+	void *stack;
+	int i;
+
+	local_irq_disable();
+	for (i = 0; i < NR_CACHED_STACKS; i++) {
+		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+
+		if (!s)
+			continue;
+		this_cpu_write(cached_stacks[i], NULL);
+
+		tsk->stack_vm_area = s;
+		local_irq_enable();
+		return s->addr;
+	}
+	local_irq_enable();
+
+	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+				     VMALLOC_START, VMALLOC_END,
+				     THREADINFO_GFP | __GFP_HIGHMEM,
+				     PAGE_KERNEL,
+				     0, node, __builtin_return_address(0));
+
+	/*
+	 * We can't call find_vm_area() in interrupt context, and
+	 * free_thread_stack() can be called in interrupt context,
+	 * so cache the vm_struct.
+	 */
+	if (stack)
+		tsk->stack_vm_area = find_vm_area(stack);
+	return stack;
+#else
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
 					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
+#endif
 }
 
-static inline void free_thread_stack(unsigned long *stack)
+static inline void free_thread_stack(struct task_struct *tsk)
 {
-	__free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
+#ifdef CONFIG_VMAP_STACK
+	if (task_stack_vm_area(tsk)) {
+		unsigned long flags;
+		int i;
+
+		local_irq_save(flags);
+		for (i = 0; i < NR_CACHED_STACKS; i++) {
+			if (this_cpu_read(cached_stacks[i]))
+				continue;
+
+			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
+			local_irq_restore(flags);
+			return;
+		}
+		local_irq_restore(flags);
+
+		vfree(tsk->stack);
+		return;
+	}
+#endif
+
+	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_stack_cache;
@@ -181,9 +245,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 }
 
-static void free_thread_stack(unsigned long *stack)
+static void free_thread_stack(struct task_struct *tsk)
 {
-	kmem_cache_free(thread_stack_cache, stack);
+	kmem_cache_free(thread_stack_cache, tsk->stack);
 }
 
 void thread_stack_cache_init(void)
@@ -213,24 +277,76 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-static void account_kernel_stack(unsigned long *stack, int account)
+static void account_kernel_stack(struct task_struct *tsk, int account)
 {
-	/* All stack pages are in the same zone and belong to the same memcg. */
-	struct page *first_page = virt_to_page(stack);
+	void *stack = task_stack_page(tsk);
+	struct vm_struct *vm = task_stack_vm_area(tsk);
+
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+
+	if (vm) {
+		int i;
 
-	mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
-			    THREAD_SIZE / 1024 * account);
+		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
 
-	memcg_kmem_update_page_stat(
-		first_page, MEMCG_KERNEL_STACK_KB,
-		account * (THREAD_SIZE / 1024));
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+			mod_zone_page_state(page_zone(vm->pages[i]),
+					    NR_KERNEL_STACK_KB,
+					    PAGE_SIZE / 1024 * account);
+		}
+
+		/* All stack pages belong to the same memcg. */
+		memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
+					    account * (THREAD_SIZE / 1024));
+	} else {
+		/*
+		 * All stack pages are in the same zone and belong to the
+		 * same memcg.
+		 */
+		struct page *first_page = virt_to_page(stack);
+
+		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
+				    THREAD_SIZE / 1024 * account);
+
+		memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
+					    account * (THREAD_SIZE / 1024));
+	}
 }
 
-void free_task(struct task_struct *tsk)
+static void release_task_stack(struct task_struct *tsk)
 {
-	account_kernel_stack(tsk->stack, -1);
+	account_kernel_stack(tsk, -1);
 	arch_release_thread_stack(tsk->stack);
-	free_thread_stack(tsk->stack);
+	free_thread_stack(tsk);
+	tsk->stack = NULL;
+#ifdef CONFIG_VMAP_STACK
+	tsk->stack_vm_area = NULL;
+#endif
+}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+void put_task_stack(struct task_struct *tsk)
+{
+	if (atomic_dec_and_test(&tsk->stack_refcount))
+		release_task_stack(tsk);
+}
+#endif
+
+void free_task(struct task_struct *tsk)
+{
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+	/*
+	 * The task is finally done with both the stack and thread_info,
+	 * so free both.
+	 */
+	release_task_stack(tsk);
+#else
+	/*
+	 * If the task had a separate stack allocation, it should be gone
+	 * by now.
+	 */
+	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+#endif
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -342,6 +458,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
 	unsigned long *stack;
+	struct vm_struct *stack_vm_area;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -354,11 +471,26 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!stack)
 		goto free_tsk;
 
+	stack_vm_area = task_stack_vm_area(tsk);
+
 	err = arch_dup_task_struct(tsk, orig);
+
+	/*
+	 * arch_dup_task_struct() clobbers the stack-related fields. Make
+	 * sure they're properly initialized before using any stack-related
+	 * functions again.
+	 */
+	tsk->stack = stack;
+#ifdef CONFIG_VMAP_STACK
+	tsk->stack_vm_area = stack_vm_area;
+#endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	atomic_set(&tsk->stack_refcount, 1);
+#endif
+
 	if (err)
 		goto free_stack;
 
-	tsk->stack = stack;
 #ifdef CONFIG_SECCOMP
 	/*
 	 * We must handle setting up seccomp filters once we're under
@@ -390,14 +522,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
 
-	account_kernel_stack(stack, 1);
+	account_kernel_stack(tsk, 1);
 
 	kcov_task_init(tsk);
 
 	return tsk;
 
 free_stack:
-	free_thread_stack(stack);
+	free_thread_stack(tsk);
 free_tsk:
 	free_task_struct(tsk);
 	return NULL;
@@ -1715,6 +1847,7 @@ bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
 	exit_creds(p);
 bad_fork_free:
+	put_task_stack(p);
 	free_task(p);
 fork_out:
 	return ERR_PTR(retval);
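
The hunks above combine two related changes: a small per-CPU cache of freed
vmalloc'ed thread stacks (NR_CACHED_STACKS), and refcounted stack release via
put_task_stack()/release_task_stack(). As a rough illustration of the first
idea only, here is a minimal userspace sketch of the same "park a couple of
freed buffers, reuse them before hitting the allocator" pattern. It is not
kernel code: cache_alloc_stack(), cache_free_stack() and STACK_BYTES are made
up for this example, and a plain global array stands in for the kernel's
IRQ-protected per-CPU cached_stacks[].

#include <stdio.h>
#include <stdlib.h>

#define NR_CACHED_STACKS 2
#define STACK_BYTES (16 * 1024)

/* Userspace stand-in for the per-CPU cached_stacks[] array. */
static void *cache[NR_CACHED_STACKS];

static void *cache_alloc_stack(void)
{
	/* Fast path: reuse a parked stack if any slot holds one. */
	for (int i = 0; i < NR_CACHED_STACKS; i++) {
		if (cache[i]) {
			void *s = cache[i];
			cache[i] = NULL;
			return s;
		}
	}
	/* Slow path: fall back to a fresh allocation. */
	return malloc(STACK_BYTES);
}

static void cache_free_stack(void *stack)
{
	/* Park the stack in an empty slot instead of freeing it. */
	for (int i = 0; i < NR_CACHED_STACKS; i++) {
		if (!cache[i]) {
			cache[i] = stack;
			return;
		}
	}
	/* Cache is full: really release the memory. */
	free(stack);
}

int main(void)
{
	void *a = cache_alloc_stack();
	void *b = cache_alloc_stack();

	cache_free_stack(a);
	cache_free_stack(b);

	/* Both allocations below should be satisfied from the cache. */
	void *c = cache_alloc_stack();
	void *d = cache_alloc_stack();
	printf("reused: %s\n",
	       (c == a || c == b) && (d == a || d == b) ? "yes" : "no");

	free(c);
	free(d);
	return 0;
}

Bounding the cache at NR_CACHED_STACKS is the design point the patch's comment
calls out: frequent fork/exit cycles mostly avoid vmalloc()/vfree() (and the
TLB flushes repeated vfree() forces), while freed stacks cannot pile up beyond
two per CPU.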