Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  62
1 file changed, 54 insertions(+), 8 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 47911e49c2b1..07cddff89c7b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -224,9 +224,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 		return s->addr;
 	}
 
+	/*
+	 * Allocated stacks are cached and later reused by new threads,
+	 * so memcg accounting is performed manually on assigning/releasing
+	 * stacks to tasks. Drop __GFP_ACCOUNT.
+	 */
 	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
 				     VMALLOC_START, VMALLOC_END,
-				     THREADINFO_GFP,
+				     THREADINFO_GFP & ~__GFP_ACCOUNT,
 				     PAGE_KERNEL,
 				     0, node, __builtin_return_address(0));
 
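Note on the hunk above: in this kernel generation THREADINFO_GFP is built
from GFP_KERNEL_ACCOUNT (i.e. GFP_KERNEL | __GFP_ACCOUNT) plus __GFP_ZERO,
so the masking boils down to (a sketch for orientation, assuming the
include/linux/thread_info.h and gfp.h definitions of that era):

	THREADINFO_GFP                   == GFP_KERNEL | __GFP_ACCOUNT | __GFP_ZERO
	THREADINFO_GFP & ~__GFP_ACCOUNT  == GFP_KERNEL | __GFP_ZERO

The stack pages are still zeroed, but they are no longer charged to the
allocating task's memcg implicitly; charging is now done explicitly (see
memcg_charge_kernel_stack() further down), so a stack recycled from the
per-cpu cache can be charged to whichever task ends up owning it.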
@@ -249,9 +254,19 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 static inline void free_thread_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
-	if (task_stack_vm_area(tsk)) {
+	struct vm_struct *vm = task_stack_vm_area(tsk);
+
+	if (vm) {
 		int i;
 
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+			mod_memcg_page_state(vm->pages[i],
+					     MEMCG_KERNEL_STACK_KB,
+					     -(int)(PAGE_SIZE / 1024));
+
+			memcg_kmem_uncharge(vm->pages[i], 0);
+		}
+
 		for (i = 0; i < NR_CACHED_STACKS; i++) {
 			if (this_cpu_cmpxchg(cached_stacks[i],
 					     NULL, tsk->stack_vm_area) != NULL)
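The uncharging above runs before the stack is parked in the per-cpu
cached_stacks[] array: a cached stack may next be handed to a task in a
different cgroup, so the charge has to follow the task, not the
allocation. The intended lifecycle of a vmap'ed stack is roughly
(a sketch, not code from the patch):

	alloc_thread_stack_node()	/* allocate, or reuse a cached stack;
					 * no memcg charge either way */
	memcg_charge_kernel_stack()	/* charge each page to the new
					 * task's memcg */
	/* ... task lifetime ... */
	free_thread_stack()		/* uncharge every page, then try to
					 * stash the stack for reuse */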
@@ -352,10 +367,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 					    NR_KERNEL_STACK_KB,
 					    PAGE_SIZE / 1024 * account);
 		}
-
-		/* All stack pages belong to the same memcg. */
-		mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
-				     account * (THREAD_SIZE / 1024));
 	} else {
 		/*
 		 * All stack pages are in the same zone and belong to the
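With the per-page charging introduced by this series, the bulk
mod_memcg_page_state() on vm->pages[0] removed above would double-count
MEMCG_KERNEL_STACK_KB. account_kernel_stack() is left maintaining only
the node/zone-level NR_KERNEL_STACK_KB counters; the memcg-level counter
now moves in lockstep with the per-page charge and uncharge paths.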
@@ -371,6 +382,35 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	}
 }
 
+static int memcg_charge_kernel_stack(struct task_struct *tsk)
+{
+#ifdef CONFIG_VMAP_STACK
+	struct vm_struct *vm = task_stack_vm_area(tsk);
+	int ret;
+
+	if (vm) {
+		int i;
+
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+			/*
+			 * If memcg_kmem_charge() fails, page->mem_cgroup
+			 * pointer is NULL, and both memcg_kmem_uncharge()
+			 * and mod_memcg_page_state() in free_thread_stack()
+			 * will ignore this page. So it's safe.
+			 */
+			ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0);
+			if (ret)
+				return ret;
+
+			mod_memcg_page_state(vm->pages[i],
+					     MEMCG_KERNEL_STACK_KB,
+					     PAGE_SIZE / 1024);
+		}
+	}
+#endif
+	return 0;
+}
+
 static void release_task_stack(struct task_struct *tsk)
 {
 	if (WARN_ON(tsk->state != TASK_DEAD))
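memcg_charge_kernel_stack() charges the THREAD_SIZE / PAGE_SIZE stack
pages one at a time and returns on the first failure. As the in-code
comment notes, a partial failure needs no explicit rollback: a page whose
memcg_kmem_charge() failed is left with page->mem_cgroup == NULL, and
free_thread_stack() ignores such pages, so freeing the stack unwinds
exactly the subset that was charged. The caller-side contract looks
roughly like this (a hypothetical condensation of the dup_task_struct()
hunk further down):

	stack = alloc_thread_stack_node(tsk, node);	/* no charge yet */
	if (!stack)
		goto free_tsk;
	if (memcg_charge_kernel_stack(tsk))
		goto free_stack;	/* free_thread_stack() uncharges
					 * only what got charged */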
@@ -551,8 +591,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			goto out;
 	}
 	/* a new mm has just been created */
-	arch_dup_mmap(oldmm, mm);
-	retval = 0;
+	retval = arch_dup_mmap(oldmm, mm);
 out:
 	up_write(&mm->mmap_sem);
 	flush_tlb_mm(oldmm);
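An unrelated change swept into this range: arch_dup_mmap() returns an int
in this tree, since some architectures can fail while duplicating
arch-specific mm state (x86's LDT copying is presumably the driving
case), so dup_mmap() now propagates that result instead of hardcoding
success.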
@@ -809,6 +848,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!stack)
 		goto free_tsk;
 
+	if (memcg_charge_kernel_stack(tsk))
+		goto free_stack;
+
 	stack_vm_area = task_stack_vm_area(tsk);
 
 	err = arch_dup_task_struct(tsk, orig);
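The charge is attempted as soon as the stack is in place, before
arch_dup_task_struct() or any other setup can fail; on error the unwind
goes through free_stack, i.e. free_thread_stack(), which per the earlier
hunk safely uncharges whichever pages actually got charged.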
@@ -1781,6 +1823,10 @@ static __latent_entropy struct task_struct *copy_process(
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
+#ifdef CONFIG_PSI
+	p->psi_flags = 0;
+#endif
+
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
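Also unrelated to the stack accounting: under CONFIG_PSI the child's
psi_flags are zeroed explicitly rather than inherited via the
task_struct copy, so a freshly forked task starts with no pressure-stall
state bits carried over from its parent.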