author	Andy Lutomirski <luto@kernel.org>	2016-09-16 01:45:48 -0400
committer	Ingo Molnar <mingo@kernel.org>	2016-09-16 03:18:54 -0400
commit	68f24b08ee892d47bdef925d676e1ae1ccc316f8 (patch)
tree	eb68202da134522dd22c4bf78487ae9017df970f
parent	aa1f1a639621672b68f654dc815a7d8298ff396f (diff)
sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
We currently keep every task's stack around until the task_struct itself is freed. This means that we keep the stack allocation alive for longer than necessary and that, under load, we free stacks in big batches whenever RCU drops the last task reference. Neither of these is good for reuse of cache-hot memory, and freeing in batches prevents us from usefully caching small numbers of vmalloced stacks.

On architectures that have thread_info on the stack, we can't easily change this, but on architectures that set THREAD_INFO_IN_TASK, we can free it as soon as the task is dead.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jann Horn <jann@thejh.net>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
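To make the new lifetime concrete, here is a small userspace model of the refcounting scheme (illustrative only, not kernel code; task_model, try_get_stack and put_stack are made-up names standing in for tsk->stack_refcount, try_get_task_stack() and put_task_stack()): the live task holds one reference, a reader may pin the stack with a try-get, and the stack is freed when the last reference drops. In the kernel, the task's own reference is dropped from finish_task_switch(), as the diff below shows.

/*
 * Userspace model of the stack refcount lifecycle (illustrative only;
 * not kernel code). A live "task" holds one reference; a reader pins
 * the stack with a try-get; the stack is freed on the last put.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct task_model {
	atomic_int stack_refcount;	/* models tsk->stack_refcount */
	void *stack;			/* models tsk->stack */
};

/* Roughly what try_get_task_stack() does: succeed only while refcount > 0. */
static void *try_get_stack(struct task_model *t)
{
	int old = atomic_load(&t->stack_refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&t->stack_refcount, &old, old + 1))
			return t->stack;	/* pinned; safe to read */
	}
	return NULL;				/* stack already freed */
}

/* Roughly what put_task_stack() does: free on the final reference drop. */
static void put_stack(struct task_model *t)
{
	if (atomic_fetch_sub(&t->stack_refcount, 1) == 1) {
		free(t->stack);
		t->stack = NULL;
	}
}

int main(void)
{
	struct task_model t = { .stack = malloc(4096) };

	atomic_store(&t.stack_refcount, 1);	/* the live task's reference */

	void *pinned = try_get_stack(&t);	/* e.g. a stack walker pins the stack */
	printf("pinned while alive: %s\n", pinned ? "yes" : "no");

	put_stack(&t);				/* task dies: drop its own reference */
	put_stack(&t);				/* walker unpins; stack freed here */

	printf("pin after free: %s\n", try_get_stack(&t) ? "yes" : "no");
	return 0;
}

Built with any C11 compiler (e.g. gcc -std=c11), this should report that the pin succeeds while the task is alive and fails after the final put.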
-rw-r--r--	include/linux/init_task.h	4
-rw-r--r--	include/linux/sched.h	14
-rw-r--r--	kernel/fork.c	35
-rw-r--r--	kernel/sched/core.c	4
4 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c04d44eeb3c..325f649d77ff 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -186,7 +186,9 @@ extern struct task_group root_task_group;
 #endif
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk),
+# define INIT_TASK_TI(tsk)			\
+	.thread_info = INIT_THREAD_INFO(tsk),	\
+	.stack_refcount = ATOMIC_INIT(1),
 #else
 # define INIT_TASK_TI(tsk)
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a95867267e9f..abb795afc823 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1936,6 +1936,10 @@ struct task_struct {
 #ifdef CONFIG_VMAP_STACK
 	struct vm_struct *stack_vm_area;
 #endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	/* A live task holds one reference. */
+	atomic_t stack_refcount;
+#endif
 /* CPU-specific state of this task */
 	struct thread_struct thread;
 /*
@@ -3143,12 +3147,22 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 
 #endif
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+	return atomic_inc_not_zero(&tsk->stack_refcount) ?
+		task_stack_page(tsk) : NULL;
+}
+
+extern void put_task_stack(struct task_struct *tsk);
+#else
 static inline void *try_get_task_stack(struct task_struct *tsk)
 {
 	return task_stack_page(tsk);
 }
 
 static inline void put_task_stack(struct task_struct *tsk) {}
+#endif
 
 #define task_stack_end_corrupted(task) \
 		(*(end_of_stack(task)) != STACK_END_MAGIC)
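The new accessors imply a pinning discipline for anything that reads another task's stack, since the stack can now disappear as soon as the task dies. A minimal hypothetical caller (not part of this patch; dump_remote_stack is a made-up name) might look like:

#include <linux/sched.h>

/* Hypothetical caller, not from this patch: read another task's stack safely. */
static void dump_remote_stack(struct task_struct *tsk)
{
	void *stack = try_get_task_stack(tsk);

	if (!stack)
		return;		/* the task died and its stack is already freed */

	/* ... inspect memory between stack and stack + THREAD_SIZE ... */

	put_task_stack(tsk);	/* drop the pin; this may free the stack */
}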
diff --git a/kernel/fork.c b/kernel/fork.c
index 0c240fd5beba..5dd0a516626d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -269,11 +269,40 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	}
 }
 
-void free_task(struct task_struct *tsk)
+static void release_task_stack(struct task_struct *tsk)
 {
 	account_kernel_stack(tsk, -1);
 	arch_release_thread_stack(tsk->stack);
 	free_thread_stack(tsk);
+	tsk->stack = NULL;
+#ifdef CONFIG_VMAP_STACK
+	tsk->stack_vm_area = NULL;
+#endif
+}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+void put_task_stack(struct task_struct *tsk)
+{
+	if (atomic_dec_and_test(&tsk->stack_refcount))
+		release_task_stack(tsk);
+}
+#endif
+
+void free_task(struct task_struct *tsk)
+{
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+	/*
+	 * The task is finally done with both the stack and thread_info,
+	 * so free both.
+	 */
+	release_task_stack(tsk);
+#else
+	/*
+	 * If the task had a separate stack allocation, it should be gone
+	 * by now.
+	 */
+	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+#endif
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -411,6 +440,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_VMAP_STACK
 	tsk->stack_vm_area = stack_vm_area;
 #endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	atomic_set(&tsk->stack_refcount, 1);
+#endif
 
 	if (err)
 		goto free_stack;
@@ -1771,6 +1803,7 @@ bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
 	exit_creds(p);
 bad_fork_free:
+	put_task_stack(p);
 	free_task(p);
 fork_out:
 	return ERR_PTR(retval);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0b6238f18da2..23c6037e2d89 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2772,6 +2772,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		 * task and put them back on the free list.
 		 */
 		kprobe_flush_task(prev);
+
+		/* Task is done with its stack. */
+		put_task_stack(prev);
+
 		put_task_struct(prev);
 	}
 