author     Andi Kleen <ak@linux.intel.com>  2016-05-23 19:24:05 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-05-23 20:04:14 -0400
commit     725fc629ff2545b061407305ae51016c9f928fce (patch)
tree       d9b5ace469a064da5439bd6c96d1e714df85bab2
parent     5c8ccefdf46c5f87d87b694c7fbc04941c2c99a5 (diff)
kernel/fork.c: allocate idle task for a CPU always on its local node
Linux preallocates the task structs of the idle tasks for all possible CPUs. This currently means they all end up on node 0. This also implies that the cache lines used by MWAIT, which lie around the flags field in the task struct, are all located on node 0. We see a noticeable performance improvement on Knights Landing CPUs when the cache lines used for MWAIT are located on the local nodes of the CPUs using them. I would expect this to give a (likely slight) improvement on other systems too.

The patch places the idle task of each CPU on that CPU's local node, by passing the right target node to copy_process().

[akpm@linux-foundation.org: use NUMA_NO_NODE, not a bare -1]
Link: http://lkml.kernel.org/r/1463492694-15833-1-git-send-email-andi@firstfloor.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  kernel/fork.c  15
1 file changed, 9 insertions, 6 deletions
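
For readers who want to experiment with the same placement idea outside the kernel, the sketch below mirrors the patch's approach in userspace: allocate each CPU's control block on that CPU's local node, and fall back to an ordinary allocation when no node preference applies. It is only a sketch, assuming libnuma is installed (numa.h, link with -lnuma); struct idle_stub and alloc_idle_stub() are hypothetical names for illustration, not kernel code.

/*
 * Sketch: per-CPU allocation on the CPU's local node, with a fallback to
 * plain malloc() when NUMA is unavailable -- the same shape as the
 * NUMA_NO_NODE fallback in dup_task_struct() in the diff below.
 */
#include <numa.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct idle_stub {              /* hypothetical stand-in for a task struct */
	unsigned long flags;    /* the hot field the commit message mentions */
	int cpu;
};

static struct idle_stub *alloc_idle_stub(int cpu)
{
	if (numa_available() != -1) {
		int node = numa_node_of_cpu(cpu);   /* cpu_to_node() analogue */
		if (node >= 0)
			return numa_alloc_onnode(sizeof(struct idle_stub), node);
	}
	return malloc(sizeof(struct idle_stub));    /* "no preferred node" path */
}

int main(void)
{
	long ncpus = sysconf(_SC_NPROCESSORS_CONF);

	for (int cpu = 0; cpu < ncpus; cpu++) {
		struct idle_stub *s = alloc_idle_stub(cpu);
		if (!s)
			return 1;
		s->cpu = cpu;
		s->flags = 0;
		printf("cpu %d: stub at %p (node %d)\n", cpu, (void *)s,
		       numa_available() != -1 ? numa_node_of_cpu(cpu) : -1);
		/* numa_free() for NUMA allocations, free() for the fallback */
		if (numa_available() != -1 && numa_node_of_cpu(cpu) >= 0)
			numa_free(s, sizeof(struct idle_stub));
		else
			free(s);
	}
	return 0;
}
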
diff --git a/kernel/fork.c b/kernel/fork.c
index 103d78fd8f75..e67d7b773348 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -340,13 +340,14 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 }
 
-static struct task_struct *dup_task_struct(struct task_struct *orig)
+static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
 	struct thread_info *ti;
-	int node = tsk_fork_get_node(orig);
 	int err;
 
+	if (node == NUMA_NO_NODE)
+		node = tsk_fork_get_node(orig);
 	tsk = alloc_task_struct_node(node);
 	if (!tsk)
 		return NULL;
@@ -1276,7 +1277,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 					int __user *child_tidptr,
 					struct pid *pid,
 					int trace,
-					unsigned long tls)
+					unsigned long tls,
+					int node)
 {
 	int retval;
 	struct task_struct *p;
@@ -1328,7 +1330,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	retval = -ENOMEM;
-	p = dup_task_struct(current);
+	p = dup_task_struct(current, node);
 	if (!p)
 		goto fork_out;
 
@@ -1706,7 +1708,8 @@ static inline void init_idle_pids(struct pid_link *links)
 struct task_struct *fork_idle(int cpu)
 {
 	struct task_struct *task;
-	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
+	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
+			    cpu_to_node(cpu));
 	if (!IS_ERR(task)) {
 		init_idle_pids(task->pids);
 		init_idle(task, cpu);
@@ -1751,7 +1754,7 @@ long _do_fork(unsigned long clone_flags,
 	}
 
 	p = copy_process(clone_flags, stack_start, stack_size,
-			 child_tidptr, NULL, trace, tls);
+			 child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
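
As a companion to the sketch above, the snippet below shows one way to check from userspace which node a page actually landed on. It is a sketch under the assumption that libnuma's <numaif.h> is available (get_mempolicy() with MPOL_F_NODE | MPOL_F_ADDR); the node_of() helper name is made up for illustration.

/*
 * Sketch: query the node backing an address.  The page must be faulted in
 * first, since numa_alloc_onnode() returns untouched (mmap'd) memory.
 */
#include <numa.h>
#include <numaif.h>
#include <stdio.h>

static int node_of(void *addr)
{
	int node = -1;

	/* With MPOL_F_NODE | MPOL_F_ADDR, the node of 'addr' is returned
	 * in the first argument instead of a policy mode. */
	if (get_mempolicy(&node, NULL, 0, addr, MPOL_F_NODE | MPOL_F_ADDR))
		return -1;
	return node;
}

int main(void)
{
	if (numa_available() == -1) {
		fprintf(stderr, "NUMA not available on this system\n");
		return 1;
	}

	int target = 0;                          /* ask for node 0 explicitly */
	void *p = numa_alloc_onnode(4096, target);
	if (!p)
		return 1;

	*(volatile char *)p = 0;                 /* touch so the page is faulted in */
	printf("requested node %d, page resides on node %d\n", target, node_of(p));
	numa_free(p, 4096);
	return 0;
}
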