aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
author: Mel Gorman <mgorman@suse.de> 2012-11-22 09:40:03 -0500
committer: Mel Gorman <mgorman@suse.de> 2012-12-11 09:42:56 -0500
commit: 5bca23035391928c4c7301835accca3551b96cc2 (patch)
tree: 2feb63abf318e6edfded8bb97b43ca29c3c5b312 /kernel/sched
parent: 3105b86a9fee7d2c2e76edb53bbbc4027599628f (diff)
mm: sched: numa: Delay PTE scanning until a task is scheduled on a new node
Because migrations are driven by the CPU a task is running on, there is no point tracking NUMA faults until a task runs on a new node. This patch tracks the first node used by an address space. Until a task is scheduled on a different node, PTE scanning is disabled and no NUMA hinting faults are trapped. This should help workloads that are short-lived, do not care about NUMA placement or have bound themselves to a single node. This takes advantage of the logic in "mm: sched: numa: Implement slow start for working set sampling" to delay when the checks are made. This will take advantage of processes that set their CPU and node bindings early in their lifetime. It will also potentially allow any initial load balancing to take place.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Diffstat (limited to 'kernel/sched')
-rw-r--r-- kernel/sched/fair.c 18
-rw-r--r-- kernel/sched/features.h 4
2 files changed, 21 insertions, 1 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a02a2082e95..3e18f611a5aa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -861,6 +861,24 @@ void task_numa_work(struct callback_head *work)
861 return; 861 return;
862 862
863 /* 863 /*
864 * We do not care about task placement until a task runs on a node
865 * other than the first one used by the address space. This is
866 * largely because migrations are driven by what CPU the task
867 * is running on. If it's never scheduled on another node, it'll
868 * not migrate so why bother trapping the fault.
869 */
870 if (mm->first_nid == NUMA_PTE_SCAN_INIT)
871 mm->first_nid = numa_node_id();
872 if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) {
873 /* Are we running on a new node yet? */
874 if (numa_node_id() == mm->first_nid &&
875 !sched_feat_numa(NUMA_FORCE))
876 return;
877
878 mm->first_nid = NUMA_PTE_SCAN_ACTIVE;
879 }
880
881 /*
864 * Reset the scan period if enough time has gone by. Objective is that 882 * Reset the scan period if enough time has gone by. Objective is that
865 * scanning will be reduced if pages are properly placed. As tasks 883 * scanning will be reduced if pages are properly placed. As tasks
866 * can enter different phases this needs to be re-examined. Lacking 884 * can enter different phases this needs to be re-examined. Lacking
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index d2373a3e3252..e7c25fff1e94 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -65,8 +65,10 @@ SCHED_FEAT(LB_MIN, false)
65/* 65/*
66 * Apply the automatic NUMA scheduling policy. Enabled automatically 66 * Apply the automatic NUMA scheduling policy. Enabled automatically
67 * at runtime if running on a NUMA machine. Can be controlled via 67 * at runtime if running on a NUMA machine. Can be controlled via
68 * numa_balancing= 68 * numa_balancing=. Allow PTE scanning to be forced on UMA machines
69 * for debugging the core machinery.
69 */ 70 */
70#ifdef CONFIG_NUMA_BALANCING 71#ifdef CONFIG_NUMA_BALANCING
71SCHED_FEAT(NUMA, false) 72SCHED_FEAT(NUMA, false)
73SCHED_FEAT(NUMA_FORCE, false)
72#endif 74#endif