author	Rik van Riel <riel@redhat.com>	2014-04-11 13:00:27 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-05-07 07:33:45 -0400
commit	792568ec6a31ca560ca4d528782cbc6cd2cea8b0 (patch)
tree	f5a0b25a3dcce51c3e4850d82071bbb9e73ec7f7 /kernel/sched
parent	2fe5de9ce7d57498abc14b375cad2fcf8c3ee6cc (diff)
sched/numa: Count pages on active node as local
The NUMA code is smart enough to distribute the memory of workloads
that span multiple NUMA nodes across those NUMA nodes.

However, it still has a pretty high scan rate for such workloads,
because any memory that is left on a node other than the node of
the CPU that faulted on the memory is counted as non-local, which
causes the scan rate to go up.

Counting the memory on any node where the task's numa group is
actively running as local allows the scan rate to slow down
once the application has settled in.

This should reduce the overhead of the automatic NUMA placement
code when a workload spans multiple NUMA nodes.

Signed-off-by: Rik van Riel <riel@redhat.com>
Tested-by: Vinod Chegu <chegu_vinod@hp.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1397235629-16328-2-git-send-email-riel@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
 kernel/sched/fair.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5d859ec975c2..f6457b63c95c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1738,6 +1738,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 	struct task_struct *p = current;
 	bool migrated = flags & TNF_MIGRATED;
 	int cpu_node = task_node(current);
+	int local = !!(flags & TNF_FAULT_LOCAL);
 	int priv;
 
 	if (!numabalancing_enabled)
@@ -1786,6 +1787,17 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 		task_numa_group(p, last_cpupid, flags, &priv);
 	}
 
+	/*
+	 * If a workload spans multiple NUMA nodes, a shared fault that
+	 * occurs wholly within the set of nodes that the workload is
+	 * actively using should be counted as local. This allows the
+	 * scan rate to slow down when a workload has settled down.
+	 */
+	if (!priv && !local && p->numa_group &&
+			node_isset(cpu_node, p->numa_group->active_nodes) &&
+			node_isset(mem_node, p->numa_group->active_nodes))
+		local = 1;
+
 	task_numa_placement(p);
 
 	/*
@@ -1800,7 +1812,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 
 	p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages;
 	p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages;
-	p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages;
+	p->numa_faults_locality[local] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
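
For intuition on why reclassifying active-node faults as "local" slows scanning, the sketch below is a minimal user-space model, not kernel code: struct task_model, update_scan_period(), the SCAN_PERIOD_MIN_MS/MAX_MS constants and the simple double/halve adjustment rule are all illustrative assumptions, standing in for the actual logic in update_task_scan_period() in kernel/sched/fair.c, which consumes p->numa_faults_locality[] in a more refined way.

/*
 * Illustrative user-space model (NOT kernel code): a higher share of
 * "local" faults lets the scan period grow toward its maximum, so the
 * task's address space is unmapped and re-faulted less often.  The
 * constants and the double/halve rule are simplified assumptions.
 */
#include <stdio.h>

#define SCAN_PERIOD_MIN_MS   1000
#define SCAN_PERIOD_MAX_MS  60000

struct task_model {
	unsigned long faults_locality[2];	/* [0] = remote, [1] = local */
	unsigned int scan_period_ms;
};

static void update_scan_period(struct task_model *t)
{
	unsigned long local = t->faults_locality[1];
	unsigned long remote = t->faults_locality[0];
	unsigned long total = local + remote;

	if (!total)
		return;

	if (local * 2 >= total) {
		/* Mostly local: back off, scan less aggressively. */
		t->scan_period_ms *= 2;
		if (t->scan_period_ms > SCAN_PERIOD_MAX_MS)
			t->scan_period_ms = SCAN_PERIOD_MAX_MS;
	} else {
		/* Mostly remote: speed up to converge on a better placement. */
		t->scan_period_ms /= 2;
		if (t->scan_period_ms < SCAN_PERIOD_MIN_MS)
			t->scan_period_ms = SCAN_PERIOD_MIN_MS;
	}

	/* Counters are consumed once per adjustment. */
	t->faults_locality[0] = t->faults_locality[1] = 0;
}

int main(void)
{
	struct task_model t = { .scan_period_ms = SCAN_PERIOD_MIN_MS };

	/*
	 * With this patch, shared faults that stay within the numa
	 * group's active_nodes land in the "local" bucket, so a settled
	 * multi-node workload ends up mostly local and the scan period
	 * is allowed to grow.
	 */
	t.faults_locality[1] = 900;	/* local, including active-node faults */
	t.faults_locality[0] = 100;	/* remote */
	update_scan_period(&t);
	printf("scan period after settling: %u ms\n", t.scan_period_ms);
	return 0;
}

Under the same model, a workload whose faults mostly land outside its active nodes keeps a majority-remote ratio, so the period is halved instead and scanning stays aggressive; that is the behaviour the patch avoids for multi-node workloads that have already spread their memory where they run.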