aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
authorRik van Riel <riel@redhat.com>2014-10-17 03:29:50 -0400
committerIngo Molnar <mingo@kernel.org>2014-10-28 05:47:48 -0400
commite3fe70b1f72e3f83a00d9c332ec09ab347a981e2 (patch)
tree78bf6ac641ce98eae277a1365ea3d73dcaaefe64 /kernel/sched/core.c
parent9942f79baaaf111d63ebf0862a819278d84fccc4 (diff)
sched/numa: Classify the NUMA topology of a system
Smaller NUMA systems tend to have all NUMA nodes directly connected to each other. This includes the degenerate case of a system with just one node, i.e. a non-NUMA system. Larger systems can have two kinds of NUMA topology, which affects how tasks and memory should be placed on the system. On glueless mesh systems, nodes that are not directly connected to each other will bounce traffic through intermediary nodes. Task groups can be run closer to each other by moving tasks from a node to an intermediary node between it and the task's preferred node. On NUMA systems with backplane controllers, the intermediary hops are incapable of running programs. This creates "islands" of nodes that are at an equal distance to anywhere else in the system. Each kind of topology requires a slightly different placement algorithm; this patch provides the mechanism to detect the kind of NUMA topology of a system. Signed-off-by: Rik van Riel <riel@redhat.com> Tested-by: Chegu Vinod <chegu_vinod@hp.com> [ Changed to use kernel/sched/sched.h ] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: mgorman@suse.de Cc: chegu_vinod@hp.com Link: http://lkml.kernel.org/r/1413530994-9732-3-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c53
1 files changed, 53 insertions, 0 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4007595f87e4..cde848149dd6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6128,6 +6128,7 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
6128 6128
6129#ifdef CONFIG_NUMA 6129#ifdef CONFIG_NUMA
6130static int sched_domains_numa_levels; 6130static int sched_domains_numa_levels;
6131enum numa_topology_type sched_numa_topology_type;
6131static int *sched_domains_numa_distance; 6132static int *sched_domains_numa_distance;
6132int sched_max_numa_distance; 6133int sched_max_numa_distance;
6133static struct cpumask ***sched_domains_numa_masks; 6134static struct cpumask ***sched_domains_numa_masks;
@@ -6316,6 +6317,56 @@ bool find_numa_distance(int distance)
6316 return false; 6317 return false;
6317} 6318}
6318 6319
6320/*
6321 * A system can have three types of NUMA topology:
6322 * NUMA_DIRECT: all nodes are directly connected, or not a NUMA system
6323 * NUMA_GLUELESS_MESH: some nodes reachable through intermediary nodes
6324 * NUMA_BACKPLANE: nodes can reach other nodes through a backplane
6325 *
6326 * The difference between a glueless mesh topology and a backplane
6327 * topology lies in whether communication between not directly
6328 * connected nodes goes through intermediary nodes (where programs
6329 * could run), or through backplane controllers. This affects
6330 * placement of programs.
6331 *
6332 * The type of topology can be discerned with the following tests:
6333 * - If the maximum distance between any nodes is 1 hop, the system
6334 * is directly connected.
6335 * - If for two nodes A and B, located N > 1 hops away from each other,
6336 * there is an intermediary node C, which is < N hops away from both
6337 * nodes A and B, the system is a glueless mesh.
6338 */
6339static void init_numa_topology_type(void)
6340{
6341 int a, b, c, n;
6342
6343 n = sched_max_numa_distance;
6344
6345 if (n <= 1)
6346 sched_numa_topology_type = NUMA_DIRECT;
6347
6348 for_each_online_node(a) {
6349 for_each_online_node(b) {
6350 /* Find two nodes furthest removed from each other. */
6351 if (node_distance(a, b) < n)
6352 continue;
6353
6354 /* Is there an intermediary node between a and b? */
6355 for_each_online_node(c) {
6356 if (node_distance(a, c) < n &&
6357 node_distance(b, c) < n) {
6358 sched_numa_topology_type =
6359 NUMA_GLUELESS_MESH;
6360 return;
6361 }
6362 }
6363
6364 sched_numa_topology_type = NUMA_BACKPLANE;
6365 return;
6366 }
6367 }
6368}
6369
6319static void sched_init_numa(void) 6370static void sched_init_numa(void)
6320{ 6371{
6321 int next_distance, curr_distance = node_distance(0, 0); 6372 int next_distance, curr_distance = node_distance(0, 0);
@@ -6449,6 +6500,8 @@ static void sched_init_numa(void)
6449 6500
6450 sched_domains_numa_levels = level; 6501 sched_domains_numa_levels = level;
6451 sched_max_numa_distance = sched_domains_numa_distance[level - 1]; 6502 sched_max_numa_distance = sched_domains_numa_distance[level - 1];
6503
6504 init_numa_topology_type();
6452} 6505}
6453 6506
6454static void sched_domains_numa_masks_set(int cpu) 6507static void sched_domains_numa_masks_set(int cpu)