diff options
author | Siddha, Suresh B <suresh.b.siddha@intel.com> | 2006-03-27 04:15:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-27 11:44:43 -0500 |
commit | 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 (patch) | |
tree | ccfa4927ebc7a8f663f9ac9e7789a713a33253ff /arch/x86_64 | |
parent | 77e4bfbcf071f795b54862455dce8902b3fc29c2 (diff) |
[PATCH] sched: new sched domain for representing multi-core
Add a new sched domain for representing multi-core with shared caches
between cores. Consider a dual package system, each package containing two
cores and with last level cache shared between cores with in a package. If
there are two runnable processes, with this appended patch those two
processes will be scheduled on different packages.
On such systems, with this patch we have observed 8% perf improvement with
specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2
users).
This new domain will come into play only on multi-core systems with shared
caches. On other systems, this sched domain will be removed by domain
degeneration code. This new domain can be also used for implementing power
savings policy (see OLS 2005 CMP kernel scheduler paper for more details..
I will post another patch for power savings policy soon)
Most of the arch/* file changes are for cpu_coregroup_map() implementation.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 9 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 3 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 24 |
3 files changed, 35 insertions, 1 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 45efe0ca88f8..1cb4aa241c8c 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -250,6 +250,15 @@ config SCHED_SMT | |||
250 | cost of slightly increased overhead in some places. If unsure say | 250 | cost of slightly increased overhead in some places. If unsure say |
251 | N here. | 251 | N here. |
252 | 252 | ||
253 | config SCHED_MC | ||
254 | bool "Multi-core scheduler support" | ||
255 | depends on SMP | ||
256 | default y | ||
257 | help | ||
258 | Multi-core scheduler support improves the CPU scheduler's decision | ||
259 | making when dealing with multi-core CPU chips at a cost of slightly | ||
260 | increased overhead in some places. If unsure say N here. | ||
261 | |||
253 | source "kernel/Kconfig.preempt" | 262 | source "kernel/Kconfig.preempt" |
254 | 263 | ||
255 | config NUMA | 264 | config NUMA |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a57eec8311a7..d1f3e9272c05 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
962 | 962 | ||
963 | cpuid(1, &eax, &ebx, &ecx, &edx); | 963 | cpuid(1, &eax, &ebx, &ecx, &edx); |
964 | 964 | ||
965 | c->apicid = phys_pkg_id(0); | ||
966 | 965 | ||
967 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 966 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
968 | return; | 967 | return; |
@@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
1171 | c->x86_capability[2] = cpuid_edx(0x80860001); | 1170 | c->x86_capability[2] = cpuid_edx(0x80860001); |
1172 | } | 1171 | } |
1173 | 1172 | ||
1173 | c->apicid = phys_pkg_id(0); | ||
1174 | |||
1174 | /* | 1175 | /* |
1175 | * Vendor-specific initialization. In this section we | 1176 | * Vendor-specific initialization. In this section we |
1176 | * canonicalize the feature flags, meaning if there are | 1177 | * canonicalize the feature flags, meaning if there are |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 66e98659d077..ea48fa638070 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | |||
68 | /* core ID of each logical CPU */ | 68 | /* core ID of each logical CPU */ |
69 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 69 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
70 | 70 | ||
71 | /* Last level cache ID of each logical CPU */ | ||
72 | u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | ||
73 | |||
71 | /* Bitmask of currently online CPUs */ | 74 | /* Bitmask of currently online CPUs */ |
72 | cpumask_t cpu_online_map __read_mostly; | 75 | cpumask_t cpu_online_map __read_mostly; |
73 | 76 | ||
@@ -445,6 +448,18 @@ void __cpuinit smp_callin(void) | |||
445 | cpu_set(cpuid, cpu_callin_map); | 448 | cpu_set(cpuid, cpu_callin_map); |
446 | } | 449 | } |
447 | 450 | ||
451 | /* maps the cpu to the sched domain representing multi-core */ | ||
452 | cpumask_t cpu_coregroup_map(int cpu) | ||
453 | { | ||
454 | struct cpuinfo_x86 *c = cpu_data + cpu; | ||
455 | /* | ||
456 | * For perf, we return last level cache shared map. | ||
457 | * TBD: when power saving sched policy is added, we will return | ||
458 | * cpu_core_map when power saving policy is enabled | ||
459 | */ | ||
460 | return c->llc_shared_map; | ||
461 | } | ||
462 | |||
448 | /* representing cpus for which sibling maps can be computed */ | 463 | /* representing cpus for which sibling maps can be computed */ |
449 | static cpumask_t cpu_sibling_setup_map; | 464 | static cpumask_t cpu_sibling_setup_map; |
450 | 465 | ||
@@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu) | |||
463 | cpu_set(cpu, cpu_sibling_map[i]); | 478 | cpu_set(cpu, cpu_sibling_map[i]); |
464 | cpu_set(i, cpu_core_map[cpu]); | 479 | cpu_set(i, cpu_core_map[cpu]); |
465 | cpu_set(cpu, cpu_core_map[i]); | 480 | cpu_set(cpu, cpu_core_map[i]); |
481 | cpu_set(i, c[cpu].llc_shared_map); | ||
482 | cpu_set(cpu, c[i].llc_shared_map); | ||
466 | } | 483 | } |
467 | } | 484 | } |
468 | } else { | 485 | } else { |
469 | cpu_set(cpu, cpu_sibling_map[cpu]); | 486 | cpu_set(cpu, cpu_sibling_map[cpu]); |
470 | } | 487 | } |
471 | 488 | ||
489 | cpu_set(cpu, c[cpu].llc_shared_map); | ||
490 | |||
472 | if (current_cpu_data.x86_max_cores == 1) { | 491 | if (current_cpu_data.x86_max_cores == 1) { |
473 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 492 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
474 | c[cpu].booted_cores = 1; | 493 | c[cpu].booted_cores = 1; |
@@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu) | |||
476 | } | 495 | } |
477 | 496 | ||
478 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 497 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
498 | if (cpu_llc_id[cpu] != BAD_APICID && | ||
499 | cpu_llc_id[cpu] == cpu_llc_id[i]) { | ||
500 | cpu_set(i, c[cpu].llc_shared_map); | ||
501 | cpu_set(cpu, c[i].llc_shared_map); | ||
502 | } | ||
479 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | 503 | if (phys_proc_id[cpu] == phys_proc_id[i]) { |
480 | cpu_set(i, cpu_core_map[cpu]); | 504 | cpu_set(i, cpu_core_map[cpu]); |
481 | cpu_set(cpu, cpu_core_map[i]); | 505 | cpu_set(cpu, cpu_core_map[i]); |