diff options
author | Siddha, Suresh B <suresh.b.siddha@intel.com> | 2006-03-27 04:15:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-27 11:44:43 -0500 |
commit | 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 (patch) | |
tree | ccfa4927ebc7a8f663f9ac9e7789a713a33253ff /arch/i386 | |
parent | 77e4bfbcf071f795b54862455dce8902b3fc29c2 (diff) |
[PATCH] sched: new sched domain for representing multi-core
Add a new sched domain for representing multi-core with shared caches
between cores. Consider a dual package system, each package containing two
cores and with last level cache shared between cores with in a package. If
there are two runnable processes, with this appended patch those two
processes will be scheduled on different packages.
On such systems, with this patch we have observed 8% perf improvement with
specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2
users).
This new domain will come into play only on multi-core systems with shared
caches. On other systems, this sched domain will be removed by domain
degeneration code. This new domain can be also used for implementing power
savings policy (see OLS 2005 CMP kernel scheduler paper for more details..
I will post another patch for power savings policy soon)
Most of the arch/* file changes are for cpu_coregroup_map() implementation.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386')
-rw-r--r-- | arch/i386/Kconfig | 9 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/common.c | 10 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/intel_cacheinfo.c | 22 | ||||
-rw-r--r-- | arch/i386/kernel/smpboot.c | 24 |
4 files changed, 60 insertions, 5 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index f7db71d0b913..f17bd1d2707e 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -231,6 +231,15 @@ config SCHED_SMT | |||
231 | cost of slightly increased overhead in some places. If unsure say | 231 | cost of slightly increased overhead in some places. If unsure say |
232 | N here. | 232 | N here. |
233 | 233 | ||
234 | config SCHED_MC | ||
235 | bool "Multi-core scheduler support" | ||
236 | depends on SMP | ||
237 | default y | ||
238 | help | ||
239 | Multi-core scheduler support improves the CPU scheduler's decision | ||
240 | making when dealing with multi-core CPU chips at a cost of slightly | ||
241 | increased overhead in some places. If unsure say N here. | ||
242 | |||
234 | source "kernel/Kconfig.preempt" | 243 | source "kernel/Kconfig.preempt" |
235 | 244 | ||
236 | config X86_UP_APIC | 245 | config X86_UP_APIC |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7e3d6b6a4e96..a06a49075f10 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -266,7 +266,7 @@ static void __init early_cpu_detect(void) | |||
266 | void __cpuinit generic_identify(struct cpuinfo_x86 * c) | 266 | void __cpuinit generic_identify(struct cpuinfo_x86 * c) |
267 | { | 267 | { |
268 | u32 tfms, xlvl; | 268 | u32 tfms, xlvl; |
269 | int junk; | 269 | int ebx; |
270 | 270 | ||
271 | if (have_cpuid_p()) { | 271 | if (have_cpuid_p()) { |
272 | /* Get vendor name */ | 272 | /* Get vendor name */ |
@@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |||
282 | /* Intel-defined flags: level 0x00000001 */ | 282 | /* Intel-defined flags: level 0x00000001 */ |
283 | if ( c->cpuid_level >= 0x00000001 ) { | 283 | if ( c->cpuid_level >= 0x00000001 ) { |
284 | u32 capability, excap; | 284 | u32 capability, excap; |
285 | cpuid(0x00000001, &tfms, &junk, &excap, &capability); | 285 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); |
286 | c->x86_capability[0] = capability; | 286 | c->x86_capability[0] = capability; |
287 | c->x86_capability[4] = excap; | 287 | c->x86_capability[4] = excap; |
288 | c->x86 = (tfms >> 8) & 15; | 288 | c->x86 = (tfms >> 8) & 15; |
@@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |||
292 | if (c->x86 >= 0x6) | 292 | if (c->x86 >= 0x6) |
293 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 293 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
294 | c->x86_mask = tfms & 15; | 294 | c->x86_mask = tfms & 15; |
295 | #ifdef CONFIG_SMP | ||
296 | c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); | ||
297 | #else | ||
298 | c->apicid = (ebx >> 24) & 0xFF; | ||
299 | #endif | ||
295 | } else { | 300 | } else { |
296 | /* Have CPUID level 0 only - unheard of */ | 301 | /* Have CPUID level 0 only - unheard of */ |
297 | c->x86 = 4; | 302 | c->x86 = 4; |
@@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
474 | 479 | ||
475 | cpuid(1, &eax, &ebx, &ecx, &edx); | 480 | cpuid(1, &eax, &ebx, &ecx, &edx); |
476 | 481 | ||
477 | c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); | ||
478 | 482 | ||
479 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 483 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
480 | return; | 484 | return; |
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ce61921369e5..7e7fd4e67dd0 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c | |||
@@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
173 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ | 173 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ |
174 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ | 174 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ |
175 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ | 175 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ |
176 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; | ||
177 | #ifdef CONFIG_SMP | ||
178 | unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); | ||
179 | #endif | ||
176 | 180 | ||
177 | if (c->cpuid_level > 3) { | 181 | if (c->cpuid_level > 3) { |
178 | static int is_initialized; | 182 | static int is_initialized; |
@@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
205 | break; | 209 | break; |
206 | case 2: | 210 | case 2: |
207 | new_l2 = this_leaf.size/1024; | 211 | new_l2 = this_leaf.size/1024; |
212 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | ||
213 | index_msb = get_count_order(num_threads_sharing); | ||
214 | l2_id = c->apicid >> index_msb; | ||
208 | break; | 215 | break; |
209 | case 3: | 216 | case 3: |
210 | new_l3 = this_leaf.size/1024; | 217 | new_l3 = this_leaf.size/1024; |
218 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | ||
219 | index_msb = get_count_order(num_threads_sharing); | ||
220 | l3_id = c->apicid >> index_msb; | ||
211 | break; | 221 | break; |
212 | default: | 222 | default: |
213 | break; | 223 | break; |
@@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
273 | if (new_l1i) | 283 | if (new_l1i) |
274 | l1i = new_l1i; | 284 | l1i = new_l1i; |
275 | 285 | ||
276 | if (new_l2) | 286 | if (new_l2) { |
277 | l2 = new_l2; | 287 | l2 = new_l2; |
288 | #ifdef CONFIG_SMP | ||
289 | cpu_llc_id[cpu] = l2_id; | ||
290 | #endif | ||
291 | } | ||
278 | 292 | ||
279 | if (new_l3) | 293 | if (new_l3) { |
280 | l3 = new_l3; | 294 | l3 = new_l3; |
295 | #ifdef CONFIG_SMP | ||
296 | cpu_llc_id[cpu] = l3_id; | ||
297 | #endif | ||
298 | } | ||
281 | 299 | ||
282 | if ( trace ) | 300 | if ( trace ) |
283 | printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); | 301 | printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 82371d83bfa9..a6969903f2d6 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; | |||
72 | /* Core ID of each logical CPU */ | 72 | /* Core ID of each logical CPU */ |
73 | int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; | 73 | int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; |
74 | 74 | ||
75 | /* Last level cache ID of each logical CPU */ | ||
76 | int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | ||
77 | |||
75 | /* representing HT siblings of each logical CPU */ | 78 | /* representing HT siblings of each logical CPU */ |
76 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 79 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
77 | EXPORT_SYMBOL(cpu_sibling_map); | 80 | EXPORT_SYMBOL(cpu_sibling_map); |
@@ -440,6 +443,18 @@ static void __devinit smp_callin(void) | |||
440 | 443 | ||
441 | static int cpucount; | 444 | static int cpucount; |
442 | 445 | ||
446 | /* maps the cpu to the sched domain representing multi-core */ | ||
447 | cpumask_t cpu_coregroup_map(int cpu) | ||
448 | { | ||
449 | struct cpuinfo_x86 *c = cpu_data + cpu; | ||
450 | /* | ||
451 | * For perf, we return last level cache shared map. | ||
452 | * TBD: when power saving sched policy is added, we will return | ||
453 | * cpu_core_map when power saving policy is enabled | ||
454 | */ | ||
455 | return c->llc_shared_map; | ||
456 | } | ||
457 | |||
443 | /* representing cpus for which sibling maps can be computed */ | 458 | /* representing cpus for which sibling maps can be computed */ |
444 | static cpumask_t cpu_sibling_setup_map; | 459 | static cpumask_t cpu_sibling_setup_map; |
445 | 460 | ||
@@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu) | |||
459 | cpu_set(cpu, cpu_sibling_map[i]); | 474 | cpu_set(cpu, cpu_sibling_map[i]); |
460 | cpu_set(i, cpu_core_map[cpu]); | 475 | cpu_set(i, cpu_core_map[cpu]); |
461 | cpu_set(cpu, cpu_core_map[i]); | 476 | cpu_set(cpu, cpu_core_map[i]); |
477 | cpu_set(i, c[cpu].llc_shared_map); | ||
478 | cpu_set(cpu, c[i].llc_shared_map); | ||
462 | } | 479 | } |
463 | } | 480 | } |
464 | } else { | 481 | } else { |
465 | cpu_set(cpu, cpu_sibling_map[cpu]); | 482 | cpu_set(cpu, cpu_sibling_map[cpu]); |
466 | } | 483 | } |
467 | 484 | ||
485 | cpu_set(cpu, c[cpu].llc_shared_map); | ||
486 | |||
468 | if (current_cpu_data.x86_max_cores == 1) { | 487 | if (current_cpu_data.x86_max_cores == 1) { |
469 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 488 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
470 | c[cpu].booted_cores = 1; | 489 | c[cpu].booted_cores = 1; |
@@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu) | |||
472 | } | 491 | } |
473 | 492 | ||
474 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 493 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
494 | if (cpu_llc_id[cpu] != BAD_APICID && | ||
495 | cpu_llc_id[cpu] == cpu_llc_id[i]) { | ||
496 | cpu_set(i, c[cpu].llc_shared_map); | ||
497 | cpu_set(cpu, c[i].llc_shared_map); | ||
498 | } | ||
475 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | 499 | if (phys_proc_id[cpu] == phys_proc_id[i]) { |
476 | cpu_set(i, cpu_core_map[cpu]); | 500 | cpu_set(i, cpu_core_map[cpu]); |
477 | cpu_set(cpu, cpu_core_map[i]); | 501 | cpu_set(cpu, cpu_core_map[i]); |