aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorSiddha, Suresh B <suresh.b.siddha@intel.com>2006-03-27 04:15:22 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-27 11:44:43 -0500
commit1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 (patch)
treeccfa4927ebc7a8f663f9ac9e7789a713a33253ff /arch
parent77e4bfbcf071f795b54862455dce8902b3fc29c2 (diff)
[PATCH] sched: new sched domain for representing multi-core
Add a new sched domain for representing multi-core with shared caches between cores. Consider a dual package system, each package containing two cores and with last level cache shared between cores with in a package. If there are two runnable processes, with this appended patch those two processes will be scheduled on different packages. On such systems, with this patch we have observed 8% perf improvement with specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2 users). This new domain will come into play only on multi-core systems with shared caches. On other systems, this sched domain will be removed by domain degeneration code. This new domain can be also used for implementing power savings policy (see OLS 2005 CMP kernel scheduler paper for more details.. I will post another patch for power savings policy soon) Most of the arch/* file changes are for cpu_coregroup_map() implementation. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/Kconfig9
-rw-r--r--arch/i386/kernel/cpu/common.c10
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c22
-rw-r--r--arch/i386/kernel/smpboot.c24
-rw-r--r--arch/x86_64/Kconfig9
-rw-r--r--arch/x86_64/kernel/setup.c3
-rw-r--r--arch/x86_64/kernel/smpboot.c24
7 files changed, 95 insertions, 6 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index f7db71d0b913..f17bd1d2707e 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -231,6 +231,15 @@ config SCHED_SMT
231 cost of slightly increased overhead in some places. If unsure say 231 cost of slightly increased overhead in some places. If unsure say
232 N here. 232 N here.
233 233
234config SCHED_MC
235 bool "Multi-core scheduler support"
236 depends on SMP
237 default y
238 help
239 Multi-core scheduler support improves the CPU scheduler's decision
240 making when dealing with multi-core CPU chips at a cost of slightly
241 increased overhead in some places. If unsure say N here.
242
234source "kernel/Kconfig.preempt" 243source "kernel/Kconfig.preempt"
235 244
236config X86_UP_APIC 245config X86_UP_APIC
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 7e3d6b6a4e96..a06a49075f10 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -266,7 +266,7 @@ static void __init early_cpu_detect(void)
266void __cpuinit generic_identify(struct cpuinfo_x86 * c) 266void __cpuinit generic_identify(struct cpuinfo_x86 * c)
267{ 267{
268 u32 tfms, xlvl; 268 u32 tfms, xlvl;
269 int junk; 269 int ebx;
270 270
271 if (have_cpuid_p()) { 271 if (have_cpuid_p()) {
272 /* Get vendor name */ 272 /* Get vendor name */
@@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
282 /* Intel-defined flags: level 0x00000001 */ 282 /* Intel-defined flags: level 0x00000001 */
283 if ( c->cpuid_level >= 0x00000001 ) { 283 if ( c->cpuid_level >= 0x00000001 ) {
284 u32 capability, excap; 284 u32 capability, excap;
285 cpuid(0x00000001, &tfms, &junk, &excap, &capability); 285 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
286 c->x86_capability[0] = capability; 286 c->x86_capability[0] = capability;
287 c->x86_capability[4] = excap; 287 c->x86_capability[4] = excap;
288 c->x86 = (tfms >> 8) & 15; 288 c->x86 = (tfms >> 8) & 15;
@@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
292 if (c->x86 >= 0x6) 292 if (c->x86 >= 0x6)
293 c->x86_model += ((tfms >> 16) & 0xF) << 4; 293 c->x86_model += ((tfms >> 16) & 0xF) << 4;
294 c->x86_mask = tfms & 15; 294 c->x86_mask = tfms & 15;
295#ifdef CONFIG_SMP
296 c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
297#else
298 c->apicid = (ebx >> 24) & 0xFF;
299#endif
295 } else { 300 } else {
296 /* Have CPUID level 0 only - unheard of */ 301 /* Have CPUID level 0 only - unheard of */
297 c->x86 = 4; 302 c->x86 = 4;
@@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
474 479
475 cpuid(1, &eax, &ebx, &ecx, &edx); 480 cpuid(1, &eax, &ebx, &ecx, &edx);
476 481
477 c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
478 482
479 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 483 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
480 return; 484 return;
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index ce61921369e5..7e7fd4e67dd0 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
173 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ 173 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
174 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ 174 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
175 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ 175 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
176 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
177#ifdef CONFIG_SMP
178 unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
179#endif
176 180
177 if (c->cpuid_level > 3) { 181 if (c->cpuid_level > 3) {
178 static int is_initialized; 182 static int is_initialized;
@@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
205 break; 209 break;
206 case 2: 210 case 2:
207 new_l2 = this_leaf.size/1024; 211 new_l2 = this_leaf.size/1024;
212 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
213 index_msb = get_count_order(num_threads_sharing);
214 l2_id = c->apicid >> index_msb;
208 break; 215 break;
209 case 3: 216 case 3:
210 new_l3 = this_leaf.size/1024; 217 new_l3 = this_leaf.size/1024;
218 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
219 index_msb = get_count_order(num_threads_sharing);
220 l3_id = c->apicid >> index_msb;
211 break; 221 break;
212 default: 222 default:
213 break; 223 break;
@@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
273 if (new_l1i) 283 if (new_l1i)
274 l1i = new_l1i; 284 l1i = new_l1i;
275 285
276 if (new_l2) 286 if (new_l2) {
277 l2 = new_l2; 287 l2 = new_l2;
288#ifdef CONFIG_SMP
289 cpu_llc_id[cpu] = l2_id;
290#endif
291 }
278 292
279 if (new_l3) 293 if (new_l3) {
280 l3 = new_l3; 294 l3 = new_l3;
295#ifdef CONFIG_SMP
296 cpu_llc_id[cpu] = l3_id;
297#endif
298 }
281 299
282 if ( trace ) 300 if ( trace )
283 printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); 301 printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 82371d83bfa9..a6969903f2d6 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
72/* Core ID of each logical CPU */ 72/* Core ID of each logical CPU */
73int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; 73int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
74 74
75/* Last level cache ID of each logical CPU */
76int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
77
75/* representing HT siblings of each logical CPU */ 78/* representing HT siblings of each logical CPU */
76cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 79cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
77EXPORT_SYMBOL(cpu_sibling_map); 80EXPORT_SYMBOL(cpu_sibling_map);
@@ -440,6 +443,18 @@ static void __devinit smp_callin(void)
440 443
441static int cpucount; 444static int cpucount;
442 445
446/* maps the cpu to the sched domain representing multi-core */
447cpumask_t cpu_coregroup_map(int cpu)
448{
449 struct cpuinfo_x86 *c = cpu_data + cpu;
450 /*
451 * For perf, we return last level cache shared map.
452 * TBD: when power saving sched policy is added, we will return
453 * cpu_core_map when power saving policy is enabled
454 */
455 return c->llc_shared_map;
456}
457
443/* representing cpus for which sibling maps can be computed */ 458/* representing cpus for which sibling maps can be computed */
444static cpumask_t cpu_sibling_setup_map; 459static cpumask_t cpu_sibling_setup_map;
445 460
@@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu)
459 cpu_set(cpu, cpu_sibling_map[i]); 474 cpu_set(cpu, cpu_sibling_map[i]);
460 cpu_set(i, cpu_core_map[cpu]); 475 cpu_set(i, cpu_core_map[cpu]);
461 cpu_set(cpu, cpu_core_map[i]); 476 cpu_set(cpu, cpu_core_map[i]);
477 cpu_set(i, c[cpu].llc_shared_map);
478 cpu_set(cpu, c[i].llc_shared_map);
462 } 479 }
463 } 480 }
464 } else { 481 } else {
465 cpu_set(cpu, cpu_sibling_map[cpu]); 482 cpu_set(cpu, cpu_sibling_map[cpu]);
466 } 483 }
467 484
485 cpu_set(cpu, c[cpu].llc_shared_map);
486
468 if (current_cpu_data.x86_max_cores == 1) { 487 if (current_cpu_data.x86_max_cores == 1) {
469 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 488 cpu_core_map[cpu] = cpu_sibling_map[cpu];
470 c[cpu].booted_cores = 1; 489 c[cpu].booted_cores = 1;
@@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu)
472 } 491 }
473 492
474 for_each_cpu_mask(i, cpu_sibling_setup_map) { 493 for_each_cpu_mask(i, cpu_sibling_setup_map) {
494 if (cpu_llc_id[cpu] != BAD_APICID &&
495 cpu_llc_id[cpu] == cpu_llc_id[i]) {
496 cpu_set(i, c[cpu].llc_shared_map);
497 cpu_set(cpu, c[i].llc_shared_map);
498 }
475 if (phys_proc_id[cpu] == phys_proc_id[i]) { 499 if (phys_proc_id[cpu] == phys_proc_id[i]) {
476 cpu_set(i, cpu_core_map[cpu]); 500 cpu_set(i, cpu_core_map[cpu]);
477 cpu_set(cpu, cpu_core_map[i]); 501 cpu_set(cpu, cpu_core_map[i]);
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 45efe0ca88f8..1cb4aa241c8c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -250,6 +250,15 @@ config SCHED_SMT
250 cost of slightly increased overhead in some places. If unsure say 250 cost of slightly increased overhead in some places. If unsure say
251 N here. 251 N here.
252 252
253config SCHED_MC
254 bool "Multi-core scheduler support"
255 depends on SMP
256 default y
257 help
258 Multi-core scheduler support improves the CPU scheduler's decision
259 making when dealing with multi-core CPU chips at a cost of slightly
260 increased overhead in some places. If unsure say N here.
261
253source "kernel/Kconfig.preempt" 262source "kernel/Kconfig.preempt"
254 263
255config NUMA 264config NUMA
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index a57eec8311a7..d1f3e9272c05 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
962 962
963 cpuid(1, &eax, &ebx, &ecx, &edx); 963 cpuid(1, &eax, &ebx, &ecx, &edx);
964 964
965 c->apicid = phys_pkg_id(0);
966 965
967 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 966 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
968 return; 967 return;
@@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1171 c->x86_capability[2] = cpuid_edx(0x80860001); 1170 c->x86_capability[2] = cpuid_edx(0x80860001);
1172 } 1171 }
1173 1172
1173 c->apicid = phys_pkg_id(0);
1174
1174 /* 1175 /*
1175 * Vendor-specific initialization. In this section we 1176 * Vendor-specific initialization. In this section we
1176 * canonicalize the feature flags, meaning if there are 1177 * canonicalize the feature flags, meaning if there are
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 66e98659d077..ea48fa638070 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68/* core ID of each logical CPU */ 68/* core ID of each logical CPU */
69u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 69u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
70 70
71/* Last level cache ID of each logical CPU */
72u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
73
71/* Bitmask of currently online CPUs */ 74/* Bitmask of currently online CPUs */
72cpumask_t cpu_online_map __read_mostly; 75cpumask_t cpu_online_map __read_mostly;
73 76
@@ -445,6 +448,18 @@ void __cpuinit smp_callin(void)
445 cpu_set(cpuid, cpu_callin_map); 448 cpu_set(cpuid, cpu_callin_map);
446} 449}
447 450
451/* maps the cpu to the sched domain representing multi-core */
452cpumask_t cpu_coregroup_map(int cpu)
453{
454 struct cpuinfo_x86 *c = cpu_data + cpu;
455 /*
456 * For perf, we return last level cache shared map.
457 * TBD: when power saving sched policy is added, we will return
458 * cpu_core_map when power saving policy is enabled
459 */
460 return c->llc_shared_map;
461}
462
448/* representing cpus for which sibling maps can be computed */ 463/* representing cpus for which sibling maps can be computed */
449static cpumask_t cpu_sibling_setup_map; 464static cpumask_t cpu_sibling_setup_map;
450 465
@@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu)
463 cpu_set(cpu, cpu_sibling_map[i]); 478 cpu_set(cpu, cpu_sibling_map[i]);
464 cpu_set(i, cpu_core_map[cpu]); 479 cpu_set(i, cpu_core_map[cpu]);
465 cpu_set(cpu, cpu_core_map[i]); 480 cpu_set(cpu, cpu_core_map[i]);
481 cpu_set(i, c[cpu].llc_shared_map);
482 cpu_set(cpu, c[i].llc_shared_map);
466 } 483 }
467 } 484 }
468 } else { 485 } else {
469 cpu_set(cpu, cpu_sibling_map[cpu]); 486 cpu_set(cpu, cpu_sibling_map[cpu]);
470 } 487 }
471 488
489 cpu_set(cpu, c[cpu].llc_shared_map);
490
472 if (current_cpu_data.x86_max_cores == 1) { 491 if (current_cpu_data.x86_max_cores == 1) {
473 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 492 cpu_core_map[cpu] = cpu_sibling_map[cpu];
474 c[cpu].booted_cores = 1; 493 c[cpu].booted_cores = 1;
@@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu)
476 } 495 }
477 496
478 for_each_cpu_mask(i, cpu_sibling_setup_map) { 497 for_each_cpu_mask(i, cpu_sibling_setup_map) {
498 if (cpu_llc_id[cpu] != BAD_APICID &&
499 cpu_llc_id[cpu] == cpu_llc_id[i]) {
500 cpu_set(i, c[cpu].llc_shared_map);
501 cpu_set(cpu, c[i].llc_shared_map);
502 }
479 if (phys_proc_id[cpu] == phys_proc_id[i]) { 503 if (phys_proc_id[cpu] == phys_proc_id[i]) {
480 cpu_set(i, cpu_core_map[cpu]); 504 cpu_set(i, cpu_core_map[cpu]);
481 cpu_set(cpu, cpu_core_map[i]); 505 cpu_set(cpu, cpu_core_map[i]);