 arch/i386/Kconfig                       |  9
 arch/i386/kernel/cpu/common.c           | 10
 arch/i386/kernel/cpu/intel_cacheinfo.c  | 22
 arch/i386/kernel/smpboot.c              | 24
 arch/x86_64/Kconfig                     |  9
 arch/x86_64/kernel/setup.c              |  3
 arch/x86_64/kernel/smpboot.c            | 24
 include/asm-i386/processor.h            |  5
 include/asm-i386/topology.h             |  2
 include/asm-x86_64/processor.h          |  4
 include/asm-x86_64/smp.h                |  1
 include/asm-x86_64/topology.h           |  2
 include/linux/topology.h                |  9
 kernel/sched.c                          | 73
 14 files changed, 186 insertions(+), 11 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index f7db71d0b913..f17bd1d2707e 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -231,6 +231,15 @@ config SCHED_SMT
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 7e3d6b6a4e96..a06a49075f10 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -266,7 +266,7 @@ static void __init early_cpu_detect(void)
 void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 	u32 tfms, xlvl;
-	int junk;
+	int ebx;
 
 	if (have_cpuid_p()) {
 		/* Get vendor name */
@@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 		/* Intel-defined flags: level 0x00000001 */
 		if ( c->cpuid_level >= 0x00000001 ) {
 			u32 capability, excap;
-			cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+			cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
 			c->x86_capability[0] = capability;
 			c->x86_capability[4] = excap;
 			c->x86 = (tfms >> 8) & 15;
@@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 		if (c->x86 >= 0x6)
 			c->x86_model += ((tfms >> 16) & 0xF) << 4;
 		c->x86_mask = tfms & 15;
+#ifdef CONFIG_SMP
+		c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+		c->apicid = (ebx >> 24) & 0xFF;
+#endif
 	} else {
 		/* Have CPUID level 0 only - unheard of */
 		c->x86 = 4;
@@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
-	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
 
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
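
Note: the hunks above move APIC-ID detection out of detect_ht() and into
generic_identify(), so c->apicid is populated even when HT is absent. The
ID comes from CPUID leaf 1, EBX bits 31:24. A minimal userspace sketch of
that extraction (not kernel code; __get_cpuid() is the GCC/clang wrapper
from <cpuid.h>):

	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			return 1;	/* CPUID leaf 1 unsupported */
		/* EBX[31:24] of leaf 1 is the initial APIC ID */
		printf("initial APIC ID: %u\n", (ebx >> 24) & 0xFF);
		return 0;
	}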
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index ce61921369e5..7e7fd4e67dd0 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+#ifdef CONFIG_SMP
+	unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+#endif
 
 	if (c->cpuid_level > 3) {
 		static int is_initialized;
@@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 				break;
 			case 2:
 				new_l2 = this_leaf.size/1024;
+				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+				index_msb = get_count_order(num_threads_sharing);
+				l2_id = c->apicid >> index_msb;
 				break;
 			case 3:
 				new_l3 = this_leaf.size/1024;
+				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+				index_msb = get_count_order(num_threads_sharing);
+				l3_id = c->apicid >> index_msb;
 				break;
 			default:
 				break;
@@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	if (new_l1i)
 		l1i = new_l1i;
 
-	if (new_l2)
+	if (new_l2) {
 		l2 = new_l2;
+#ifdef CONFIG_SMP
+		cpu_llc_id[cpu] = l2_id;
+#endif
+	}
 
-	if (new_l3)
+	if (new_l3) {
 		l3 = new_l3;
+#ifdef CONFIG_SMP
+		cpu_llc_id[cpu] = l3_id;
+#endif
+	}
 
 	if ( trace )
 		printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
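
The cache-ID math above: CPUID(4) reports, per cache level, how many
threads share that cache (EAX[25:14], stored plus one in
num_threads_sharing); get_count_order() rounds that up to a power-of-two
shift, and shifting the APIC ID right by it yields an ID common to all
sharers. A standalone sketch of the arithmetic (get_count_order() modeled
here as ceil(log2(n))):

	#include <stdio.h>

	/* models the kernel's get_count_order(): order after rounding
	 * count up to the next power of two */
	static int get_count_order(unsigned int count)
	{
		int order = 0;

		while ((1u << order) < count)
			order++;
		return order;
	}

	int main(void)
	{
		unsigned int num_threads_sharing = 2;	/* e.g. two HT siblings share L2 */
		int index_msb = get_count_order(num_threads_sharing);
		unsigned int apicid;

		for (apicid = 0; apicid < 4; apicid++)
			printf("apicid %u -> cache id %u\n",
			       apicid, apicid >> index_msb);
		return 0;
	}

With two sharers, APIC IDs 0 and 1 map to cache ID 0 and IDs 2 and 3 to
cache ID 1, which is what cpu_llc_id[] records.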
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 82371d83bfa9..a6969903f2d6 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 /* Core ID of each logical CPU */
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
 
+/* Last level cache ID of each logical CPU */
+int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
 /* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
@@ -440,6 +443,18 @@ static void __devinit smp_callin(void)
 
 static int cpucount;
 
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	/*
+	 * For perf, we return last level cache shared map.
+	 * TBD: when power saving sched policy is added, we will return
+	 *      cpu_core_map when power saving policy is enabled
+	 */
+	return c->llc_shared_map;
+}
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
445 460
@@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu)
 				cpu_set(cpu, cpu_sibling_map[i]);
 				cpu_set(i, cpu_core_map[cpu]);
 				cpu_set(cpu, cpu_core_map[i]);
+				cpu_set(i, c[cpu].llc_shared_map);
+				cpu_set(cpu, c[i].llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
+	cpu_set(cpu, c[cpu].llc_shared_map);
+
 	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
 		c[cpu].booted_cores = 1;
@@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu)
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (cpu_llc_id[cpu] != BAD_APICID &&
+		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
+			cpu_set(i, c[cpu].llc_shared_map);
+			cpu_set(cpu, c[i].llc_shared_map);
+		}
 		if (phys_proc_id[cpu] == phys_proc_id[i]) {
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);
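
Note the resulting containment: cpu_sibling_map (HT pairs) is a subset of
llc_shared_map (last-level-cache sharers), which is a subset of
cpu_core_map (the package). cpu_coregroup_map() returns the middle one, so
the new MC domain sits between the SMT and physical domains. A toy model
for one package of two dual-threaded cores with a per-core L2 as the LLC
(plain bitmasks, not the kernel cpumask API):

	#include <stdio.h>

	int main(void)
	{
		/* CPUs 0-3: {0,1} are core 0's HT pair, {2,3} core 1's */
		unsigned int sibling_map[4]    = { 0x3, 0x3, 0xc, 0xc };
		unsigned int llc_shared_map[4] = { 0x3, 0x3, 0xc, 0xc };
		unsigned int core_map[4]       = { 0xf, 0xf, 0xf, 0xf };
		int cpu;

		for (cpu = 0; cpu < 4; cpu++)
			printf("cpu%d: siblings %#x, coregroup %#x, package %#x\n",
			       cpu, sibling_map[cpu], llc_shared_map[cpu],
			       core_map[cpu]);
		return 0;
	}

On parts where the LLC is shared by the whole package, llc_shared_map
instead equals cpu_core_map and the coregroup spans the package.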
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 45efe0ca88f8..1cb4aa241c8c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -250,6 +250,15 @@ config SCHED_SMT
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
 source "kernel/Kconfig.preempt"
 
 config NUMA
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index a57eec8311a7..d1f3e9272c05 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
-	c->apicid = phys_pkg_id(0);
 
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
@@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 		c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
 
+	c->apicid = phys_pkg_id(0);
+
 	/*
 	 * Vendor-specific initialization. In this section we
 	 * canonicalize the feature flags, meaning if there are
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 66e98659d077..ea48fa638070 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 /* core ID of each logical CPU */
 u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 
+/* Last level cache ID of each logical CPU */
+u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
 
@@ -445,6 +448,18 @@ void __cpuinit smp_callin(void)
 	cpu_set(cpuid, cpu_callin_map);
 }
 
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	/*
+	 * For perf, we return last level cache shared map.
+	 * TBD: when power saving sched policy is added, we will return
+	 *      cpu_core_map when power saving policy is enabled
+	 */
+	return c->llc_shared_map;
+}
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
450 465
@@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu)
 				cpu_set(cpu, cpu_sibling_map[i]);
 				cpu_set(i, cpu_core_map[cpu]);
 				cpu_set(cpu, cpu_core_map[i]);
+				cpu_set(i, c[cpu].llc_shared_map);
+				cpu_set(cpu, c[i].llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
+	cpu_set(cpu, c[cpu].llc_shared_map);
+
 	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
 		c[cpu].booted_cores = 1;
@@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu)
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (cpu_llc_id[cpu] != BAD_APICID &&
+		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
+			cpu_set(i, c[cpu].llc_shared_map);
+			cpu_set(cpu, c[i].llc_shared_map);
+		}
 		if (phys_proc_id[cpu] == phys_proc_id[i]) {
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index feca5d961e2b..af4bfd012475 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -20,6 +20,7 @@
 #include <linux/config.h>
 #include <linux/threads.h>
 #include <asm/percpu.h>
+#include <linux/cpumask.h>
 
 /* flag for disabling the tsc */
 extern int tsc_disable;
@@ -67,6 +68,9 @@ struct cpuinfo_x86 {
 	char	pad0;
 	int	x86_power;
 	unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
+#endif
 	unsigned char x86_max_cores;	/* cpuid returned max cores value */
 	unsigned char booted_cores;	/* number of cores as seen by OS */
 	unsigned char apicid;
@@ -103,6 +107,7 @@ extern struct cpuinfo_x86 cpu_data[];
 
 extern int phys_proc_id[NR_CPUS];
 extern int cpu_core_id[NR_CPUS];
+extern int cpu_llc_id[NR_CPUS];
 extern char ignore_fpu_irq;
 
 extern void identify_cpu(struct cpuinfo_x86 *);
diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index aa958c6ee83e..b94e5eeef917 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -112,4 +112,6 @@ extern unsigned long node_remap_size[];
 
 #endif /* CONFIG_NUMA */
 
+extern cpumask_t cpu_coregroup_map(int cpu);
+
 #endif /* _ASM_I386_TOPOLOGY_H */
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 8c8d88c036ed..1aa2cee43344 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -20,6 +20,7 @@
 #include <asm/mmsegment.h>
 #include <asm/percpu.h>
 #include <linux/personality.h>
+#include <linux/cpumask.h>
 
 #define TF_MASK		0x00000100
 #define IF_MASK		0x00000200
@@ -65,6 +66,9 @@ struct cpuinfo_x86 {
 	__u32	x86_power;
 	__u32	extended_cpuid_level;	/* Max extended CPUID function supported */
 	unsigned long loops_per_jiffy;
+#ifdef CONFIG_SMP
+	cpumask_t llc_shared_map;	/* cpus sharing the last level cache */
+#endif
 	__u8	apicid;
 	__u8	booted_cores;	/* number of cores as seen by OS */
 } ____cacheline_aligned;
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 9ccbb2cfd5c0..a4fdaeb5c397 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -56,6 +56,7 @@ extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
 extern u8 phys_proc_id[NR_CPUS];
 extern u8 cpu_core_id[NR_CPUS];
+extern u8 cpu_llc_id[NR_CPUS];
 
 #define SMP_TRAMPOLINE_BASE 0x6000
 
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index c642f5d9882d..9db54e9d17bb 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -68,4 +68,6 @@ extern int __node_distance(int, int);
 
 #include <asm-generic/topology.h>
 
+extern cpumask_t cpu_coregroup_map(int cpu);
+
 #endif
diff --git a/include/linux/topology.h b/include/linux/topology.h
index e8eb0040ce3a..a305ae2e44b6 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -164,6 +164,15 @@
 	.nr_balance_failed	= 0,			\
 }
 
+#ifdef CONFIG_SCHED_MC
+#ifndef SD_MC_INIT
+/* for now its same as SD_CPU_INIT.
+ * TBD: Tune Domain parameters!
+ */
+#define SD_MC_INIT   SD_CPU_INIT
+#endif
+#endif
+
 #ifdef CONFIG_NUMA
 #ifndef SD_NODE_INIT
 #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
diff --git a/kernel/sched.c b/kernel/sched.c
index a96a05d23262..8a8b71b5751b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5574,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+	return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+	return cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
 	return first_cpu(cpu_sibling_map[cpu]);
 #else
 	return cpu;
@@ -5676,6 +5696,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		sd->parent = p;
 		sd->groups = &sched_group_phys[group];
 
+#ifdef CONFIG_SCHED_MC
+		p = sd;
+		sd = &per_cpu(core_domains, i);
+		group = cpu_to_core_group(i);
+		*sd = SD_MC_INIT;
+		sd->span = cpu_coregroup_map(i);
+		cpus_and(sd->span, sd->span, *cpu_map);
+		sd->parent = p;
+		sd->groups = &sched_group_core[group];
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
@@ -5701,6 +5732,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_MC
+	/* Set up multi-core groups */
+	for_each_cpu_mask(i, *cpu_map) {
+		cpumask_t this_core_map = cpu_coregroup_map(i);
+		cpus_and(this_core_map, this_core_map, *cpu_map);
+		if (i != first_cpu(this_core_map))
+			continue;
+		init_sched_build_groups(sched_group_core, this_core_map,
+					&cpu_to_core_group);
+	}
+#endif
+
+
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
@@ -5797,11 +5841,31 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		power = SCHED_LOAD_SCALE;
 		sd->groups->cpu_power = power;
 #endif
+#ifdef CONFIG_SCHED_MC
+		sd = &per_cpu(core_domains, i);
+		power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+					    * SCHED_LOAD_SCALE / 10;
+		sd->groups->cpu_power = power;
+
+		sd = &per_cpu(phys_domains, i);
 
+		/*
+		 * This has to be < 2 * SCHED_LOAD_SCALE
+		 * Lets keep it SCHED_LOAD_SCALE, so that
+		 * while calculating NUMA group's cpu_power
+		 * we can simply do
+		 *  numa_group->cpu_power += phys_group->cpu_power;
+		 *
+		 * See "only add power once for each physical pkg"
+		 * comment below
+		 */
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
 		sd = &per_cpu(phys_domains, i);
 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 			(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
+#endif
 
 #ifdef CONFIG_NUMA
 		sd = &per_cpu(allnodes_domains, i);
@@ -5823,7 +5887,6 @@ void build_sched_domains(const cpumask_t *cpu_map)
 next_sg:
 		for_each_cpu_mask(j, sg->cpumask) {
 			struct sched_domain *sd;
-			int power;
 
 			sd = &per_cpu(phys_domains, j);
 			if (j != first_cpu(sd->groups->cpumask)) {
@@ -5833,10 +5896,8 @@ next_sg:
 				 */
 				continue;
 			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
 
-			sg->cpu_power += power;
+			sg->cpu_power += sd->groups->cpu_power;
 		}
 		sg = sg->next;
 		if (sg != sched_group_nodes[i])
@@ -5849,6 +5910,8 @@ next_sg:
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+		sd = &per_cpu(core_domains, i);
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
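
For reference, the group power arithmetic introduced above, assuming
SCHED_LOAD_SCALE is 128 as in kernels of this vintage: a core-domain group
holding n HT siblings gets SCHED_LOAD_SCALE + (n-1)*SCHED_LOAD_SCALE/10,
i.e. each extra sibling is counted as only 10% of a CPU, while with
CONFIG_SCHED_MC the physical-package group is pinned to exactly
SCHED_LOAD_SCALE so the NUMA pass can simply sum package powers:

	#include <stdio.h>

	#define SCHED_LOAD_SCALE 128UL	/* assumed; see include/linux/sched.h */

	int main(void)
	{
		unsigned long n;

		for (n = 1; n <= 4; n++)
			printf("%lu sibling(s) -> group cpu_power %lu\n",
			       n, SCHED_LOAD_SCALE + (n - 1) * SCHED_LOAD_SCALE / 10);
		return 0;
	}

So a dual-threaded core is rated 140 against 128 for a single-threaded
one, biasing the balancer toward spreading across cores before siblings.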