diff options
| -rw-r--r-- | arch/i386/Kconfig | 9 | ||||
| -rw-r--r-- | arch/i386/kernel/cpu/common.c | 10 | ||||
| -rw-r--r-- | arch/i386/kernel/cpu/intel_cacheinfo.c | 22 | ||||
| -rw-r--r-- | arch/i386/kernel/smpboot.c | 24 | ||||
| -rw-r--r-- | arch/x86_64/Kconfig | 9 | ||||
| -rw-r--r-- | arch/x86_64/kernel/setup.c | 3 | ||||
| -rw-r--r-- | arch/x86_64/kernel/smpboot.c | 24 | ||||
| -rw-r--r-- | include/asm-i386/processor.h | 5 | ||||
| -rw-r--r-- | include/asm-i386/topology.h | 2 | ||||
| -rw-r--r-- | include/asm-x86_64/processor.h | 4 | ||||
| -rw-r--r-- | include/asm-x86_64/smp.h | 1 | ||||
| -rw-r--r-- | include/asm-x86_64/topology.h | 2 | ||||
| -rw-r--r-- | include/linux/topology.h | 9 | ||||
| -rw-r--r-- | kernel/sched.c | 73 |
14 files changed, 186 insertions, 11 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index f7db71d0b913..f17bd1d2707e 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
| @@ -231,6 +231,15 @@ config SCHED_SMT | |||
| 231 | cost of slightly increased overhead in some places. If unsure say | 231 | cost of slightly increased overhead in some places. If unsure say |
| 232 | N here. | 232 | N here. |
| 233 | 233 | ||
| 234 | config SCHED_MC | ||
| 235 | bool "Multi-core scheduler support" | ||
| 236 | depends on SMP | ||
| 237 | default y | ||
| 238 | help | ||
| 239 | Multi-core scheduler support improves the CPU scheduler's decision | ||
| 240 | making when dealing with multi-core CPU chips at a cost of slightly | ||
| 241 | increased overhead in some places. If unsure say N here. | ||
| 242 | |||
| 234 | source "kernel/Kconfig.preempt" | 243 | source "kernel/Kconfig.preempt" |
| 235 | 244 | ||
| 236 | config X86_UP_APIC | 245 | config X86_UP_APIC |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7e3d6b6a4e96..a06a49075f10 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
| @@ -266,7 +266,7 @@ static void __init early_cpu_detect(void) | |||
| 266 | void __cpuinit generic_identify(struct cpuinfo_x86 * c) | 266 | void __cpuinit generic_identify(struct cpuinfo_x86 * c) |
| 267 | { | 267 | { |
| 268 | u32 tfms, xlvl; | 268 | u32 tfms, xlvl; |
| 269 | int junk; | 269 | int ebx; |
| 270 | 270 | ||
| 271 | if (have_cpuid_p()) { | 271 | if (have_cpuid_p()) { |
| 272 | /* Get vendor name */ | 272 | /* Get vendor name */ |
| @@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |||
| 282 | /* Intel-defined flags: level 0x00000001 */ | 282 | /* Intel-defined flags: level 0x00000001 */ |
| 283 | if ( c->cpuid_level >= 0x00000001 ) { | 283 | if ( c->cpuid_level >= 0x00000001 ) { |
| 284 | u32 capability, excap; | 284 | u32 capability, excap; |
| 285 | cpuid(0x00000001, &tfms, &junk, &excap, &capability); | 285 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); |
| 286 | c->x86_capability[0] = capability; | 286 | c->x86_capability[0] = capability; |
| 287 | c->x86_capability[4] = excap; | 287 | c->x86_capability[4] = excap; |
| 288 | c->x86 = (tfms >> 8) & 15; | 288 | c->x86 = (tfms >> 8) & 15; |
| @@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) | |||
| 292 | if (c->x86 >= 0x6) | 292 | if (c->x86 >= 0x6) |
| 293 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 293 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
| 294 | c->x86_mask = tfms & 15; | 294 | c->x86_mask = tfms & 15; |
| 295 | #ifdef CONFIG_SMP | ||
| 296 | c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); | ||
| 297 | #else | ||
| 298 | c->apicid = (ebx >> 24) & 0xFF; | ||
| 299 | #endif | ||
| 295 | } else { | 300 | } else { |
| 296 | /* Have CPUID level 0 only - unheard of */ | 301 | /* Have CPUID level 0 only - unheard of */ |
| 297 | c->x86 = 4; | 302 | c->x86 = 4; |
| @@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
| 474 | 479 | ||
| 475 | cpuid(1, &eax, &ebx, &ecx, &edx); | 480 | cpuid(1, &eax, &ebx, &ecx, &edx); |
| 476 | 481 | ||
| 477 | c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); | ||
| 478 | 482 | ||
| 479 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 483 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
| 480 | return; | 484 | return; |
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ce61921369e5..7e7fd4e67dd0 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c | |||
| @@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
| 173 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ | 173 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ |
| 174 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ | 174 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ |
| 175 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ | 175 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ |
| 176 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; | ||
| 177 | #ifdef CONFIG_SMP | ||
| 178 | unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); | ||
| 179 | #endif | ||
| 176 | 180 | ||
| 177 | if (c->cpuid_level > 3) { | 181 | if (c->cpuid_level > 3) { |
| 178 | static int is_initialized; | 182 | static int is_initialized; |
| @@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
| 205 | break; | 209 | break; |
| 206 | case 2: | 210 | case 2: |
| 207 | new_l2 = this_leaf.size/1024; | 211 | new_l2 = this_leaf.size/1024; |
| 212 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | ||
| 213 | index_msb = get_count_order(num_threads_sharing); | ||
| 214 | l2_id = c->apicid >> index_msb; | ||
| 208 | break; | 215 | break; |
| 209 | case 3: | 216 | case 3: |
| 210 | new_l3 = this_leaf.size/1024; | 217 | new_l3 = this_leaf.size/1024; |
| 218 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | ||
| 219 | index_msb = get_count_order(num_threads_sharing); | ||
| 220 | l3_id = c->apicid >> index_msb; | ||
| 211 | break; | 221 | break; |
| 212 | default: | 222 | default: |
| 213 | break; | 223 | break; |
| @@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
| 273 | if (new_l1i) | 283 | if (new_l1i) |
| 274 | l1i = new_l1i; | 284 | l1i = new_l1i; |
| 275 | 285 | ||
| 276 | if (new_l2) | 286 | if (new_l2) { |
| 277 | l2 = new_l2; | 287 | l2 = new_l2; |
| 288 | #ifdef CONFIG_SMP | ||
| 289 | cpu_llc_id[cpu] = l2_id; | ||
| 290 | #endif | ||
| 291 | } | ||
| 278 | 292 | ||
| 279 | if (new_l3) | 293 | if (new_l3) { |
| 280 | l3 = new_l3; | 294 | l3 = new_l3; |
| 295 | #ifdef CONFIG_SMP | ||
| 296 | cpu_llc_id[cpu] = l3_id; | ||
| 297 | #endif | ||
| 298 | } | ||
| 281 | 299 | ||
| 282 | if ( trace ) | 300 | if ( trace ) |
| 283 | printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); | 301 | printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 82371d83bfa9..a6969903f2d6 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
| @@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; | |||
| 72 | /* Core ID of each logical CPU */ | 72 | /* Core ID of each logical CPU */ |
| 73 | int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; | 73 | int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; |
| 74 | 74 | ||
| 75 | /* Last level cache ID of each logical CPU */ | ||
| 76 | int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | ||
| 77 | |||
| 75 | /* representing HT siblings of each logical CPU */ | 78 | /* representing HT siblings of each logical CPU */ |
| 76 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 79 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
| 77 | EXPORT_SYMBOL(cpu_sibling_map); | 80 | EXPORT_SYMBOL(cpu_sibling_map); |
| @@ -440,6 +443,18 @@ static void __devinit smp_callin(void) | |||
| 440 | 443 | ||
| 441 | static int cpucount; | 444 | static int cpucount; |
| 442 | 445 | ||
| 446 | /* maps the cpu to the sched domain representing multi-core */ | ||
| 447 | cpumask_t cpu_coregroup_map(int cpu) | ||
| 448 | { | ||
| 449 | struct cpuinfo_x86 *c = cpu_data + cpu; | ||
| 450 | /* | ||
| 451 | * For perf, we return last level cache shared map. | ||
| 452 | * TBD: when power saving sched policy is added, we will return | ||
| 453 | * cpu_core_map when power saving policy is enabled | ||
| 454 | */ | ||
| 455 | return c->llc_shared_map; | ||
| 456 | } | ||
| 457 | |||
| 443 | /* representing cpus for which sibling maps can be computed */ | 458 | /* representing cpus for which sibling maps can be computed */ |
| 444 | static cpumask_t cpu_sibling_setup_map; | 459 | static cpumask_t cpu_sibling_setup_map; |
| 445 | 460 | ||
| @@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu) | |||
| 459 | cpu_set(cpu, cpu_sibling_map[i]); | 474 | cpu_set(cpu, cpu_sibling_map[i]); |
| 460 | cpu_set(i, cpu_core_map[cpu]); | 475 | cpu_set(i, cpu_core_map[cpu]); |
| 461 | cpu_set(cpu, cpu_core_map[i]); | 476 | cpu_set(cpu, cpu_core_map[i]); |
| 477 | cpu_set(i, c[cpu].llc_shared_map); | ||
| 478 | cpu_set(cpu, c[i].llc_shared_map); | ||
| 462 | } | 479 | } |
| 463 | } | 480 | } |
| 464 | } else { | 481 | } else { |
| 465 | cpu_set(cpu, cpu_sibling_map[cpu]); | 482 | cpu_set(cpu, cpu_sibling_map[cpu]); |
| 466 | } | 483 | } |
| 467 | 484 | ||
| 485 | cpu_set(cpu, c[cpu].llc_shared_map); | ||
| 486 | |||
| 468 | if (current_cpu_data.x86_max_cores == 1) { | 487 | if (current_cpu_data.x86_max_cores == 1) { |
| 469 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 488 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
| 470 | c[cpu].booted_cores = 1; | 489 | c[cpu].booted_cores = 1; |
| @@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu) | |||
| 472 | } | 491 | } |
| 473 | 492 | ||
| 474 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 493 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
| 494 | if (cpu_llc_id[cpu] != BAD_APICID && | ||
| 495 | cpu_llc_id[cpu] == cpu_llc_id[i]) { | ||
| 496 | cpu_set(i, c[cpu].llc_shared_map); | ||
| 497 | cpu_set(cpu, c[i].llc_shared_map); | ||
| 498 | } | ||
| 475 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | 499 | if (phys_proc_id[cpu] == phys_proc_id[i]) { |
| 476 | cpu_set(i, cpu_core_map[cpu]); | 500 | cpu_set(i, cpu_core_map[cpu]); |
| 477 | cpu_set(cpu, cpu_core_map[i]); | 501 | cpu_set(cpu, cpu_core_map[i]); |
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 45efe0ca88f8..1cb4aa241c8c 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
| @@ -250,6 +250,15 @@ config SCHED_SMT | |||
| 250 | cost of slightly increased overhead in some places. If unsure say | 250 | cost of slightly increased overhead in some places. If unsure say |
| 251 | N here. | 251 | N here. |
| 252 | 252 | ||
| 253 | config SCHED_MC | ||
| 254 | bool "Multi-core scheduler support" | ||
| 255 | depends on SMP | ||
| 256 | default y | ||
| 257 | help | ||
| 258 | Multi-core scheduler support improves the CPU scheduler's decision | ||
| 259 | making when dealing with multi-core CPU chips at a cost of slightly | ||
| 260 | increased overhead in some places. If unsure say N here. | ||
| 261 | |||
| 253 | source "kernel/Kconfig.preempt" | 262 | source "kernel/Kconfig.preempt" |
| 254 | 263 | ||
| 255 | config NUMA | 264 | config NUMA |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a57eec8311a7..d1f3e9272c05 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
| @@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
| 962 | 962 | ||
| 963 | cpuid(1, &eax, &ebx, &ecx, &edx); | 963 | cpuid(1, &eax, &ebx, &ecx, &edx); |
| 964 | 964 | ||
| 965 | c->apicid = phys_pkg_id(0); | ||
| 966 | 965 | ||
| 967 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 966 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
| 968 | return; | 967 | return; |
| @@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 1171 | c->x86_capability[2] = cpuid_edx(0x80860001); | 1170 | c->x86_capability[2] = cpuid_edx(0x80860001); |
| 1172 | } | 1171 | } |
| 1173 | 1172 | ||
| 1173 | c->apicid = phys_pkg_id(0); | ||
| 1174 | |||
| 1174 | /* | 1175 | /* |
| 1175 | * Vendor-specific initialization. In this section we | 1176 | * Vendor-specific initialization. In this section we |
| 1176 | * canonicalize the feature flags, meaning if there are | 1177 | * canonicalize the feature flags, meaning if there are |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 66e98659d077..ea48fa638070 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
| @@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | |||
| 68 | /* core ID of each logical CPU */ | 68 | /* core ID of each logical CPU */ |
| 69 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 69 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
| 70 | 70 | ||
| 71 | /* Last level cache ID of each logical CPU */ | ||
| 72 | u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | ||
| 73 | |||
| 71 | /* Bitmask of currently online CPUs */ | 74 | /* Bitmask of currently online CPUs */ |
| 72 | cpumask_t cpu_online_map __read_mostly; | 75 | cpumask_t cpu_online_map __read_mostly; |
| 73 | 76 | ||
| @@ -445,6 +448,18 @@ void __cpuinit smp_callin(void) | |||
| 445 | cpu_set(cpuid, cpu_callin_map); | 448 | cpu_set(cpuid, cpu_callin_map); |
| 446 | } | 449 | } |
| 447 | 450 | ||
| 451 | /* maps the cpu to the sched domain representing multi-core */ | ||
| 452 | cpumask_t cpu_coregroup_map(int cpu) | ||
| 453 | { | ||
| 454 | struct cpuinfo_x86 *c = cpu_data + cpu; | ||
| 455 | /* | ||
| 456 | * For perf, we return last level cache shared map. | ||
| 457 | * TBD: when power saving sched policy is added, we will return | ||
| 458 | * cpu_core_map when power saving policy is enabled | ||
| 459 | */ | ||
| 460 | return c->llc_shared_map; | ||
| 461 | } | ||
| 462 | |||
| 448 | /* representing cpus for which sibling maps can be computed */ | 463 | /* representing cpus for which sibling maps can be computed */ |
| 449 | static cpumask_t cpu_sibling_setup_map; | 464 | static cpumask_t cpu_sibling_setup_map; |
| 450 | 465 | ||
| @@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu) | |||
| 463 | cpu_set(cpu, cpu_sibling_map[i]); | 478 | cpu_set(cpu, cpu_sibling_map[i]); |
| 464 | cpu_set(i, cpu_core_map[cpu]); | 479 | cpu_set(i, cpu_core_map[cpu]); |
| 465 | cpu_set(cpu, cpu_core_map[i]); | 480 | cpu_set(cpu, cpu_core_map[i]); |
| 481 | cpu_set(i, c[cpu].llc_shared_map); | ||
| 482 | cpu_set(cpu, c[i].llc_shared_map); | ||
| 466 | } | 483 | } |
| 467 | } | 484 | } |
| 468 | } else { | 485 | } else { |
| 469 | cpu_set(cpu, cpu_sibling_map[cpu]); | 486 | cpu_set(cpu, cpu_sibling_map[cpu]); |
| 470 | } | 487 | } |
| 471 | 488 | ||
| 489 | cpu_set(cpu, c[cpu].llc_shared_map); | ||
| 490 | |||
| 472 | if (current_cpu_data.x86_max_cores == 1) { | 491 | if (current_cpu_data.x86_max_cores == 1) { |
| 473 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 492 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
| 474 | c[cpu].booted_cores = 1; | 493 | c[cpu].booted_cores = 1; |
| @@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu) | |||
| 476 | } | 495 | } |
| 477 | 496 | ||
| 478 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 497 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
| 498 | if (cpu_llc_id[cpu] != BAD_APICID && | ||
| 499 | cpu_llc_id[cpu] == cpu_llc_id[i]) { | ||
| 500 | cpu_set(i, c[cpu].llc_shared_map); | ||
| 501 | cpu_set(cpu, c[i].llc_shared_map); | ||
| 502 | } | ||
| 479 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | 503 | if (phys_proc_id[cpu] == phys_proc_id[i]) { |
| 480 | cpu_set(i, cpu_core_map[cpu]); | 504 | cpu_set(i, cpu_core_map[cpu]); |
| 481 | cpu_set(cpu, cpu_core_map[i]); | 505 | cpu_set(cpu, cpu_core_map[i]); |
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index feca5d961e2b..af4bfd012475 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/config.h> | 20 | #include <linux/config.h> |
| 21 | #include <linux/threads.h> | 21 | #include <linux/threads.h> |
| 22 | #include <asm/percpu.h> | 22 | #include <asm/percpu.h> |
| 23 | #include <linux/cpumask.h> | ||
| 23 | 24 | ||
| 24 | /* flag for disabling the tsc */ | 25 | /* flag for disabling the tsc */ |
| 25 | extern int tsc_disable; | 26 | extern int tsc_disable; |
| @@ -67,6 +68,9 @@ struct cpuinfo_x86 { | |||
| 67 | char pad0; | 68 | char pad0; |
| 68 | int x86_power; | 69 | int x86_power; |
| 69 | unsigned long loops_per_jiffy; | 70 | unsigned long loops_per_jiffy; |
| 71 | #ifdef CONFIG_SMP | ||
| 72 | cpumask_t llc_shared_map; /* cpus sharing the last level cache */ | ||
| 73 | #endif | ||
| 70 | unsigned char x86_max_cores; /* cpuid returned max cores value */ | 74 | unsigned char x86_max_cores; /* cpuid returned max cores value */ |
| 71 | unsigned char booted_cores; /* number of cores as seen by OS */ | 75 | unsigned char booted_cores; /* number of cores as seen by OS */ |
| 72 | unsigned char apicid; | 76 | unsigned char apicid; |
| @@ -103,6 +107,7 @@ extern struct cpuinfo_x86 cpu_data[]; | |||
| 103 | 107 | ||
| 104 | extern int phys_proc_id[NR_CPUS]; | 108 | extern int phys_proc_id[NR_CPUS]; |
| 105 | extern int cpu_core_id[NR_CPUS]; | 109 | extern int cpu_core_id[NR_CPUS]; |
| 110 | extern int cpu_llc_id[NR_CPUS]; | ||
| 106 | extern char ignore_fpu_irq; | 111 | extern char ignore_fpu_irq; |
| 107 | 112 | ||
| 108 | extern void identify_cpu(struct cpuinfo_x86 *); | 113 | extern void identify_cpu(struct cpuinfo_x86 *); |
diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index aa958c6ee83e..b94e5eeef917 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h | |||
| @@ -112,4 +112,6 @@ extern unsigned long node_remap_size[]; | |||
| 112 | 112 | ||
| 113 | #endif /* CONFIG_NUMA */ | 113 | #endif /* CONFIG_NUMA */ |
| 114 | 114 | ||
| 115 | extern cpumask_t cpu_coregroup_map(int cpu); | ||
| 116 | |||
| 115 | #endif /* _ASM_I386_TOPOLOGY_H */ | 117 | #endif /* _ASM_I386_TOPOLOGY_H */ |
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 8c8d88c036ed..1aa2cee43344 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <asm/mmsegment.h> | 20 | #include <asm/mmsegment.h> |
| 21 | #include <asm/percpu.h> | 21 | #include <asm/percpu.h> |
| 22 | #include <linux/personality.h> | 22 | #include <linux/personality.h> |
| 23 | #include <linux/cpumask.h> | ||
| 23 | 24 | ||
| 24 | #define TF_MASK 0x00000100 | 25 | #define TF_MASK 0x00000100 |
| 25 | #define IF_MASK 0x00000200 | 26 | #define IF_MASK 0x00000200 |
| @@ -65,6 +66,9 @@ struct cpuinfo_x86 { | |||
| 65 | __u32 x86_power; | 66 | __u32 x86_power; |
| 66 | __u32 extended_cpuid_level; /* Max extended CPUID function supported */ | 67 | __u32 extended_cpuid_level; /* Max extended CPUID function supported */ |
| 67 | unsigned long loops_per_jiffy; | 68 | unsigned long loops_per_jiffy; |
| 69 | #ifdef CONFIG_SMP | ||
| 70 | cpumask_t llc_shared_map; /* cpus sharing the last level cache */ | ||
| 71 | #endif | ||
| 68 | __u8 apicid; | 72 | __u8 apicid; |
| 69 | __u8 booted_cores; /* number of cores as seen by OS */ | 73 | __u8 booted_cores; /* number of cores as seen by OS */ |
| 70 | } ____cacheline_aligned; | 74 | } ____cacheline_aligned; |
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 9ccbb2cfd5c0..a4fdaeb5c397 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h | |||
| @@ -56,6 +56,7 @@ extern cpumask_t cpu_sibling_map[NR_CPUS]; | |||
| 56 | extern cpumask_t cpu_core_map[NR_CPUS]; | 56 | extern cpumask_t cpu_core_map[NR_CPUS]; |
| 57 | extern u8 phys_proc_id[NR_CPUS]; | 57 | extern u8 phys_proc_id[NR_CPUS]; |
| 58 | extern u8 cpu_core_id[NR_CPUS]; | 58 | extern u8 cpu_core_id[NR_CPUS]; |
| 59 | extern u8 cpu_llc_id[NR_CPUS]; | ||
| 59 | 60 | ||
| 60 | #define SMP_TRAMPOLINE_BASE 0x6000 | 61 | #define SMP_TRAMPOLINE_BASE 0x6000 |
| 61 | 62 | ||
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index c642f5d9882d..9db54e9d17bb 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h | |||
| @@ -68,4 +68,6 @@ extern int __node_distance(int, int); | |||
| 68 | 68 | ||
| 69 | #include <asm-generic/topology.h> | 69 | #include <asm-generic/topology.h> |
| 70 | 70 | ||
| 71 | extern cpumask_t cpu_coregroup_map(int cpu); | ||
| 72 | |||
| 71 | #endif | 73 | #endif |
diff --git a/include/linux/topology.h b/include/linux/topology.h index e8eb0040ce3a..a305ae2e44b6 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
| @@ -164,6 +164,15 @@ | |||
| 164 | .nr_balance_failed = 0, \ | 164 | .nr_balance_failed = 0, \ |
| 165 | } | 165 | } |
| 166 | 166 | ||
| 167 | #ifdef CONFIG_SCHED_MC | ||
| 168 | #ifndef SD_MC_INIT | ||
| 169 | /* for now its same as SD_CPU_INIT. | ||
| 170 | * TBD: Tune Domain parameters! | ||
| 171 | */ | ||
| 172 | #define SD_MC_INIT SD_CPU_INIT | ||
| 173 | #endif | ||
| 174 | #endif | ||
| 175 | |||
| 167 | #ifdef CONFIG_NUMA | 176 | #ifdef CONFIG_NUMA |
| 168 | #ifndef SD_NODE_INIT | 177 | #ifndef SD_NODE_INIT |
| 169 | #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! | 178 | #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! |
diff --git a/kernel/sched.c b/kernel/sched.c index a96a05d23262..8a8b71b5751b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -5574,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu) | |||
| 5574 | } | 5574 | } |
| 5575 | #endif | 5575 | #endif |
| 5576 | 5576 | ||
| 5577 | #ifdef CONFIG_SCHED_MC | ||
| 5578 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | ||
| 5579 | static struct sched_group sched_group_core[NR_CPUS]; | ||
| 5580 | #endif | ||
| 5581 | |||
| 5582 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | ||
| 5583 | static int cpu_to_core_group(int cpu) | ||
| 5584 | { | ||
| 5585 | return first_cpu(cpu_sibling_map[cpu]); | ||
| 5586 | } | ||
| 5587 | #elif defined(CONFIG_SCHED_MC) | ||
| 5588 | static int cpu_to_core_group(int cpu) | ||
| 5589 | { | ||
| 5590 | return cpu; | ||
| 5591 | } | ||
| 5592 | #endif | ||
| 5593 | |||
| 5577 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | 5594 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); |
| 5578 | static struct sched_group sched_group_phys[NR_CPUS]; | 5595 | static struct sched_group sched_group_phys[NR_CPUS]; |
| 5579 | static int cpu_to_phys_group(int cpu) | 5596 | static int cpu_to_phys_group(int cpu) |
| 5580 | { | 5597 | { |
| 5581 | #ifdef CONFIG_SCHED_SMT | 5598 | #if defined(CONFIG_SCHED_MC) |
| 5599 | cpumask_t mask = cpu_coregroup_map(cpu); | ||
| 5600 | return first_cpu(mask); | ||
| 5601 | #elif defined(CONFIG_SCHED_SMT) | ||
| 5582 | return first_cpu(cpu_sibling_map[cpu]); | 5602 | return first_cpu(cpu_sibling_map[cpu]); |
| 5583 | #else | 5603 | #else |
| 5584 | return cpu; | 5604 | return cpu; |
| @@ -5676,6 +5696,17 @@ void build_sched_domains(const cpumask_t *cpu_map) | |||
| 5676 | sd->parent = p; | 5696 | sd->parent = p; |
| 5677 | sd->groups = &sched_group_phys[group]; | 5697 | sd->groups = &sched_group_phys[group]; |
| 5678 | 5698 | ||
| 5699 | #ifdef CONFIG_SCHED_MC | ||
| 5700 | p = sd; | ||
| 5701 | sd = &per_cpu(core_domains, i); | ||
| 5702 | group = cpu_to_core_group(i); | ||
| 5703 | *sd = SD_MC_INIT; | ||
| 5704 | sd->span = cpu_coregroup_map(i); | ||
| 5705 | cpus_and(sd->span, sd->span, *cpu_map); | ||
| 5706 | sd->parent = p; | ||
| 5707 | sd->groups = &sched_group_core[group]; | ||
| 5708 | #endif | ||
| 5709 | |||
| 5679 | #ifdef CONFIG_SCHED_SMT | 5710 | #ifdef CONFIG_SCHED_SMT |
| 5680 | p = sd; | 5711 | p = sd; |
| 5681 | sd = &per_cpu(cpu_domains, i); | 5712 | sd = &per_cpu(cpu_domains, i); |
| @@ -5701,6 +5732,19 @@ void build_sched_domains(const cpumask_t *cpu_map) | |||
| 5701 | } | 5732 | } |
| 5702 | #endif | 5733 | #endif |
| 5703 | 5734 | ||
| 5735 | #ifdef CONFIG_SCHED_MC | ||
| 5736 | /* Set up multi-core groups */ | ||
| 5737 | for_each_cpu_mask(i, *cpu_map) { | ||
| 5738 | cpumask_t this_core_map = cpu_coregroup_map(i); | ||
| 5739 | cpus_and(this_core_map, this_core_map, *cpu_map); | ||
| 5740 | if (i != first_cpu(this_core_map)) | ||
| 5741 | continue; | ||
| 5742 | init_sched_build_groups(sched_group_core, this_core_map, | ||
| 5743 | &cpu_to_core_group); | ||
| 5744 | } | ||
| 5745 | #endif | ||
| 5746 | |||
| 5747 | |||
| 5704 | /* Set up physical groups */ | 5748 | /* Set up physical groups */ |
| 5705 | for (i = 0; i < MAX_NUMNODES; i++) { | 5749 | for (i = 0; i < MAX_NUMNODES; i++) { |
| 5706 | cpumask_t nodemask = node_to_cpumask(i); | 5750 | cpumask_t nodemask = node_to_cpumask(i); |
| @@ -5797,11 +5841,31 @@ void build_sched_domains(const cpumask_t *cpu_map) | |||
| 5797 | power = SCHED_LOAD_SCALE; | 5841 | power = SCHED_LOAD_SCALE; |
| 5798 | sd->groups->cpu_power = power; | 5842 | sd->groups->cpu_power = power; |
| 5799 | #endif | 5843 | #endif |
| 5844 | #ifdef CONFIG_SCHED_MC | ||
| 5845 | sd = &per_cpu(core_domains, i); | ||
| 5846 | power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1) | ||
| 5847 | * SCHED_LOAD_SCALE / 10; | ||
| 5848 | sd->groups->cpu_power = power; | ||
| 5849 | |||
| 5850 | sd = &per_cpu(phys_domains, i); | ||
| 5800 | 5851 | ||
| 5852 | /* | ||
| 5853 | * This has to be < 2 * SCHED_LOAD_SCALE | ||
| 5854 | * Lets keep it SCHED_LOAD_SCALE, so that | ||
| 5855 | * while calculating NUMA group's cpu_power | ||
| 5856 | * we can simply do | ||
| 5857 | * numa_group->cpu_power += phys_group->cpu_power; | ||
| 5858 | * | ||
| 5859 | * See "only add power once for each physical pkg" | ||
| 5860 | * comment below | ||
| 5861 | */ | ||
| 5862 | sd->groups->cpu_power = SCHED_LOAD_SCALE; | ||
| 5863 | #else | ||
| 5801 | sd = &per_cpu(phys_domains, i); | 5864 | sd = &per_cpu(phys_domains, i); |
| 5802 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | 5865 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * |
| 5803 | (cpus_weight(sd->groups->cpumask)-1) / 10; | 5866 | (cpus_weight(sd->groups->cpumask)-1) / 10; |
| 5804 | sd->groups->cpu_power = power; | 5867 | sd->groups->cpu_power = power; |
| 5868 | #endif | ||
| 5805 | 5869 | ||
| 5806 | #ifdef CONFIG_NUMA | 5870 | #ifdef CONFIG_NUMA |
| 5807 | sd = &per_cpu(allnodes_domains, i); | 5871 | sd = &per_cpu(allnodes_domains, i); |
| @@ -5823,7 +5887,6 @@ void build_sched_domains(const cpumask_t *cpu_map) | |||
| 5823 | next_sg: | 5887 | next_sg: |
| 5824 | for_each_cpu_mask(j, sg->cpumask) { | 5888 | for_each_cpu_mask(j, sg->cpumask) { |
| 5825 | struct sched_domain *sd; | 5889 | struct sched_domain *sd; |
| 5826 | int power; | ||
| 5827 | 5890 | ||
| 5828 | sd = &per_cpu(phys_domains, j); | 5891 | sd = &per_cpu(phys_domains, j); |
| 5829 | if (j != first_cpu(sd->groups->cpumask)) { | 5892 | if (j != first_cpu(sd->groups->cpumask)) { |
| @@ -5833,10 +5896,8 @@ next_sg: | |||
| 5833 | */ | 5896 | */ |
| 5834 | continue; | 5897 | continue; |
| 5835 | } | 5898 | } |
| 5836 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
| 5837 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
| 5838 | 5899 | ||
| 5839 | sg->cpu_power += power; | 5900 | sg->cpu_power += sd->groups->cpu_power; |
| 5840 | } | 5901 | } |
| 5841 | sg = sg->next; | 5902 | sg = sg->next; |
| 5842 | if (sg != sched_group_nodes[i]) | 5903 | if (sg != sched_group_nodes[i]) |
| @@ -5849,6 +5910,8 @@ next_sg: | |||
| 5849 | struct sched_domain *sd; | 5910 | struct sched_domain *sd; |
| 5850 | #ifdef CONFIG_SCHED_SMT | 5911 | #ifdef CONFIG_SCHED_SMT |
| 5851 | sd = &per_cpu(cpu_domains, i); | 5912 | sd = &per_cpu(cpu_domains, i); |
| 5913 | #elif defined(CONFIG_SCHED_MC) | ||
| 5914 | sd = &per_cpu(core_domains, i); | ||
| 5852 | #else | 5915 | #else |
| 5853 | sd = &per_cpu(phys_domains, i); | 5916 | sd = &per_cpu(phys_domains, i); |
| 5854 | #endif | 5917 | #endif |
