diff options
author | Siddha, Suresh B <suresh.b.siddha@intel.com> | 2005-11-05 11:25:54 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-11-14 22:55:16 -0500 |
commit | 94605eff572b727aaad9b4b29bc358b919096503 (patch) | |
tree | 657a848d8ef34d2f94bbad3aa4e5458d2d3f2d2b | |
parent | e90f22edf432512219cc2952f5811961abbd164f (diff) |
[PATCH] x86-64/i386: Intel HT, Multi core detection fixes
Fields obtained through cpuid vector 0x1 (ebx[16:23]) and
vector 0x4 (eax[14:25], eax[26:31]) indicate the maximum values and might not
always be the same as what is available and what the OS sees. So make sure the
"siblings" and "cpu cores" values in /proc/cpuinfo reflect the values as seen
by the OS instead of what the cpuid instruction says. This will also fix the
buggy BIOS cases (for example, where cpuid on a single-core cpu says there are
"2" siblings even when HT is disabled in the BIOS; see
http://bugzilla.kernel.org/show_bug.cgi?id=4359).
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/i386/kernel/cpu/amd.c | 12 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/common.c | 36 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/intel.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/intel_cacheinfo.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/proc.c | 7 | ||||
-rw-r--r-- | arch/i386/kernel/smpboot.c | 73 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 69 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 69 | ||||
-rw-r--r-- | include/asm-i386/processor.h | 4 | ||||
-rw-r--r-- | include/asm-x86_64/processor.h | 4 | ||||
-rw-r--r-- | include/linux/bitops.h | 10 |
11 files changed, 178 insertions, 110 deletions
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 53a1681cd964..e344ef88cfcd 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
206 | display_cacheinfo(c); | 206 | display_cacheinfo(c); |
207 | 207 | ||
208 | if (cpuid_eax(0x80000000) >= 0x80000008) { | 208 | if (cpuid_eax(0x80000000) >= 0x80000008) { |
209 | c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 209 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; |
210 | if (c->x86_num_cores & (c->x86_num_cores - 1)) | 210 | if (c->x86_max_cores & (c->x86_max_cores - 1)) |
211 | c->x86_num_cores = 1; | 211 | c->x86_max_cores = 1; |
212 | } | 212 | } |
213 | 213 | ||
214 | #ifdef CONFIG_X86_HT | 214 | #ifdef CONFIG_X86_HT |
@@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
217 | * distingush the cores. Assumes number of cores is a power | 217 | * distingush the cores. Assumes number of cores is a power |
218 | * of two. | 218 | * of two. |
219 | */ | 219 | */ |
220 | if (c->x86_num_cores > 1) { | 220 | if (c->x86_max_cores > 1) { |
221 | int cpu = smp_processor_id(); | 221 | int cpu = smp_processor_id(); |
222 | unsigned bits = 0; | 222 | unsigned bits = 0; |
223 | while ((1 << bits) < c->x86_num_cores) | 223 | while ((1 << bits) < c->x86_max_cores) |
224 | bits++; | 224 | bits++; |
225 | cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); | 225 | cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); |
226 | phys_proc_id[cpu] >>= bits; | 226 | phys_proc_id[cpu] >>= bits; |
227 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", | 227 | printk(KERN_INFO "CPU %d(%d) -> Core %d\n", |
228 | cpu, c->x86_num_cores, cpu_core_id[cpu]); | 228 | cpu, c->x86_max_cores, cpu_core_id[cpu]); |
229 | } | 229 | } |
230 | #endif | 230 | #endif |
231 | } | 231 | } |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 35a67dab4a94..4e9c2e99b0a5 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -335,7 +335,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) | |||
335 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | 335 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ |
336 | c->x86_vendor_id[0] = '\0'; /* Unset */ | 336 | c->x86_vendor_id[0] = '\0'; /* Unset */ |
337 | c->x86_model_id[0] = '\0'; /* Unset */ | 337 | c->x86_model_id[0] = '\0'; /* Unset */ |
338 | c->x86_num_cores = 1; | 338 | c->x86_max_cores = 1; |
339 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 339 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
340 | 340 | ||
341 | if (!have_cpuid_p()) { | 341 | if (!have_cpuid_p()) { |
@@ -446,52 +446,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) | |||
446 | void __devinit detect_ht(struct cpuinfo_x86 *c) | 446 | void __devinit detect_ht(struct cpuinfo_x86 *c) |
447 | { | 447 | { |
448 | u32 eax, ebx, ecx, edx; | 448 | u32 eax, ebx, ecx, edx; |
449 | int index_msb, tmp; | 449 | int index_msb, core_bits; |
450 | int cpu = smp_processor_id(); | 450 | int cpu = smp_processor_id(); |
451 | 451 | ||
452 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
453 | |||
454 | c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); | ||
455 | |||
452 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 456 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
453 | return; | 457 | return; |
454 | 458 | ||
455 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
456 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 459 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
457 | 460 | ||
458 | if (smp_num_siblings == 1) { | 461 | if (smp_num_siblings == 1) { |
459 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 462 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
460 | } else if (smp_num_siblings > 1 ) { | 463 | } else if (smp_num_siblings > 1 ) { |
461 | index_msb = 31; | ||
462 | 464 | ||
463 | if (smp_num_siblings > NR_CPUS) { | 465 | if (smp_num_siblings > NR_CPUS) { |
464 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | 466 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); |
465 | smp_num_siblings = 1; | 467 | smp_num_siblings = 1; |
466 | return; | 468 | return; |
467 | } | 469 | } |
468 | tmp = smp_num_siblings; | 470 | |
469 | while ((tmp & 0x80000000 ) == 0) { | 471 | index_msb = get_count_order(smp_num_siblings); |
470 | tmp <<=1 ; | ||
471 | index_msb--; | ||
472 | } | ||
473 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
474 | index_msb++; | ||
475 | phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); | 472 | phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); |
476 | 473 | ||
477 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 474 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
478 | phys_proc_id[cpu]); | 475 | phys_proc_id[cpu]); |
479 | 476 | ||
480 | smp_num_siblings = smp_num_siblings / c->x86_num_cores; | 477 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
481 | 478 | ||
482 | tmp = smp_num_siblings; | 479 | index_msb = get_count_order(smp_num_siblings) ; |
483 | index_msb = 31; | ||
484 | while ((tmp & 0x80000000) == 0) { | ||
485 | tmp <<=1 ; | ||
486 | index_msb--; | ||
487 | } | ||
488 | 480 | ||
489 | if (smp_num_siblings & (smp_num_siblings - 1)) | 481 | core_bits = get_count_order(c->x86_max_cores); |
490 | index_msb++; | ||
491 | 482 | ||
492 | cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); | 483 | cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & |
484 | ((1 << core_bits) - 1); | ||
493 | 485 | ||
494 | if (c->x86_num_cores > 1) | 486 | if (c->x86_max_cores > 1) |
495 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 487 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
496 | cpu_core_id[cpu]); | 488 | cpu_core_id[cpu]); |
497 | } | 489 | } |
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 43601de0f633..8d603ba28126 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c | |||
@@ -157,7 +157,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) | |||
157 | if ( p ) | 157 | if ( p ) |
158 | strcpy(c->x86_model_id, p); | 158 | strcpy(c->x86_model_id, p); |
159 | 159 | ||
160 | c->x86_num_cores = num_cpu_cores(c); | 160 | c->x86_max_cores = num_cpu_cores(c); |
161 | 161 | ||
162 | detect_ht(c); | 162 | detect_ht(c); |
163 | 163 | ||
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 4dc42a189ae5..e66d14099564 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c | |||
@@ -307,7 +307,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
307 | #ifdef CONFIG_X86_HT | 307 | #ifdef CONFIG_X86_HT |
308 | else if (num_threads_sharing == smp_num_siblings) | 308 | else if (num_threads_sharing == smp_num_siblings) |
309 | this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; | 309 | this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; |
310 | else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) | 310 | else if (num_threads_sharing == (c->x86_max_cores * smp_num_siblings)) |
311 | this_leaf->shared_cpu_map = cpu_core_map[cpu]; | 311 | this_leaf->shared_cpu_map = cpu_core_map[cpu]; |
312 | else | 312 | else |
313 | printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " | 313 | printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " |
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 41b871ecf4b3..e7921315ae9d 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
94 | if (c->x86_cache_size >= 0) | 94 | if (c->x86_cache_size >= 0) |
95 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | 95 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); |
96 | #ifdef CONFIG_X86_HT | 96 | #ifdef CONFIG_X86_HT |
97 | if (c->x86_num_cores * smp_num_siblings > 1) { | 97 | if (c->x86_max_cores * smp_num_siblings > 1) { |
98 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); | 98 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); |
99 | seq_printf(m, "siblings\t: %d\n", | 99 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); |
100 | c->x86_num_cores * smp_num_siblings); | ||
101 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); | 100 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); |
102 | seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); | 101 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
103 | } | 102 | } |
104 | #endif | 103 | #endif |
105 | 104 | ||
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 01b618e73ecd..0a9c64655236 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -74,9 +74,11 @@ EXPORT_SYMBOL(phys_proc_id); | |||
74 | int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; | 74 | int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; |
75 | EXPORT_SYMBOL(cpu_core_id); | 75 | EXPORT_SYMBOL(cpu_core_id); |
76 | 76 | ||
77 | /* representing HT siblings of each logical CPU */ | ||
77 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 78 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
78 | EXPORT_SYMBOL(cpu_sibling_map); | 79 | EXPORT_SYMBOL(cpu_sibling_map); |
79 | 80 | ||
81 | /* representing HT and core siblings of each logical CPU */ | ||
80 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | 82 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; |
81 | EXPORT_SYMBOL(cpu_core_map); | 83 | EXPORT_SYMBOL(cpu_core_map); |
82 | 84 | ||
@@ -444,35 +446,60 @@ static void __devinit smp_callin(void) | |||
444 | 446 | ||
445 | static int cpucount; | 447 | static int cpucount; |
446 | 448 | ||
449 | /* representing cpus for which sibling maps can be computed */ | ||
450 | static cpumask_t cpu_sibling_setup_map; | ||
451 | |||
447 | static inline void | 452 | static inline void |
448 | set_cpu_sibling_map(int cpu) | 453 | set_cpu_sibling_map(int cpu) |
449 | { | 454 | { |
450 | int i; | 455 | int i; |
456 | struct cpuinfo_x86 *c = cpu_data; | ||
457 | |||
458 | cpu_set(cpu, cpu_sibling_setup_map); | ||
451 | 459 | ||
452 | if (smp_num_siblings > 1) { | 460 | if (smp_num_siblings > 1) { |
453 | for (i = 0; i < NR_CPUS; i++) { | 461 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
454 | if (!cpu_isset(i, cpu_callout_map)) | 462 | if (phys_proc_id[cpu] == phys_proc_id[i] && |
455 | continue; | 463 | cpu_core_id[cpu] == cpu_core_id[i]) { |
456 | if (cpu_core_id[cpu] == cpu_core_id[i]) { | ||
457 | cpu_set(i, cpu_sibling_map[cpu]); | 464 | cpu_set(i, cpu_sibling_map[cpu]); |
458 | cpu_set(cpu, cpu_sibling_map[i]); | 465 | cpu_set(cpu, cpu_sibling_map[i]); |
466 | cpu_set(i, cpu_core_map[cpu]); | ||
467 | cpu_set(cpu, cpu_core_map[i]); | ||
459 | } | 468 | } |
460 | } | 469 | } |
461 | } else { | 470 | } else { |
462 | cpu_set(cpu, cpu_sibling_map[cpu]); | 471 | cpu_set(cpu, cpu_sibling_map[cpu]); |
463 | } | 472 | } |
464 | 473 | ||
465 | if (current_cpu_data.x86_num_cores > 1) { | 474 | if (current_cpu_data.x86_max_cores == 1) { |
466 | for (i = 0; i < NR_CPUS; i++) { | ||
467 | if (!cpu_isset(i, cpu_callout_map)) | ||
468 | continue; | ||
469 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
470 | cpu_set(i, cpu_core_map[cpu]); | ||
471 | cpu_set(cpu, cpu_core_map[i]); | ||
472 | } | ||
473 | } | ||
474 | } else { | ||
475 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 475 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
476 | c[cpu].booted_cores = 1; | ||
477 | return; | ||
478 | } | ||
479 | |||
480 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | ||
481 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
482 | cpu_set(i, cpu_core_map[cpu]); | ||
483 | cpu_set(cpu, cpu_core_map[i]); | ||
484 | /* | ||
485 | * Does this new cpu bringup a new core? | ||
486 | */ | ||
487 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { | ||
488 | /* | ||
489 | * for each core in package, increment | ||
490 | * the booted_cores for this new cpu | ||
491 | */ | ||
492 | if (first_cpu(cpu_sibling_map[i]) == i) | ||
493 | c[cpu].booted_cores++; | ||
494 | /* | ||
495 | * increment the core count for all | ||
496 | * the other cpus in this package | ||
497 | */ | ||
498 | if (i != cpu) | ||
499 | c[i].booted_cores++; | ||
500 | } else if (i != cpu && !c[cpu].booted_cores) | ||
501 | c[cpu].booted_cores = c[i].booted_cores; | ||
502 | } | ||
476 | } | 503 | } |
477 | } | 504 | } |
478 | 505 | ||
@@ -1096,11 +1123,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1096 | 1123 | ||
1097 | current_thread_info()->cpu = 0; | 1124 | current_thread_info()->cpu = 0; |
1098 | smp_tune_scheduling(); | 1125 | smp_tune_scheduling(); |
1099 | cpus_clear(cpu_sibling_map[0]); | ||
1100 | cpu_set(0, cpu_sibling_map[0]); | ||
1101 | 1126 | ||
1102 | cpus_clear(cpu_core_map[0]); | 1127 | set_cpu_sibling_map(0); |
1103 | cpu_set(0, cpu_core_map[0]); | ||
1104 | 1128 | ||
1105 | /* | 1129 | /* |
1106 | * If we couldn't find an SMP configuration at boot time, | 1130 | * If we couldn't find an SMP configuration at boot time, |
@@ -1279,15 +1303,24 @@ static void | |||
1279 | remove_siblinginfo(int cpu) | 1303 | remove_siblinginfo(int cpu) |
1280 | { | 1304 | { |
1281 | int sibling; | 1305 | int sibling; |
1306 | struct cpuinfo_x86 *c = cpu_data; | ||
1282 | 1307 | ||
1308 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { | ||
1309 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1310 | /* | ||
1311 | * last thread sibling in this cpu core going down | ||
1312 | */ | ||
1313 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) | ||
1314 | c[sibling].booted_cores--; | ||
1315 | } | ||
1316 | |||
1283 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) | 1317 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) |
1284 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1318 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1285 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) | ||
1286 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1287 | cpus_clear(cpu_sibling_map[cpu]); | 1319 | cpus_clear(cpu_sibling_map[cpu]); |
1288 | cpus_clear(cpu_core_map[cpu]); | 1320 | cpus_clear(cpu_core_map[cpu]); |
1289 | phys_proc_id[cpu] = BAD_APICID; | 1321 | phys_proc_id[cpu] = BAD_APICID; |
1290 | cpu_core_id[cpu] = BAD_APICID; | 1322 | cpu_core_id[cpu] = BAD_APICID; |
1323 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
1291 | } | 1324 | } |
1292 | 1325 | ||
1293 | int __cpu_disable(void) | 1326 | int __cpu_disable(void) |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 476ee034fca2..40c77f6fe4b0 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -795,7 +795,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
795 | #endif | 795 | #endif |
796 | 796 | ||
797 | bits = 0; | 797 | bits = 0; |
798 | while ((1 << bits) < c->x86_num_cores) | 798 | while ((1 << bits) < c->x86_max_cores) |
799 | bits++; | 799 | bits++; |
800 | 800 | ||
801 | /* Low order bits define the core id (index of core in socket) */ | 801 | /* Low order bits define the core id (index of core in socket) */ |
@@ -828,7 +828,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
828 | numa_set_node(cpu, node); | 828 | numa_set_node(cpu, node); |
829 | 829 | ||
830 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", | 830 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", |
831 | cpu, c->x86_num_cores, node, cpu_core_id[cpu]); | 831 | cpu, c->x86_max_cores, node, cpu_core_id[cpu]); |
832 | #endif | 832 | #endif |
833 | #endif | 833 | #endif |
834 | } | 834 | } |
@@ -877,9 +877,9 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
877 | display_cacheinfo(c); | 877 | display_cacheinfo(c); |
878 | 878 | ||
879 | if (c->extended_cpuid_level >= 0x80000008) { | 879 | if (c->extended_cpuid_level >= 0x80000008) { |
880 | c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 880 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; |
881 | if (c->x86_num_cores & (c->x86_num_cores - 1)) | 881 | if (c->x86_max_cores & (c->x86_max_cores - 1)) |
882 | c->x86_num_cores = 1; | 882 | c->x86_max_cores = 1; |
883 | 883 | ||
884 | amd_detect_cmp(c); | 884 | amd_detect_cmp(c); |
885 | } | 885 | } |
@@ -891,54 +891,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
891 | { | 891 | { |
892 | #ifdef CONFIG_SMP | 892 | #ifdef CONFIG_SMP |
893 | u32 eax, ebx, ecx, edx; | 893 | u32 eax, ebx, ecx, edx; |
894 | int index_msb, tmp; | 894 | int index_msb, core_bits; |
895 | int cpu = smp_processor_id(); | 895 | int cpu = smp_processor_id(); |
896 | 896 | ||
897 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
898 | |||
899 | c->apicid = phys_pkg_id(0); | ||
900 | |||
897 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 901 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
898 | return; | 902 | return; |
899 | 903 | ||
900 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
901 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 904 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
902 | 905 | ||
903 | if (smp_num_siblings == 1) { | 906 | if (smp_num_siblings == 1) { |
904 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 907 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
905 | } else if (smp_num_siblings > 1) { | 908 | } else if (smp_num_siblings > 1 ) { |
906 | index_msb = 31; | 909 | |
907 | /* | ||
908 | * At this point we only support two siblings per | ||
909 | * processor package. | ||
910 | */ | ||
911 | if (smp_num_siblings > NR_CPUS) { | 910 | if (smp_num_siblings > NR_CPUS) { |
912 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | 911 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); |
913 | smp_num_siblings = 1; | 912 | smp_num_siblings = 1; |
914 | return; | 913 | return; |
915 | } | 914 | } |
916 | tmp = smp_num_siblings; | 915 | |
917 | while ((tmp & 0x80000000 ) == 0) { | 916 | index_msb = get_count_order(smp_num_siblings); |
918 | tmp <<=1 ; | ||
919 | index_msb--; | ||
920 | } | ||
921 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
922 | index_msb++; | ||
923 | phys_proc_id[cpu] = phys_pkg_id(index_msb); | 917 | phys_proc_id[cpu] = phys_pkg_id(index_msb); |
924 | 918 | ||
925 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 919 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
926 | phys_proc_id[cpu]); | 920 | phys_proc_id[cpu]); |
927 | 921 | ||
928 | smp_num_siblings = smp_num_siblings / c->x86_num_cores; | 922 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
929 | 923 | ||
930 | tmp = smp_num_siblings; | 924 | index_msb = get_count_order(smp_num_siblings) ; |
931 | index_msb = 31; | 925 | |
932 | while ((tmp & 0x80000000) == 0) { | 926 | core_bits = get_count_order(c->x86_max_cores); |
933 | tmp <<=1 ; | ||
934 | index_msb--; | ||
935 | } | ||
936 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
937 | index_msb++; | ||
938 | 927 | ||
939 | cpu_core_id[cpu] = phys_pkg_id(index_msb); | 928 | cpu_core_id[cpu] = phys_pkg_id(index_msb) & |
929 | ((1 << core_bits) - 1); | ||
940 | 930 | ||
941 | if (c->x86_num_cores > 1) | 931 | if (c->x86_max_cores > 1) |
942 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 932 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
943 | cpu_core_id[cpu]); | 933 | cpu_core_id[cpu]); |
944 | } | 934 | } |
@@ -1006,7 +996,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
1006 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 996 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
1007 | if (c->x86 >= 15) | 997 | if (c->x86 >= 15) |
1008 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 998 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
1009 | c->x86_num_cores = intel_num_cpu_cores(c); | 999 | c->x86_max_cores = intel_num_cpu_cores(c); |
1010 | 1000 | ||
1011 | srat_detect_node(); | 1001 | srat_detect_node(); |
1012 | } | 1002 | } |
@@ -1044,7 +1034,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1044 | c->x86_model_id[0] = '\0'; /* Unset */ | 1034 | c->x86_model_id[0] = '\0'; /* Unset */ |
1045 | c->x86_clflush_size = 64; | 1035 | c->x86_clflush_size = 64; |
1046 | c->x86_cache_alignment = c->x86_clflush_size; | 1036 | c->x86_cache_alignment = c->x86_clflush_size; |
1047 | c->x86_num_cores = 1; | 1037 | c->x86_max_cores = 1; |
1048 | c->extended_cpuid_level = 0; | 1038 | c->extended_cpuid_level = 0; |
1049 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 1039 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
1050 | 1040 | ||
@@ -1278,13 +1268,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1278 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | 1268 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); |
1279 | 1269 | ||
1280 | #ifdef CONFIG_SMP | 1270 | #ifdef CONFIG_SMP |
1281 | if (smp_num_siblings * c->x86_num_cores > 1) { | 1271 | if (smp_num_siblings * c->x86_max_cores > 1) { |
1282 | int cpu = c - cpu_data; | 1272 | int cpu = c - cpu_data; |
1283 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); | 1273 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); |
1284 | seq_printf(m, "siblings\t: %d\n", | 1274 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); |
1285 | c->x86_num_cores * smp_num_siblings); | ||
1286 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); | 1275 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); |
1287 | seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); | 1276 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
1288 | } | 1277 | } |
1289 | #endif | 1278 | #endif |
1290 | 1279 | ||
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index f74319a80659..2b9ddba61b37 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -64,6 +64,7 @@ | |||
64 | int smp_num_siblings = 1; | 64 | int smp_num_siblings = 1; |
65 | /* Package ID of each logical CPU */ | 65 | /* Package ID of each logical CPU */ |
66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
67 | /* core ID of each logical CPU */ | ||
67 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 68 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
68 | EXPORT_SYMBOL(phys_proc_id); | 69 | EXPORT_SYMBOL(phys_proc_id); |
69 | EXPORT_SYMBOL(cpu_core_id); | 70 | EXPORT_SYMBOL(cpu_core_id); |
@@ -89,7 +90,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | |||
89 | /* Set when the idlers are all forked */ | 90 | /* Set when the idlers are all forked */ |
90 | int smp_threads_ready; | 91 | int smp_threads_ready; |
91 | 92 | ||
93 | /* representing HT siblings of each logical CPU */ | ||
92 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 94 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
95 | |||
96 | /* representing HT and core siblings of each logical CPU */ | ||
93 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | 97 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; |
94 | EXPORT_SYMBOL(cpu_core_map); | 98 | EXPORT_SYMBOL(cpu_core_map); |
95 | 99 | ||
@@ -436,30 +440,59 @@ void __cpuinit smp_callin(void) | |||
436 | cpu_set(cpuid, cpu_callin_map); | 440 | cpu_set(cpuid, cpu_callin_map); |
437 | } | 441 | } |
438 | 442 | ||
443 | /* representing cpus for which sibling maps can be computed */ | ||
444 | static cpumask_t cpu_sibling_setup_map; | ||
445 | |||
439 | static inline void set_cpu_sibling_map(int cpu) | 446 | static inline void set_cpu_sibling_map(int cpu) |
440 | { | 447 | { |
441 | int i; | 448 | int i; |
449 | struct cpuinfo_x86 *c = cpu_data; | ||
450 | |||
451 | cpu_set(cpu, cpu_sibling_setup_map); | ||
442 | 452 | ||
443 | if (smp_num_siblings > 1) { | 453 | if (smp_num_siblings > 1) { |
444 | for_each_cpu(i) { | 454 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
445 | if (cpu_core_id[cpu] == cpu_core_id[i]) { | 455 | if (phys_proc_id[cpu] == phys_proc_id[i] && |
456 | cpu_core_id[cpu] == cpu_core_id[i]) { | ||
446 | cpu_set(i, cpu_sibling_map[cpu]); | 457 | cpu_set(i, cpu_sibling_map[cpu]); |
447 | cpu_set(cpu, cpu_sibling_map[i]); | 458 | cpu_set(cpu, cpu_sibling_map[i]); |
459 | cpu_set(i, cpu_core_map[cpu]); | ||
460 | cpu_set(cpu, cpu_core_map[i]); | ||
448 | } | 461 | } |
449 | } | 462 | } |
450 | } else { | 463 | } else { |
451 | cpu_set(cpu, cpu_sibling_map[cpu]); | 464 | cpu_set(cpu, cpu_sibling_map[cpu]); |
452 | } | 465 | } |
453 | 466 | ||
454 | if (current_cpu_data.x86_num_cores > 1) { | 467 | if (current_cpu_data.x86_max_cores == 1) { |
455 | for_each_cpu(i) { | ||
456 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
457 | cpu_set(i, cpu_core_map[cpu]); | ||
458 | cpu_set(cpu, cpu_core_map[i]); | ||
459 | } | ||
460 | } | ||
461 | } else { | ||
462 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 468 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
469 | c[cpu].booted_cores = 1; | ||
470 | return; | ||
471 | } | ||
472 | |||
473 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | ||
474 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
475 | cpu_set(i, cpu_core_map[cpu]); | ||
476 | cpu_set(cpu, cpu_core_map[i]); | ||
477 | /* | ||
478 | * Does this new cpu bringup a new core? | ||
479 | */ | ||
480 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { | ||
481 | /* | ||
482 | * for each core in package, increment | ||
483 | * the booted_cores for this new cpu | ||
484 | */ | ||
485 | if (first_cpu(cpu_sibling_map[i]) == i) | ||
486 | c[cpu].booted_cores++; | ||
487 | /* | ||
488 | * increment the core count for all | ||
489 | * the other cpus in this package | ||
490 | */ | ||
491 | if (i != cpu) | ||
492 | c[i].booted_cores++; | ||
493 | } else if (i != cpu && !c[cpu].booted_cores) | ||
494 | c[cpu].booted_cores = c[i].booted_cores; | ||
495 | } | ||
463 | } | 496 | } |
464 | } | 497 | } |
465 | 498 | ||
@@ -993,6 +1026,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
993 | nmi_watchdog_default(); | 1026 | nmi_watchdog_default(); |
994 | current_cpu_data = boot_cpu_data; | 1027 | current_cpu_data = boot_cpu_data; |
995 | current_thread_info()->cpu = 0; /* needed? */ | 1028 | current_thread_info()->cpu = 0; /* needed? */ |
1029 | set_cpu_sibling_map(0); | ||
996 | 1030 | ||
997 | if (smp_sanity_check(max_cpus) < 0) { | 1031 | if (smp_sanity_check(max_cpus) < 0) { |
998 | printk(KERN_INFO "SMP disabled\n"); | 1032 | printk(KERN_INFO "SMP disabled\n"); |
@@ -1036,8 +1070,6 @@ void __init smp_prepare_boot_cpu(void) | |||
1036 | int me = smp_processor_id(); | 1070 | int me = smp_processor_id(); |
1037 | cpu_set(me, cpu_online_map); | 1071 | cpu_set(me, cpu_online_map); |
1038 | cpu_set(me, cpu_callout_map); | 1072 | cpu_set(me, cpu_callout_map); |
1039 | cpu_set(0, cpu_sibling_map[0]); | ||
1040 | cpu_set(0, cpu_core_map[0]); | ||
1041 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1073 | per_cpu(cpu_state, me) = CPU_ONLINE; |
1042 | } | 1074 | } |
1043 | 1075 | ||
@@ -1106,15 +1138,24 @@ void __init smp_cpus_done(unsigned int max_cpus) | |||
1106 | static void remove_siblinginfo(int cpu) | 1138 | static void remove_siblinginfo(int cpu) |
1107 | { | 1139 | { |
1108 | int sibling; | 1140 | int sibling; |
1141 | struct cpuinfo_x86 *c = cpu_data; | ||
1109 | 1142 | ||
1143 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { | ||
1144 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1145 | /* | ||
1146 | * last thread sibling in this cpu core going down | ||
1147 | */ | ||
1148 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) | ||
1149 | c[sibling].booted_cores--; | ||
1150 | } | ||
1151 | |||
1110 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) | 1152 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) |
1111 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1153 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1112 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) | ||
1113 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1114 | cpus_clear(cpu_sibling_map[cpu]); | 1154 | cpus_clear(cpu_sibling_map[cpu]); |
1115 | cpus_clear(cpu_core_map[cpu]); | 1155 | cpus_clear(cpu_core_map[cpu]); |
1116 | phys_proc_id[cpu] = BAD_APICID; | 1156 | phys_proc_id[cpu] = BAD_APICID; |
1117 | cpu_core_id[cpu] = BAD_APICID; | 1157 | cpu_core_id[cpu] = BAD_APICID; |
1158 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
1118 | } | 1159 | } |
1119 | 1160 | ||
1120 | void remove_cpu_from_maps(void) | 1161 | void remove_cpu_from_maps(void) |
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 0a4ec764377c..9cd4a05234a1 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h | |||
@@ -65,7 +65,9 @@ struct cpuinfo_x86 { | |||
65 | int f00f_bug; | 65 | int f00f_bug; |
66 | int coma_bug; | 66 | int coma_bug; |
67 | unsigned long loops_per_jiffy; | 67 | unsigned long loops_per_jiffy; |
68 | unsigned char x86_num_cores; | 68 | unsigned char x86_max_cores; /* cpuid returned max cores value */ |
69 | unsigned char booted_cores; /* number of cores as seen by OS */ | ||
70 | unsigned char apicid; | ||
69 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 71 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
70 | 72 | ||
71 | #define X86_VENDOR_INTEL 0 | 73 | #define X86_VENDOR_INTEL 0 |
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 03837d34fba0..4861246548f7 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h | |||
@@ -61,10 +61,12 @@ struct cpuinfo_x86 { | |||
61 | int x86_cache_alignment; | 61 | int x86_cache_alignment; |
62 | int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ | 62 | int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ |
63 | __u8 x86_virt_bits, x86_phys_bits; | 63 | __u8 x86_virt_bits, x86_phys_bits; |
64 | __u8 x86_num_cores; | 64 | __u8 x86_max_cores; /* cpuid returned max cores value */ |
65 | __u32 x86_power; | 65 | __u32 x86_power; |
66 | __u32 extended_cpuid_level; /* Max extended CPUID function supported */ | 66 | __u32 extended_cpuid_level; /* Max extended CPUID function supported */ |
67 | unsigned long loops_per_jiffy; | 67 | unsigned long loops_per_jiffy; |
68 | __u8 apicid; | ||
69 | __u8 booted_cores; /* number of cores as seen by OS */ | ||
68 | } ____cacheline_aligned; | 70 | } ____cacheline_aligned; |
69 | 71 | ||
70 | #define X86_VENDOR_INTEL 0 | 72 | #define X86_VENDOR_INTEL 0 |
diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cb3c3ef50f50..38c2fb7ebe09 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h | |||
@@ -84,6 +84,16 @@ static __inline__ int get_bitmask_order(unsigned int count) | |||
84 | return order; /* We could be slightly more clever with -1 here... */ | 84 | return order; /* We could be slightly more clever with -1 here... */ |
85 | } | 85 | } |
86 | 86 | ||
87 | static __inline__ int get_count_order(unsigned int count) | ||
88 | { | ||
89 | int order; | ||
90 | |||
91 | order = fls(count) - 1; | ||
92 | if (count & (count - 1)) | ||
93 | order++; | ||
94 | return order; | ||
95 | } | ||
96 | |||
87 | /* | 97 | /* |
88 | * hweightN: returns the hamming weight (i.e. the number | 98 | * hweightN: returns the hamming weight (i.e. the number |
89 | * of bits set) of a N-bit word | 99 | * of bits set) of a N-bit word |