aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Siddha, Suresh B <suresh.b.siddha@intel.com> 2005-11-05 11:25:54 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-11-14 22:55:16 -0500
commit: 94605eff572b727aaad9b4b29bc358b919096503 (patch)
tree: 657a848d8ef34d2f94bbad3aa4e5458d2d3f2d2b
parent: e90f22edf432512219cc2952f5811961abbd164f (diff)
[PATCH] x86-64/i386: Intel HT, Multi core detection fixes
Fields obtained through cpuid vector 0x1 (ebx[16:23]) and vector 0x4 (eax[14:25], eax[26:31]) indicate the maximum values and might not always be the same as what is available and what the OS sees.

So make sure the "siblings" and "cpu cores" values in /proc/cpuinfo reflect the values as seen by the OS instead of what the cpuid instruction says. This will also fix the buggy BIOS cases (for example, where cpuid on a single-core cpu says there are "2" siblings even when HT is disabled in the BIOS: http://bugzilla.kernel.org/show_bug.cgi?id=4359).

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- arch/i386/kernel/cpu/amd.c 12
-rw-r--r-- arch/i386/kernel/cpu/common.c 36
-rw-r--r-- arch/i386/kernel/cpu/intel.c 2
-rw-r--r-- arch/i386/kernel/cpu/intel_cacheinfo.c 2
-rw-r--r-- arch/i386/kernel/cpu/proc.c 7
-rw-r--r-- arch/i386/kernel/smpboot.c 73
-rw-r--r-- arch/x86_64/kernel/setup.c 69
-rw-r--r-- arch/x86_64/kernel/smpboot.c 69
-rw-r--r-- include/asm-i386/processor.h 4
-rw-r--r-- include/asm-x86_64/processor.h 4
-rw-r--r-- include/linux/bitops.h 10
11 files changed, 178 insertions, 110 deletions
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 53a1681cd96..e344ef88cfc 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c)
206 display_cacheinfo(c); 206 display_cacheinfo(c);
207 207
208 if (cpuid_eax(0x80000000) >= 0x80000008) { 208 if (cpuid_eax(0x80000000) >= 0x80000008) {
209 c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 209 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
210 if (c->x86_num_cores & (c->x86_num_cores - 1)) 210 if (c->x86_max_cores & (c->x86_max_cores - 1))
211 c->x86_num_cores = 1; 211 c->x86_max_cores = 1;
212 } 212 }
213 213
214#ifdef CONFIG_X86_HT 214#ifdef CONFIG_X86_HT
@@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c)
217 * distingush the cores. Assumes number of cores is a power 217 * distingush the cores. Assumes number of cores is a power
218 * of two. 218 * of two.
219 */ 219 */
220 if (c->x86_num_cores > 1) { 220 if (c->x86_max_cores > 1) {
221 int cpu = smp_processor_id(); 221 int cpu = smp_processor_id();
222 unsigned bits = 0; 222 unsigned bits = 0;
223 while ((1 << bits) < c->x86_num_cores) 223 while ((1 << bits) < c->x86_max_cores)
224 bits++; 224 bits++;
225 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); 225 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
226 phys_proc_id[cpu] >>= bits; 226 phys_proc_id[cpu] >>= bits;
227 printk(KERN_INFO "CPU %d(%d) -> Core %d\n", 227 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
228 cpu, c->x86_num_cores, cpu_core_id[cpu]); 228 cpu, c->x86_max_cores, cpu_core_id[cpu]);
229 } 229 }
230#endif 230#endif
231} 231}
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 35a67dab4a9..4e9c2e99b0a 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -335,7 +335,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
335 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 335 c->x86_model = c->x86_mask = 0; /* So far unknown... */
336 c->x86_vendor_id[0] = '\0'; /* Unset */ 336 c->x86_vendor_id[0] = '\0'; /* Unset */
337 c->x86_model_id[0] = '\0'; /* Unset */ 337 c->x86_model_id[0] = '\0'; /* Unset */
338 c->x86_num_cores = 1; 338 c->x86_max_cores = 1;
339 memset(&c->x86_capability, 0, sizeof c->x86_capability); 339 memset(&c->x86_capability, 0, sizeof c->x86_capability);
340 340
341 if (!have_cpuid_p()) { 341 if (!have_cpuid_p()) {
@@ -446,52 +446,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
446void __devinit detect_ht(struct cpuinfo_x86 *c) 446void __devinit detect_ht(struct cpuinfo_x86 *c)
447{ 447{
448 u32 eax, ebx, ecx, edx; 448 u32 eax, ebx, ecx, edx;
449 int index_msb, tmp; 449 int index_msb, core_bits;
450 int cpu = smp_processor_id(); 450 int cpu = smp_processor_id();
451 451
452 cpuid(1, &eax, &ebx, &ecx, &edx);
453
454 c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
455
452 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 456 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
453 return; 457 return;
454 458
455 cpuid(1, &eax, &ebx, &ecx, &edx);
456 smp_num_siblings = (ebx & 0xff0000) >> 16; 459 smp_num_siblings = (ebx & 0xff0000) >> 16;
457 460
458 if (smp_num_siblings == 1) { 461 if (smp_num_siblings == 1) {
459 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 462 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
460 } else if (smp_num_siblings > 1 ) { 463 } else if (smp_num_siblings > 1 ) {
461 index_msb = 31;
462 464
463 if (smp_num_siblings > NR_CPUS) { 465 if (smp_num_siblings > NR_CPUS) {
464 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); 466 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
465 smp_num_siblings = 1; 467 smp_num_siblings = 1;
466 return; 468 return;
467 } 469 }
468 tmp = smp_num_siblings; 470
469 while ((tmp & 0x80000000 ) == 0) { 471 index_msb = get_count_order(smp_num_siblings);
470 tmp <<=1 ;
471 index_msb--;
472 }
473 if (smp_num_siblings & (smp_num_siblings - 1))
474 index_msb++;
475 phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); 472 phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
476 473
477 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 474 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
478 phys_proc_id[cpu]); 475 phys_proc_id[cpu]);
479 476
480 smp_num_siblings = smp_num_siblings / c->x86_num_cores; 477 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
481 478
482 tmp = smp_num_siblings; 479 index_msb = get_count_order(smp_num_siblings) ;
483 index_msb = 31;
484 while ((tmp & 0x80000000) == 0) {
485 tmp <<=1 ;
486 index_msb--;
487 }
488 480
489 if (smp_num_siblings & (smp_num_siblings - 1)) 481 core_bits = get_count_order(c->x86_max_cores);
490 index_msb++;
491 482
492 cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); 483 cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
484 ((1 << core_bits) - 1);
493 485
494 if (c->x86_num_cores > 1) 486 if (c->x86_max_cores > 1)
495 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 487 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
496 cpu_core_id[cpu]); 488 cpu_core_id[cpu]);
497 } 489 }
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 43601de0f63..8d603ba2812 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -157,7 +157,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c)
157 if ( p ) 157 if ( p )
158 strcpy(c->x86_model_id, p); 158 strcpy(c->x86_model_id, p);
159 159
160 c->x86_num_cores = num_cpu_cores(c); 160 c->x86_max_cores = num_cpu_cores(c);
161 161
162 detect_ht(c); 162 detect_ht(c);
163 163
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 4dc42a189ae..e66d1409956 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -307,7 +307,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
307#ifdef CONFIG_X86_HT 307#ifdef CONFIG_X86_HT
308 else if (num_threads_sharing == smp_num_siblings) 308 else if (num_threads_sharing == smp_num_siblings)
309 this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; 309 this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
310 else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) 310 else if (num_threads_sharing == (c->x86_max_cores * smp_num_siblings))
311 this_leaf->shared_cpu_map = cpu_core_map[cpu]; 311 this_leaf->shared_cpu_map = cpu_core_map[cpu];
312 else 312 else
313 printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " 313 printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 41b871ecf4b..e7921315ae9 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
94 if (c->x86_cache_size >= 0) 94 if (c->x86_cache_size >= 0)
95 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); 95 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
96#ifdef CONFIG_X86_HT 96#ifdef CONFIG_X86_HT
97 if (c->x86_num_cores * smp_num_siblings > 1) { 97 if (c->x86_max_cores * smp_num_siblings > 1) {
98 seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); 98 seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]);
99 seq_printf(m, "siblings\t: %d\n", 99 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
100 c->x86_num_cores * smp_num_siblings);
101 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); 100 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]);
102 seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); 101 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
103 } 102 }
104#endif 103#endif
105 104
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 01b618e73ec..0a9c6465523 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -74,9 +74,11 @@ EXPORT_SYMBOL(phys_proc_id);
74int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; 74int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
75EXPORT_SYMBOL(cpu_core_id); 75EXPORT_SYMBOL(cpu_core_id);
76 76
77/* representing HT siblings of each logical CPU */
77cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 78cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
78EXPORT_SYMBOL(cpu_sibling_map); 79EXPORT_SYMBOL(cpu_sibling_map);
79 80
81/* representing HT and core siblings of each logical CPU */
80cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 82cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
81EXPORT_SYMBOL(cpu_core_map); 83EXPORT_SYMBOL(cpu_core_map);
82 84
@@ -444,35 +446,60 @@ static void __devinit smp_callin(void)
444 446
445static int cpucount; 447static int cpucount;
446 448
449/* representing cpus for which sibling maps can be computed */
450static cpumask_t cpu_sibling_setup_map;
451
447static inline void 452static inline void
448set_cpu_sibling_map(int cpu) 453set_cpu_sibling_map(int cpu)
449{ 454{
450 int i; 455 int i;
456 struct cpuinfo_x86 *c = cpu_data;
457
458 cpu_set(cpu, cpu_sibling_setup_map);
451 459
452 if (smp_num_siblings > 1) { 460 if (smp_num_siblings > 1) {
453 for (i = 0; i < NR_CPUS; i++) { 461 for_each_cpu_mask(i, cpu_sibling_setup_map) {
454 if (!cpu_isset(i, cpu_callout_map)) 462 if (phys_proc_id[cpu] == phys_proc_id[i] &&
455 continue; 463 cpu_core_id[cpu] == cpu_core_id[i]) {
456 if (cpu_core_id[cpu] == cpu_core_id[i]) {
457 cpu_set(i, cpu_sibling_map[cpu]); 464 cpu_set(i, cpu_sibling_map[cpu]);
458 cpu_set(cpu, cpu_sibling_map[i]); 465 cpu_set(cpu, cpu_sibling_map[i]);
466 cpu_set(i, cpu_core_map[cpu]);
467 cpu_set(cpu, cpu_core_map[i]);
459 } 468 }
460 } 469 }
461 } else { 470 } else {
462 cpu_set(cpu, cpu_sibling_map[cpu]); 471 cpu_set(cpu, cpu_sibling_map[cpu]);
463 } 472 }
464 473
465 if (current_cpu_data.x86_num_cores > 1) { 474 if (current_cpu_data.x86_max_cores == 1) {
466 for (i = 0; i < NR_CPUS; i++) {
467 if (!cpu_isset(i, cpu_callout_map))
468 continue;
469 if (phys_proc_id[cpu] == phys_proc_id[i]) {
470 cpu_set(i, cpu_core_map[cpu]);
471 cpu_set(cpu, cpu_core_map[i]);
472 }
473 }
474 } else {
475 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 475 cpu_core_map[cpu] = cpu_sibling_map[cpu];
476 c[cpu].booted_cores = 1;
477 return;
478 }
479
480 for_each_cpu_mask(i, cpu_sibling_setup_map) {
481 if (phys_proc_id[cpu] == phys_proc_id[i]) {
482 cpu_set(i, cpu_core_map[cpu]);
483 cpu_set(cpu, cpu_core_map[i]);
484 /*
485 * Does this new cpu bringup a new core?
486 */
487 if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
488 /*
489 * for each core in package, increment
490 * the booted_cores for this new cpu
491 */
492 if (first_cpu(cpu_sibling_map[i]) == i)
493 c[cpu].booted_cores++;
494 /*
495 * increment the core count for all
496 * the other cpus in this package
497 */
498 if (i != cpu)
499 c[i].booted_cores++;
500 } else if (i != cpu && !c[cpu].booted_cores)
501 c[cpu].booted_cores = c[i].booted_cores;
502 }
476 } 503 }
477} 504}
478 505
@@ -1096,11 +1123,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1096 1123
1097 current_thread_info()->cpu = 0; 1124 current_thread_info()->cpu = 0;
1098 smp_tune_scheduling(); 1125 smp_tune_scheduling();
1099 cpus_clear(cpu_sibling_map[0]);
1100 cpu_set(0, cpu_sibling_map[0]);
1101 1126
1102 cpus_clear(cpu_core_map[0]); 1127 set_cpu_sibling_map(0);
1103 cpu_set(0, cpu_core_map[0]);
1104 1128
1105 /* 1129 /*
1106 * If we couldn't find an SMP configuration at boot time, 1130 * If we couldn't find an SMP configuration at boot time,
@@ -1279,15 +1303,24 @@ static void
1279remove_siblinginfo(int cpu) 1303remove_siblinginfo(int cpu)
1280{ 1304{
1281 int sibling; 1305 int sibling;
1306 struct cpuinfo_x86 *c = cpu_data;
1282 1307
1308 for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
1309 cpu_clear(cpu, cpu_core_map[sibling]);
1310 /*
1311 * last thread sibling in this cpu core going down
1312 */
1313 if (cpus_weight(cpu_sibling_map[cpu]) == 1)
1314 c[sibling].booted_cores--;
1315 }
1316
1283 for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) 1317 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1284 cpu_clear(cpu, cpu_sibling_map[sibling]); 1318 cpu_clear(cpu, cpu_sibling_map[sibling]);
1285 for_each_cpu_mask(sibling, cpu_core_map[cpu])
1286 cpu_clear(cpu, cpu_core_map[sibling]);
1287 cpus_clear(cpu_sibling_map[cpu]); 1319 cpus_clear(cpu_sibling_map[cpu]);
1288 cpus_clear(cpu_core_map[cpu]); 1320 cpus_clear(cpu_core_map[cpu]);
1289 phys_proc_id[cpu] = BAD_APICID; 1321 phys_proc_id[cpu] = BAD_APICID;
1290 cpu_core_id[cpu] = BAD_APICID; 1322 cpu_core_id[cpu] = BAD_APICID;
1323 cpu_clear(cpu, cpu_sibling_setup_map);
1291} 1324}
1292 1325
1293int __cpu_disable(void) 1326int __cpu_disable(void)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 476ee034fca..40c77f6fe4b 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -795,7 +795,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
795#endif 795#endif
796 796
797 bits = 0; 797 bits = 0;
798 while ((1 << bits) < c->x86_num_cores) 798 while ((1 << bits) < c->x86_max_cores)
799 bits++; 799 bits++;
800 800
801 /* Low order bits define the core id (index of core in socket) */ 801 /* Low order bits define the core id (index of core in socket) */
@@ -828,7 +828,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
828 numa_set_node(cpu, node); 828 numa_set_node(cpu, node);
829 829
830 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", 830 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
831 cpu, c->x86_num_cores, node, cpu_core_id[cpu]); 831 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
832#endif 832#endif
833#endif 833#endif
834} 834}
@@ -877,9 +877,9 @@ static int __init init_amd(struct cpuinfo_x86 *c)
877 display_cacheinfo(c); 877 display_cacheinfo(c);
878 878
879 if (c->extended_cpuid_level >= 0x80000008) { 879 if (c->extended_cpuid_level >= 0x80000008) {
880 c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 880 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
881 if (c->x86_num_cores & (c->x86_num_cores - 1)) 881 if (c->x86_max_cores & (c->x86_max_cores - 1))
882 c->x86_num_cores = 1; 882 c->x86_max_cores = 1;
883 883
884 amd_detect_cmp(c); 884 amd_detect_cmp(c);
885 } 885 }
@@ -891,54 +891,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
891{ 891{
892#ifdef CONFIG_SMP 892#ifdef CONFIG_SMP
893 u32 eax, ebx, ecx, edx; 893 u32 eax, ebx, ecx, edx;
894 int index_msb, tmp; 894 int index_msb, core_bits;
895 int cpu = smp_processor_id(); 895 int cpu = smp_processor_id();
896 896
897 cpuid(1, &eax, &ebx, &ecx, &edx);
898
899 c->apicid = phys_pkg_id(0);
900
897 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) 901 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
898 return; 902 return;
899 903
900 cpuid(1, &eax, &ebx, &ecx, &edx);
901 smp_num_siblings = (ebx & 0xff0000) >> 16; 904 smp_num_siblings = (ebx & 0xff0000) >> 16;
902 905
903 if (smp_num_siblings == 1) { 906 if (smp_num_siblings == 1) {
904 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 907 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
905 } else if (smp_num_siblings > 1) { 908 } else if (smp_num_siblings > 1 ) {
906 index_msb = 31; 909
907 /*
908 * At this point we only support two siblings per
909 * processor package.
910 */
911 if (smp_num_siblings > NR_CPUS) { 910 if (smp_num_siblings > NR_CPUS) {
912 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); 911 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
913 smp_num_siblings = 1; 912 smp_num_siblings = 1;
914 return; 913 return;
915 } 914 }
916 tmp = smp_num_siblings; 915
917 while ((tmp & 0x80000000 ) == 0) { 916 index_msb = get_count_order(smp_num_siblings);
918 tmp <<=1 ;
919 index_msb--;
920 }
921 if (smp_num_siblings & (smp_num_siblings - 1))
922 index_msb++;
923 phys_proc_id[cpu] = phys_pkg_id(index_msb); 917 phys_proc_id[cpu] = phys_pkg_id(index_msb);
924 918
925 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 919 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
926 phys_proc_id[cpu]); 920 phys_proc_id[cpu]);
927 921
928 smp_num_siblings = smp_num_siblings / c->x86_num_cores; 922 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
929 923
930 tmp = smp_num_siblings; 924 index_msb = get_count_order(smp_num_siblings) ;
931 index_msb = 31; 925
932 while ((tmp & 0x80000000) == 0) { 926 core_bits = get_count_order(c->x86_max_cores);
933 tmp <<=1 ;
934 index_msb--;
935 }
936 if (smp_num_siblings & (smp_num_siblings - 1))
937 index_msb++;
938 927
939 cpu_core_id[cpu] = phys_pkg_id(index_msb); 928 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
929 ((1 << core_bits) - 1);
940 930
941 if (c->x86_num_cores > 1) 931 if (c->x86_max_cores > 1)
942 printk(KERN_INFO "CPU: Processor Core ID: %d\n", 932 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
943 cpu_core_id[cpu]); 933 cpu_core_id[cpu]);
944 } 934 }
@@ -1006,7 +996,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1006 c->x86_cache_alignment = c->x86_clflush_size * 2; 996 c->x86_cache_alignment = c->x86_clflush_size * 2;
1007 if (c->x86 >= 15) 997 if (c->x86 >= 15)
1008 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 998 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1009 c->x86_num_cores = intel_num_cpu_cores(c); 999 c->x86_max_cores = intel_num_cpu_cores(c);
1010 1000
1011 srat_detect_node(); 1001 srat_detect_node();
1012} 1002}
@@ -1044,7 +1034,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1044 c->x86_model_id[0] = '\0'; /* Unset */ 1034 c->x86_model_id[0] = '\0'; /* Unset */
1045 c->x86_clflush_size = 64; 1035 c->x86_clflush_size = 64;
1046 c->x86_cache_alignment = c->x86_clflush_size; 1036 c->x86_cache_alignment = c->x86_clflush_size;
1047 c->x86_num_cores = 1; 1037 c->x86_max_cores = 1;
1048 c->extended_cpuid_level = 0; 1038 c->extended_cpuid_level = 0;
1049 memset(&c->x86_capability, 0, sizeof c->x86_capability); 1039 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1050 1040
@@ -1278,13 +1268,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1278 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); 1268 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1279 1269
1280#ifdef CONFIG_SMP 1270#ifdef CONFIG_SMP
1281 if (smp_num_siblings * c->x86_num_cores > 1) { 1271 if (smp_num_siblings * c->x86_max_cores > 1) {
1282 int cpu = c - cpu_data; 1272 int cpu = c - cpu_data;
1283 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); 1273 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1284 seq_printf(m, "siblings\t: %d\n", 1274 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1285 c->x86_num_cores * smp_num_siblings);
1286 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); 1275 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1287 seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); 1276 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1288 } 1277 }
1289#endif 1278#endif
1290 1279
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index f74319a8065..2b9ddba61b3 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -64,6 +64,7 @@
64int smp_num_siblings = 1; 64int smp_num_siblings = 1;
65/* Package ID of each logical CPU */ 65/* Package ID of each logical CPU */
66u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 66u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
67/* core ID of each logical CPU */
67u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; 68u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
68EXPORT_SYMBOL(phys_proc_id); 69EXPORT_SYMBOL(phys_proc_id);
69EXPORT_SYMBOL(cpu_core_id); 70EXPORT_SYMBOL(cpu_core_id);
@@ -89,7 +90,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
89/* Set when the idlers are all forked */ 90/* Set when the idlers are all forked */
90int smp_threads_ready; 91int smp_threads_ready;
91 92
93/* representing HT siblings of each logical CPU */
92cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; 94cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
95
96/* representing HT and core siblings of each logical CPU */
93cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 97cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
94EXPORT_SYMBOL(cpu_core_map); 98EXPORT_SYMBOL(cpu_core_map);
95 99
@@ -436,30 +440,59 @@ void __cpuinit smp_callin(void)
436 cpu_set(cpuid, cpu_callin_map); 440 cpu_set(cpuid, cpu_callin_map);
437} 441}
438 442
443/* representing cpus for which sibling maps can be computed */
444static cpumask_t cpu_sibling_setup_map;
445
439static inline void set_cpu_sibling_map(int cpu) 446static inline void set_cpu_sibling_map(int cpu)
440{ 447{
441 int i; 448 int i;
449 struct cpuinfo_x86 *c = cpu_data;
450
451 cpu_set(cpu, cpu_sibling_setup_map);
442 452
443 if (smp_num_siblings > 1) { 453 if (smp_num_siblings > 1) {
444 for_each_cpu(i) { 454 for_each_cpu_mask(i, cpu_sibling_setup_map) {
445 if (cpu_core_id[cpu] == cpu_core_id[i]) { 455 if (phys_proc_id[cpu] == phys_proc_id[i] &&
456 cpu_core_id[cpu] == cpu_core_id[i]) {
446 cpu_set(i, cpu_sibling_map[cpu]); 457 cpu_set(i, cpu_sibling_map[cpu]);
447 cpu_set(cpu, cpu_sibling_map[i]); 458 cpu_set(cpu, cpu_sibling_map[i]);
459 cpu_set(i, cpu_core_map[cpu]);
460 cpu_set(cpu, cpu_core_map[i]);
448 } 461 }
449 } 462 }
450 } else { 463 } else {
451 cpu_set(cpu, cpu_sibling_map[cpu]); 464 cpu_set(cpu, cpu_sibling_map[cpu]);
452 } 465 }
453 466
454 if (current_cpu_data.x86_num_cores > 1) { 467 if (current_cpu_data.x86_max_cores == 1) {
455 for_each_cpu(i) {
456 if (phys_proc_id[cpu] == phys_proc_id[i]) {
457 cpu_set(i, cpu_core_map[cpu]);
458 cpu_set(cpu, cpu_core_map[i]);
459 }
460 }
461 } else {
462 cpu_core_map[cpu] = cpu_sibling_map[cpu]; 468 cpu_core_map[cpu] = cpu_sibling_map[cpu];
469 c[cpu].booted_cores = 1;
470 return;
471 }
472
473 for_each_cpu_mask(i, cpu_sibling_setup_map) {
474 if (phys_proc_id[cpu] == phys_proc_id[i]) {
475 cpu_set(i, cpu_core_map[cpu]);
476 cpu_set(cpu, cpu_core_map[i]);
477 /*
478 * Does this new cpu bringup a new core?
479 */
480 if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
481 /*
482 * for each core in package, increment
483 * the booted_cores for this new cpu
484 */
485 if (first_cpu(cpu_sibling_map[i]) == i)
486 c[cpu].booted_cores++;
487 /*
488 * increment the core count for all
489 * the other cpus in this package
490 */
491 if (i != cpu)
492 c[i].booted_cores++;
493 } else if (i != cpu && !c[cpu].booted_cores)
494 c[cpu].booted_cores = c[i].booted_cores;
495 }
463 } 496 }
464} 497}
465 498
@@ -993,6 +1026,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
993 nmi_watchdog_default(); 1026 nmi_watchdog_default();
994 current_cpu_data = boot_cpu_data; 1027 current_cpu_data = boot_cpu_data;
995 current_thread_info()->cpu = 0; /* needed? */ 1028 current_thread_info()->cpu = 0; /* needed? */
1029 set_cpu_sibling_map(0);
996 1030
997 if (smp_sanity_check(max_cpus) < 0) { 1031 if (smp_sanity_check(max_cpus) < 0) {
998 printk(KERN_INFO "SMP disabled\n"); 1032 printk(KERN_INFO "SMP disabled\n");
@@ -1036,8 +1070,6 @@ void __init smp_prepare_boot_cpu(void)
1036 int me = smp_processor_id(); 1070 int me = smp_processor_id();
1037 cpu_set(me, cpu_online_map); 1071 cpu_set(me, cpu_online_map);
1038 cpu_set(me, cpu_callout_map); 1072 cpu_set(me, cpu_callout_map);
1039 cpu_set(0, cpu_sibling_map[0]);
1040 cpu_set(0, cpu_core_map[0]);
1041 per_cpu(cpu_state, me) = CPU_ONLINE; 1073 per_cpu(cpu_state, me) = CPU_ONLINE;
1042} 1074}
1043 1075
@@ -1106,15 +1138,24 @@ void __init smp_cpus_done(unsigned int max_cpus)
1106static void remove_siblinginfo(int cpu) 1138static void remove_siblinginfo(int cpu)
1107{ 1139{
1108 int sibling; 1140 int sibling;
1141 struct cpuinfo_x86 *c = cpu_data;
1109 1142
1143 for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
1144 cpu_clear(cpu, cpu_core_map[sibling]);
1145 /*
1146 * last thread sibling in this cpu core going down
1147 */
1148 if (cpus_weight(cpu_sibling_map[cpu]) == 1)
1149 c[sibling].booted_cores--;
1150 }
1151
1110 for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) 1152 for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
1111 cpu_clear(cpu, cpu_sibling_map[sibling]); 1153 cpu_clear(cpu, cpu_sibling_map[sibling]);
1112 for_each_cpu_mask(sibling, cpu_core_map[cpu])
1113 cpu_clear(cpu, cpu_core_map[sibling]);
1114 cpus_clear(cpu_sibling_map[cpu]); 1154 cpus_clear(cpu_sibling_map[cpu]);
1115 cpus_clear(cpu_core_map[cpu]); 1155 cpus_clear(cpu_core_map[cpu]);
1116 phys_proc_id[cpu] = BAD_APICID; 1156 phys_proc_id[cpu] = BAD_APICID;
1117 cpu_core_id[cpu] = BAD_APICID; 1157 cpu_core_id[cpu] = BAD_APICID;
1158 cpu_clear(cpu, cpu_sibling_setup_map);
1118} 1159}
1119 1160
1120void remove_cpu_from_maps(void) 1161void remove_cpu_from_maps(void)
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 0a4ec764377..9cd4a05234a 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -65,7 +65,9 @@ struct cpuinfo_x86 {
65 int f00f_bug; 65 int f00f_bug;
66 int coma_bug; 66 int coma_bug;
67 unsigned long loops_per_jiffy; 67 unsigned long loops_per_jiffy;
68 unsigned char x86_num_cores; 68 unsigned char x86_max_cores; /* cpuid returned max cores value */
69 unsigned char booted_cores; /* number of cores as seen by OS */
70 unsigned char apicid;
69} __attribute__((__aligned__(SMP_CACHE_BYTES))); 71} __attribute__((__aligned__(SMP_CACHE_BYTES)));
70 72
71#define X86_VENDOR_INTEL 0 73#define X86_VENDOR_INTEL 0
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index 03837d34fba..4861246548f 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -61,10 +61,12 @@ struct cpuinfo_x86 {
61 int x86_cache_alignment; 61 int x86_cache_alignment;
62 int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ 62 int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/
63 __u8 x86_virt_bits, x86_phys_bits; 63 __u8 x86_virt_bits, x86_phys_bits;
64 __u8 x86_num_cores; 64 __u8 x86_max_cores; /* cpuid returned max cores value */
65 __u32 x86_power; 65 __u32 x86_power;
66 __u32 extended_cpuid_level; /* Max extended CPUID function supported */ 66 __u32 extended_cpuid_level; /* Max extended CPUID function supported */
67 unsigned long loops_per_jiffy; 67 unsigned long loops_per_jiffy;
68 __u8 apicid;
69 __u8 booted_cores; /* number of cores as seen by OS */
68} ____cacheline_aligned; 70} ____cacheline_aligned;
69 71
70#define X86_VENDOR_INTEL 0 72#define X86_VENDOR_INTEL 0
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cb3c3ef50f5..38c2fb7ebe0 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -84,6 +84,16 @@ static __inline__ int get_bitmask_order(unsigned int count)
84 return order; /* We could be slightly more clever with -1 here... */ 84 return order; /* We could be slightly more clever with -1 here... */
85} 85}
86 86
87static __inline__ int get_count_order(unsigned int count)
88{
89 int order;
90
91 order = fls(count) - 1;
92 if (count & (count - 1))
93 order++;
94 return order;
95}
96
87/* 97/*
88 * hweightN: returns the hamming weight (i.e. the number 98 * hweightN: returns the hamming weight (i.e. the number
89 * of bits set) of a N-bit word 99 * of bits set) of a N-bit word