Diffstat (limited to 'arch/x86/kernel/cpu/amd.c')
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 167
1 file changed, 121 insertions(+), 46 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ba5f62f45f01..b13ed393dfce 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
         /* calling is from identify_secondary_cpu() ? */
-        if (c->cpu_index == boot_cpu_id)
+        if (!c->cpu_index)
                 return;
 
         /*
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 }
 #endif
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
+/*
+ * To workaround broken NUMA config.  Read the comment in
+ * srat_detect_node().
+ */
 static int __cpuinit nearby_node(int apicid)
 {
         int i, node;
 
         for (i = apicid - 1; i >= 0; i--) {
-                node = apicid_to_node[i];
+                node = __apicid_to_node[i];
                 if (node != NUMA_NO_NODE && node_online(node))
                         return node;
         }
         for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-                node = apicid_to_node[i];
+                node = __apicid_to_node[i];
                 if (node != NUMA_NO_NODE && node_online(node))
                         return node;
         }
@@ -253,37 +257,55 @@ static int __cpuinit nearby_node(int apicid)
 #endif
 
 /*
- * Fixup core topology information for AMD multi-node processors.
- * Assumption: Number of cores in each internal node is the same.
+ * Fixup core topology information for
+ * (1) AMD multi-node processors
+ *     Assumption: Number of cores in each internal node is the same.
+ * (2) AMD processors supporting compute units
  */
 #ifdef CONFIG_X86_HT
-static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
+static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
 {
-        unsigned long long value;
-        u32 nodes, cores_per_node;
+        u32 nodes, cores_per_cu = 1;
+        u8 node_id;
         int cpu = smp_processor_id();
 
-        if (!cpu_has(c, X86_FEATURE_NODEID_MSR))
-                return;
-
-        /* fixup topology information only once for a core */
-        if (cpu_has(c, X86_FEATURE_AMD_DCM))
+        /* get information required for multi-node processors */
+        if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+                u32 eax, ebx, ecx, edx;
+
+                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+                nodes = ((ecx >> 8) & 7) + 1;
+                node_id = ecx & 7;
+
+                /* get compute unit information */
+                smp_num_siblings = ((ebx >> 8) & 3) + 1;
+                c->compute_unit_id = ebx & 0xff;
+                cores_per_cu += ((ebx >> 8) & 3);
+        } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
+                u64 value;
+
+                rdmsrl(MSR_FAM10H_NODE_ID, value);
+                nodes = ((value >> 3) & 7) + 1;
+                node_id = value & 7;
+        } else
                 return;
 
-        rdmsrl(MSR_FAM10H_NODE_ID, value);
-
-        nodes = ((value >> 3) & 7) + 1;
-        if (nodes == 1)
-                return;
+        /* fixup multi-node processor information */
+        if (nodes > 1) {
+                u32 cores_per_node;
+                u32 cus_per_node;
 
-        set_cpu_cap(c, X86_FEATURE_AMD_DCM);
-        cores_per_node = c->x86_max_cores / nodes;
+                set_cpu_cap(c, X86_FEATURE_AMD_DCM);
+                cores_per_node = c->x86_max_cores / nodes;
+                cus_per_node = cores_per_node / cores_per_cu;
 
-        /* store NodeID, use llc_shared_map to store sibling info */
-        per_cpu(cpu_llc_id, cpu) = value & 7;
+                /* store NodeID, use llc_shared_map to store sibling info */
+                per_cpu(cpu_llc_id, cpu) = node_id;
 
-        /* fixup core id to be in range from 0 to (cores_per_node - 1) */
-        c->cpu_core_id = c->cpu_core_id % cores_per_node;
+                /* core id has to be in the [0 .. cores_per_node - 1] range */
+                c->cpu_core_id %= cores_per_node;
+                c->compute_unit_id %= cus_per_node;
+        }
 }
 #endif
 
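For readers who want to poke at the new TOPOEXT path from user space, here is a minimal sketch (not part of the patch) that decodes CPUID leaf 0x8000001e with the same bit fields amd_get_topology() uses above. The file name and output format are invented for illustration; it assumes a GCC or Clang toolchain on x86 (for <cpuid.h>) and an AMD CPU that advertises topology extensions.

/* topoext_demo.c - hypothetical user-space decode of CPUID 0x8000001e,
 * mirroring the bit fields used by amd_get_topology() in the hunk above.
 * Build: gcc -o topoext_demo topoext_demo.c
 */
#include <stdio.h>
#include <cpuid.h>      /* __get_cpuid(): GCC/Clang wrapper around CPUID */

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0x8000001e is only meaningful when the TOPOEXT feature
         * (CPUID 0x80000001 ECX bit 22) is set; a real program should
         * check that first, as the kernel does via cpu_has(). */
        if (!__get_cpuid(0x8000001e, &eax, &ebx, &ecx, &edx)) {
                fprintf(stderr, "CPUID leaf 0x8000001e not available\n");
                return 1;
        }

        /* Same decode as the kernel hunk:
         *   nodes        = ((ecx >> 8) & 7) + 1
         *   node_id      = ecx & 7
         *   cores_per_cu = ((ebx >> 8) & 3) + 1
         *   cu_id        = ebx & 0xff
         */
        printf("nodes per processor : %u\n", ((ecx >> 8) & 7) + 1);
        printf("node id             : %u\n", ecx & 7);
        printf("cores per CU        : %u\n", ((ebx >> 8) & 3) + 1);
        printf("compute unit id     : %u\n", ebx & 0xff);
        return 0;
}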
@@ -304,9 +326,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
         c->phys_proc_id = c->initial_apicid >> bits;
         /* use socket ID also for last level cache */
         per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
-        /* fixup topology information on multi-node processors */
-        if ((c->x86 == 0x10) && (c->x86_model == 9))
-                amd_fixup_dcm(c);
+        amd_get_topology(c);
 #endif
 }
 
@@ -322,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
 
 static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
         int cpu = smp_processor_id();
         int node;
         unsigned apicid = c->apicid;
 
-        node = per_cpu(cpu_llc_id, cpu);
+        node = numa_cpu_node(cpu);
+        if (node == NUMA_NO_NODE)
+                node = per_cpu(cpu_llc_id, cpu);
 
-        if (apicid_to_node[apicid] != NUMA_NO_NODE)
-                node = apicid_to_node[apicid];
         if (!node_online(node)) {
-                /* Two possibilities here:
-                   - The CPU is missing memory and no node was created.
-                   In that case try picking one from a nearby CPU
-                   - The APIC IDs differ from the HyperTransport node IDs
-                   which the K8 northbridge parsing fills in.
-                   Assume they are all increased by a constant offset,
-                   but in the same order as the HT nodeids.
-                   If that doesn't result in a usable node fall back to the
-                   path for the previous case.  */
-
+                /*
+                 * Two possibilities here:
+                 *
+                 * - The CPU is missing memory and no node was created.  In
+                 *   that case try picking one from a nearby CPU.
+                 *
+                 * - The APIC IDs differ from the HyperTransport node IDs
+                 *   which the K8 northbridge parsing fills in.  Assume
+                 *   they are all increased by a constant offset, but in
+                 *   the same order as the HT nodeids.  If that doesn't
+                 *   result in a usable node fall back to the path for the
+                 *   previous case.
+                 *
+                 * This workaround operates directly on the mapping between
+                 * APIC ID and NUMA node, assuming certain relationship
+                 * between APIC ID, HT node ID and NUMA topology.  As going
+                 * through CPU mapping may alter the outcome, directly
+                 * access __apicid_to_node[].
+                 */
                 int ht_nodeid = c->initial_apicid;
 
                 if (ht_nodeid >= 0 &&
-                    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-                        node = apicid_to_node[ht_nodeid];
+                    __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+                        node = __apicid_to_node[ht_nodeid];
                 /* Pick a nearby node */
                 if (!node_online(node))
                         node = nearby_node(apicid);
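The fallback chain described in the comment above is easier to see in isolation. The sketch below (illustration only, not kernel code) replays nearby_node() from the earlier hunk against an invented apicid-to-node table; in the kernel the table is __apicid_to_node[], filled in by SRAT or K8 northbridge parsing, and node_online() is the real nodemask test.

/* numa_fallback_demo.c - standalone simulation of the node-selection
 * fallback in srat_detect_node()/nearby_node() above.  The table is
 * invented for illustration.
 */
#include <stdio.h>

#define NUMA_NO_NODE   (-1)
#define MAX_LOCAL_APIC 8

/* Hypothetical mapping with holes: APIC IDs 2 and 5 have no node. */
static int apicid_to_node[MAX_LOCAL_APIC] = {
        0, 0, NUMA_NO_NODE, 1, 1, NUMA_NO_NODE, 2, 2
};

static int node_online(int node)
{
        return node >= 0 && node <= 2;  /* pretend nodes 0-2 exist */
}

/* Same search as nearby_node() in the hunk further up: scan lower
 * APIC IDs first, then higher ones, for the first online node. */
static int nearby_node(int apicid)
{
        int i, node;

        for (i = apicid - 1; i >= 0; i--) {
                node = apicid_to_node[i];
                if (node != NUMA_NO_NODE && node_online(node))
                        return node;
        }
        for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
                node = apicid_to_node[i];
                if (node != NUMA_NO_NODE && node_online(node))
                        return node;
        }
        return 0;       /* fall back to node 0 as a last resort */
}

int main(void)
{
        /* APIC ID 5 has no node of its own; the scan finds the node of
         * APIC ID 4 (node 1) first. */
        printf("APIC 5 -> node %d\n", nearby_node(5));
        printf("APIC 2 -> node %d\n", nearby_node(2));
        return 0;
}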
@@ -412,6 +441,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
                 set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
         }
 #endif
+
+        /* We need to do the following only once */
+        if (c != &boot_cpu_data)
+                return;
+
+        if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
+
+                if (c->x86 > 0x10 ||
+                    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
+                        u64 val;
+
+                        rdmsrl(MSR_K7_HWCR, val);
+                        if (!(val & BIT(24)))
+                                printk(KERN_WARNING FW_BUG "TSC doesn't count "
+                                        "with P0 frequency!\n");
+                }
+        }
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
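The new HWCR check can be reproduced from user space through the msr character device. The following sketch is a hypothetical companion to the hunk above, not part of the patch: it assumes MSR_K7_HWCR lives at 0xc0010015 (its msr-index.h value), that the "msr" module is loaded, and that the program runs as root on an AMD CPU.

/* hwcr_tsc_check.c - hypothetical user-space version of the HWCR check
 * added to early_init_amd() above.  Reads MSR_K7_HWCR via /dev/cpu/0/msr
 * and tests bit 24, the bit the patch warns about when clear.
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_K7_HWCR 0xc0010015

int main(void)
{
        uint64_t val;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0) {
                perror("open /dev/cpu/0/msr (root + msr module required)");
                return 1;
        }
        /* The msr device maps the file offset to the MSR address. */
        if (pread(fd, &val, sizeof(val), MSR_K7_HWCR) != sizeof(val)) {
                perror("pread");
                close(fd);
                return 1;
        }
        close(fd);

        if (!(val & (1ULL << 24)))
                puts("firmware bug: TSC doesn't count with P0 frequency!");
        else
                puts("HWCR bit 24 set: TSC counts at P0 frequency");
        return 0;
}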
@@ -523,7 +569,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #endif
 
         if (c->extended_cpuid_level >= 0x80000006) {
-                if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
+                if (cpuid_edx(0x80000006) & 0xf000)
                         num_cache_leaves = 4;
                 else
                         num_cache_leaves = 3;
@@ -565,6 +611,35 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
                 }
         }
 #endif
+
+        /*
+         * Family 0x12 and above processors have APIC timer
+         * running in deep C states.
+         */
+        if (c->x86 > 0x11)
+                set_cpu_cap(c, X86_FEATURE_ARAT);
+
+        /*
+         * Disable GART TLB Walk Errors on Fam10h. We do this here
+         * because this is always needed when GART is enabled, even in a
+         * kernel which has no MCE support built in.
+         */
+        if (c->x86 == 0x10) {
+                /*
+                 * BIOS should disable GartTlbWlk Errors themself. If
+                 * it doesn't do it here as suggested by the BKDG.
+                 *
+                 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
+                 */
+                u64 mask;
+                int err;
+
+                err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
+                if (err == 0) {
+                        mask |= (1 << 10);
+                        checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+                }
+        }
 }
 
 #ifdef CONFIG_X86_32
@@ -639,7 +714,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383);
 
 bool cpu_has_amd_erratum(const int *erratum)
 {
-        struct cpuinfo_x86 *cpu = &current_cpu_data;
+        struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
         int osvw_id = *erratum++;
         u32 range;
         u32 ms;