aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c13
-rw-r--r--arch/x86/kernel/cpu/common.c50
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c221
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h14
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c60
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c93
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile9
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c272
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.h38
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd_64.c)0
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c250
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c248
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c129
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c3
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c445
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c17
23 files changed, 1100 insertions, 839 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3efcb2b96a15..c1f253dac155 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER
7CFLAGS_REMOVE_common.o = -pg 7CFLAGS_REMOVE_common.o = -pg
8endif 8endif
9 9
10# Make sure load_percpu_segment has no stackprotector
11nostackp := $(call cc-option, -fno-stack-protector)
12CFLAGS_common.o := $(nostackp)
13
10obj-y := intel_cacheinfo.o addon_cpuid_features.o 14obj-y := intel_cacheinfo.o addon_cpuid_features.o
11obj-y += proc.o capflags.o powerflags.o common.o 15obj-y += proc.o capflags.o powerflags.o common.o
12obj-y += vmware.o hypervisor.o 16obj-y += vmware.o hypervisor.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e5b27d8f1b47..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
258{ 258{
259#ifdef CONFIG_X86_HT 259#ifdef CONFIG_X86_HT
260 unsigned bits; 260 unsigned bits;
261 int cpu = smp_processor_id();
261 262
262 bits = c->x86_coreid_bits; 263 bits = c->x86_coreid_bits;
263
264 /* Low order bits define the core id (index of core in socket) */ 264 /* Low order bits define the core id (index of core in socket) */
265 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); 265 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
266 /* Convert the initial APIC ID into the socket ID */ 266 /* Convert the initial APIC ID into the socket ID */
267 c->phys_proc_id = c->initial_apicid >> bits; 267 c->phys_proc_id = c->initial_apicid >> bits;
268 /* use socket ID also for last level cache */
269 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
268#endif 270#endif
269} 271}
270 272
@@ -354,7 +356,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
354#endif 356#endif
355#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) 357#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
356 /* check CPU config space for extended APIC ID */ 358 /* check CPU config space for extended APIC ID */
357 if (c->x86 >= 0xf) { 359 if (cpu_has_apic && c->x86 >= 0xf) {
358 unsigned int val; 360 unsigned int val;
359 val = read_pci_config(0, 24, 0, 0x68); 361 val = read_pci_config(0, 24, 0, 0x68);
360 if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) 362 if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
@@ -398,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
398 level = cpuid_eax(1); 400 level = cpuid_eax(1);
399 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
400 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 402 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403
404 /*
405 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it.
407 */
408 if (c->x86_model < 0x14)
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
401 } 410 }
402 if (c->x86 == 0x10 || c->x86 == 0x11) 411 if (c->x86 == 0x10 || c->x86 == 0x11)
403 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 412 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9fa33886c0d7..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
60} 60}
61 61
62static const struct cpu_dev *this_cpu __cpuinitdata; 62static void __cpuinit default_init(struct cpuinfo_x86 *c)
63{
64#ifdef CONFIG_X86_64
65 display_cacheinfo(c);
66#else
67 /* Not much we can do here... */
68 /* Check if at least it has cpuid */
69 if (c->cpuid_level == -1) {
70 /* No cpuid. It must be an ancient CPU */
71 if (c->x86 == 4)
72 strcpy(c->x86_model_id, "486");
73 else if (c->x86 == 3)
74 strcpy(c->x86_model_id, "386");
75 }
76#endif
77}
78
79static const struct cpu_dev __cpuinitconst default_cpu = {
80 .c_init = default_init,
81 .c_vendor = "Unknown",
82 .c_x86_vendor = X86_VENDOR_UNKNOWN,
83};
84
85static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
63 86
64DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 87DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 88#ifdef CONFIG_X86_64
@@ -108,7 +131,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
108 /* data */ 131 /* data */
109 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 132 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
110 133
111 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 134 [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
112 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 135 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
113 GDT_STACK_CANARY_INIT 136 GDT_STACK_CANARY_INIT
114#endif 137#endif
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
332 355
333static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 356static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
334 357
335static void __cpuinit default_init(struct cpuinfo_x86 *c)
336{
337#ifdef CONFIG_X86_64
338 display_cacheinfo(c);
339#else
340 /* Not much we can do here... */
341 /* Check if at least it has cpuid */
342 if (c->cpuid_level == -1) {
343 /* No cpuid. It must be an ancient CPU */
344 if (c->x86 == 4)
345 strcpy(c->x86_model_id, "486");
346 else if (c->x86 == 3)
347 strcpy(c->x86_model_id, "386");
348 }
349#endif
350}
351
352static const struct cpu_dev __cpuinitconst default_cpu = {
353 .c_init = default_init,
354 .c_vendor = "Unknown",
355 .c_x86_vendor = X86_VENDOR_UNKNOWN,
356};
357
358static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 358static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
359{ 359{
360 unsigned int *v; 360 unsigned int *v;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index cf52215d9eb1..2a50ef891000 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,3 +1,4 @@
1
1/* 2/*
2 * (c) 2003-2006 Advanced Micro Devices, Inc. 3 * (c) 2003-2006 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the 4 * Your use of this code is subject to the terms and conditions of the
@@ -117,20 +118,17 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
117 u32 i = 0; 118 u32 i = 0;
118 119
119 if (cpu_family == CPU_HW_PSTATE) { 120 if (cpu_family == CPU_HW_PSTATE) {
120 if (data->currpstate == HW_PSTATE_INVALID) { 121 rdmsr(MSR_PSTATE_STATUS, lo, hi);
121 /* read (initial) hw pstate if not yet set */ 122 i = lo & HW_PSTATE_MASK;
122 rdmsr(MSR_PSTATE_STATUS, lo, hi); 123 data->currpstate = i;
123 i = lo & HW_PSTATE_MASK; 124
124 125 /*
125 /* 126 * a workaround for family 11h erratum 311 might cause
126 * a workaround for family 11h erratum 311 might cause 127 * an "out-of-range Pstate if the core is in Pstate-0
127 * an "out-of-range Pstate if the core is in Pstate-0 128 */
128 */ 129 if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
129 if (i >= data->numps) 130 data->currpstate = HW_PSTATE_0;
130 data->currpstate = HW_PSTATE_0; 131
131 else
132 data->currpstate = i;
133 }
134 return 0; 132 return 0;
135 } 133 }
136 do { 134 do {
@@ -301,7 +299,7 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
301static int transition_fid_vid(struct powernow_k8_data *data, 299static int transition_fid_vid(struct powernow_k8_data *data,
302 u32 reqfid, u32 reqvid) 300 u32 reqfid, u32 reqvid)
303{ 301{
304 if (core_voltage_pre_transition(data, reqvid)) 302 if (core_voltage_pre_transition(data, reqvid, reqfid))
305 return 1; 303 return 1;
306 304
307 if (core_frequency_transition(data, reqfid)) 305 if (core_frequency_transition(data, reqfid))
@@ -329,17 +327,20 @@ static int transition_fid_vid(struct powernow_k8_data *data,
329 327
330/* Phase 1 - core voltage transition ... setup voltage */ 328/* Phase 1 - core voltage transition ... setup voltage */
331static int core_voltage_pre_transition(struct powernow_k8_data *data, 329static int core_voltage_pre_transition(struct powernow_k8_data *data,
332 u32 reqvid) 330 u32 reqvid, u32 reqfid)
333{ 331{
334 u32 rvosteps = data->rvo; 332 u32 rvosteps = data->rvo;
335 u32 savefid = data->currfid; 333 u32 savefid = data->currfid;
336 u32 maxvid, lo; 334 u32 maxvid, lo, rvomult = 1;
337 335
338 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " 336 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
339 "reqvid 0x%x, rvo 0x%x\n", 337 "reqvid 0x%x, rvo 0x%x\n",
340 smp_processor_id(), 338 smp_processor_id(),
341 data->currfid, data->currvid, reqvid, data->rvo); 339 data->currfid, data->currvid, reqvid, data->rvo);
342 340
341 if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
342 rvomult = 2;
343 rvosteps *= rvomult;
343 rdmsr(MSR_FIDVID_STATUS, lo, maxvid); 344 rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
344 maxvid = 0x1f & (maxvid >> 16); 345 maxvid = 0x1f & (maxvid >> 16);
345 dprintk("ph1 maxvid=0x%x\n", maxvid); 346 dprintk("ph1 maxvid=0x%x\n", maxvid);
@@ -353,7 +354,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
353 return 1; 354 return 1;
354 } 355 }
355 356
356 while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { 357 while ((rvosteps > 0) &&
358 ((rvomult * data->rvo + data->currvid) > reqvid)) {
357 if (data->currvid == maxvid) { 359 if (data->currvid == maxvid) {
358 rvosteps = 0; 360 rvosteps = 0;
359 } else { 361 } else {
@@ -386,13 +388,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
386 u32 vcoreqfid, vcocurrfid, vcofiddiff; 388 u32 vcoreqfid, vcocurrfid, vcofiddiff;
387 u32 fid_interval, savevid = data->currvid; 389 u32 fid_interval, savevid = data->currvid;
388 390
389 if ((reqfid < HI_FID_TABLE_BOTTOM) &&
390 (data->currfid < HI_FID_TABLE_BOTTOM)) {
391 printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
392 "0x%x 0x%x\n", reqfid, data->currfid);
393 return 1;
394 }
395
396 if (data->currfid == reqfid) { 391 if (data->currfid == reqfid) {
397 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", 392 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
398 data->currfid); 393 data->currfid);
@@ -409,6 +404,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
409 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid 404 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
410 : vcoreqfid - vcocurrfid; 405 : vcoreqfid - vcocurrfid;
411 406
407 if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
408 vcofiddiff = 0;
409
412 while (vcofiddiff > 2) { 410 while (vcofiddiff > 2) {
413 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); 411 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
414 412
@@ -510,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
510 return 0; 508 return 0;
511} 509}
512 510
513static int check_supported_cpu(unsigned int cpu) 511static void check_supported_cpu(void *_rc)
514{ 512{
515 cpumask_t oldmask;
516 u32 eax, ebx, ecx, edx; 513 u32 eax, ebx, ecx, edx;
517 unsigned int rc = 0; 514 int *rc = _rc;
518 515
519 oldmask = current->cpus_allowed; 516 *rc = -ENODEV;
520 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
521
522 if (smp_processor_id() != cpu) {
523 printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu);
524 goto out;
525 }
526 517
527 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) 518 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
528 goto out; 519 return;
529 520
530 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 521 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
531 if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && 522 if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
532 ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) 523 ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
533 goto out; 524 return;
534 525
535 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { 526 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
536 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || 527 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
537 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { 528 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
538 printk(KERN_INFO PFX 529 printk(KERN_INFO PFX
539 "Processor cpuid %x not supported\n", eax); 530 "Processor cpuid %x not supported\n", eax);
540 goto out; 531 return;
541 } 532 }
542 533
543 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); 534 eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
544 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { 535 if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
545 printk(KERN_INFO PFX 536 printk(KERN_INFO PFX
546 "No frequency change capabilities detected\n"); 537 "No frequency change capabilities detected\n");
547 goto out; 538 return;
548 } 539 }
549 540
550 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 541 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
@@ -552,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu)
552 != P_STATE_TRANSITION_CAPABLE) { 543 != P_STATE_TRANSITION_CAPABLE) {
553 printk(KERN_INFO PFX 544 printk(KERN_INFO PFX
554 "Power state transitions not supported\n"); 545 "Power state transitions not supported\n");
555 goto out; 546 return;
556 } 547 }
557 } else { /* must be a HW Pstate capable processor */ 548 } else { /* must be a HW Pstate capable processor */
558 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 549 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
559 if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) 550 if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
560 cpu_family = CPU_HW_PSTATE; 551 cpu_family = CPU_HW_PSTATE;
561 else 552 else
562 goto out; 553 return;
563 } 554 }
564 555
565 rc = 1; 556 *rc = 0;
566
567out:
568 set_cpus_allowed_ptr(current, &oldmask);
569 return rc;
570} 557}
571 558
572static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, 559static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
@@ -823,13 +810,14 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
823 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 810 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
824 return; 811 return;
825 812
826 control = data->acpi_data.states[index].control; data->irt = (control 813 control = data->acpi_data.states[index].control;
827 >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >> 814 data->irt = (control >> IRT_SHIFT) & IRT_MASK;
828 RVO_SHIFT) & RVO_MASK; data->exttype = (control 815 data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
829 >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; 816 data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
830 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 817 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
831 << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable = 818 data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
832 (control >> VST_SHIFT) & VST_MASK; } 819 data->vstable = (control >> VST_SHIFT) & VST_MASK;
820}
833 821
834static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 822static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
835{ 823{
@@ -1046,6 +1034,19 @@ static int get_transition_latency(struct powernow_k8_data *data)
1046 if (cur_latency > max_latency) 1034 if (cur_latency > max_latency)
1047 max_latency = cur_latency; 1035 max_latency = cur_latency;
1048 } 1036 }
1037 if (max_latency == 0) {
1038 /*
1039 * Fam 11h always returns 0 as transition latency.
1040 * This is intended and means "very fast". While cpufreq core
1041 * and governors currently can handle that gracefully, better
1042 * set it to 1 to avoid problems in the future.
1043 * For all others it's a BIOS bug.
1044 */
1045 if (!boot_cpu_data.x86 == 0x11)
1046 printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
1047 "latency\n");
1048 max_latency = 1;
1049 }
1049 /* value in usecs, needs to be in nanoseconds */ 1050 /* value in usecs, needs to be in nanoseconds */
1050 return 1000 * max_latency; 1051 return 1000 * max_latency;
1051} 1052}
@@ -1080,20 +1081,12 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
1080 return 0; 1081 return 0;
1081 } 1082 }
1082 1083
1083 if ((fid < HI_FID_TABLE_BOTTOM) &&
1084 (data->currfid < HI_FID_TABLE_BOTTOM)) {
1085 printk(KERN_ERR PFX
1086 "ignoring illegal change in lo freq table-%x to 0x%x\n",
1087 data->currfid, fid);
1088 return 1;
1089 }
1090
1091 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", 1084 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
1092 smp_processor_id(), fid, vid); 1085 smp_processor_id(), fid, vid);
1093 freqs.old = find_khz_freq_from_fid(data->currfid); 1086 freqs.old = find_khz_freq_from_fid(data->currfid);
1094 freqs.new = find_khz_freq_from_fid(fid); 1087 freqs.new = find_khz_freq_from_fid(fid);
1095 1088
1096 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1089 for_each_cpu(i, data->available_cores) {
1097 freqs.cpu = i; 1090 freqs.cpu = i;
1098 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 1091 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1099 } 1092 }
@@ -1101,7 +1094,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
1101 res = transition_fid_vid(data, fid, vid); 1094 res = transition_fid_vid(data, fid, vid);
1102 freqs.new = find_khz_freq_from_fid(data->currfid); 1095 freqs.new = find_khz_freq_from_fid(data->currfid);
1103 1096
1104 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1097 for_each_cpu(i, data->available_cores) {
1105 freqs.cpu = i; 1098 freqs.cpu = i;
1106 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 1099 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1107 } 1100 }
@@ -1126,7 +1119,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
1126 data->currpstate); 1119 data->currpstate);
1127 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1120 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1128 1121
1129 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1122 for_each_cpu(i, data->available_cores) {
1130 freqs.cpu = i; 1123 freqs.cpu = i;
1131 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 1124 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1132 } 1125 }
@@ -1134,7 +1127,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
1134 res = transition_pstate(data, pstate); 1127 res = transition_pstate(data, pstate);
1135 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1128 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1136 1129
1137 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1130 for_each_cpu(i, data->available_cores) {
1138 freqs.cpu = i; 1131 freqs.cpu = i;
1139 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 1132 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1140 } 1133 }
@@ -1235,21 +1228,47 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
1235 return cpufreq_frequency_table_verify(pol, data->powernow_table); 1228 return cpufreq_frequency_table_verify(pol, data->powernow_table);
1236} 1229}
1237 1230
1238static const char ACPI_PSS_BIOS_BUG_MSG[] = 1231struct init_on_cpu {
1239 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" 1232 struct powernow_k8_data *data;
1240 KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; 1233 int rc;
1234};
1235
1236static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
1237{
1238 struct init_on_cpu *init_on_cpu = _init_on_cpu;
1239
1240 if (pending_bit_stuck()) {
1241 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1242 init_on_cpu->rc = -ENODEV;
1243 return;
1244 }
1245
1246 if (query_current_values_with_pending_wait(init_on_cpu->data)) {
1247 init_on_cpu->rc = -ENODEV;
1248 return;
1249 }
1250
1251 if (cpu_family == CPU_OPTERON)
1252 fidvid_msr_init();
1253
1254 init_on_cpu->rc = 0;
1255}
1241 1256
1242/* per CPU init entry point to the driver */ 1257/* per CPU init entry point to the driver */
1243static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) 1258static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1244{ 1259{
1260 static const char ACPI_PSS_BIOS_BUG_MSG[] =
1261 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
1262 FW_BUG PFX "Try again with latest BIOS.\n";
1245 struct powernow_k8_data *data; 1263 struct powernow_k8_data *data;
1246 cpumask_t oldmask; 1264 struct init_on_cpu init_on_cpu;
1247 int rc; 1265 int rc;
1248 1266
1249 if (!cpu_online(pol->cpu)) 1267 if (!cpu_online(pol->cpu))
1250 return -ENODEV; 1268 return -ENODEV;
1251 1269
1252 if (!check_supported_cpu(pol->cpu)) 1270 smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
1271 if (rc)
1253 return -ENODEV; 1272 return -ENODEV;
1254 1273
1255 data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); 1274 data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
@@ -1289,27 +1308,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1289 pol->cpuinfo.transition_latency = get_transition_latency(data); 1308 pol->cpuinfo.transition_latency = get_transition_latency(data);
1290 1309
1291 /* only run on specific CPU from here on */ 1310 /* only run on specific CPU from here on */
1292 oldmask = current->cpus_allowed; 1311 init_on_cpu.data = data;
1293 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); 1312 smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
1294 1313 &init_on_cpu, 1);
1295 if (smp_processor_id() != pol->cpu) { 1314 rc = init_on_cpu.rc;
1296 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1315 if (rc != 0)
1297 goto err_out_unmask; 1316 goto err_out_exit_acpi;
1298 }
1299
1300 if (pending_bit_stuck()) {
1301 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1302 goto err_out_unmask;
1303 }
1304
1305 if (query_current_values_with_pending_wait(data))
1306 goto err_out_unmask;
1307
1308 if (cpu_family == CPU_OPTERON)
1309 fidvid_msr_init();
1310
1311 /* run on any CPU again */
1312 set_cpus_allowed_ptr(current, &oldmask);
1313 1317
1314 if (cpu_family == CPU_HW_PSTATE) 1318 if (cpu_family == CPU_HW_PSTATE)
1315 cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); 1319 cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
@@ -1346,8 +1350,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1346 1350
1347 return 0; 1351 return 0;
1348 1352
1349err_out_unmask: 1353err_out_exit_acpi:
1350 set_cpus_allowed_ptr(current, &oldmask);
1351 powernow_k8_cpu_exit_acpi(data); 1354 powernow_k8_cpu_exit_acpi(data);
1352 1355
1353err_out: 1356err_out:
@@ -1372,28 +1375,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1372 return 0; 1375 return 0;
1373} 1376}
1374 1377
1378static void query_values_on_cpu(void *_err)
1379{
1380 int *err = _err;
1381 struct powernow_k8_data *data = __get_cpu_var(powernow_data);
1382
1383 *err = query_current_values_with_pending_wait(data);
1384}
1385
1375static unsigned int powernowk8_get(unsigned int cpu) 1386static unsigned int powernowk8_get(unsigned int cpu)
1376{ 1387{
1377 struct powernow_k8_data *data; 1388 struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
1378 cpumask_t oldmask = current->cpus_allowed;
1379 unsigned int khz = 0; 1389 unsigned int khz = 0;
1380 unsigned int first; 1390 int err;
1381
1382 first = cpumask_first(cpu_core_mask(cpu));
1383 data = per_cpu(powernow_data, first);
1384 1391
1385 if (!data) 1392 if (!data)
1386 return -EINVAL; 1393 return -EINVAL;
1387 1394
1388 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 1395 smp_call_function_single(cpu, query_values_on_cpu, &err, true);
1389 if (smp_processor_id() != cpu) { 1396 if (err)
1390 printk(KERN_ERR PFX
1391 "limiting to CPU %d failed in powernowk8_get\n", cpu);
1392 set_cpus_allowed_ptr(current, &oldmask);
1393 return 0;
1394 }
1395
1396 if (query_current_values_with_pending_wait(data))
1397 goto out; 1397 goto out;
1398 1398
1399 if (cpu_family == CPU_HW_PSTATE) 1399 if (cpu_family == CPU_HW_PSTATE)
@@ -1404,7 +1404,6 @@ static unsigned int powernowk8_get(unsigned int cpu)
1404 1404
1405 1405
1406out: 1406out:
1407 set_cpus_allowed_ptr(current, &oldmask);
1408 return khz; 1407 return khz;
1409} 1408}
1410 1409
@@ -1430,7 +1429,9 @@ static int __cpuinit powernowk8_init(void)
1430 unsigned int i, supported_cpus = 0; 1429 unsigned int i, supported_cpus = 0;
1431 1430
1432 for_each_online_cpu(i) { 1431 for_each_online_cpu(i) {
1433 if (check_supported_cpu(i)) 1432 int rc;
1433 smp_call_function_single(i, check_supported_cpu, &rc, 1);
1434 if (rc == 0)
1434 supported_cpus++; 1435 supported_cpus++;
1435 } 1436 }
1436 1437
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 6c6698feade1..02ce824073cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -215,7 +215,8 @@ struct pst_s {
215 215
216#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) 216#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
217 217
218static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); 218static int core_voltage_pre_transition(struct powernow_k8_data *data,
219 u32 reqvid, u32 regfid);
219static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); 220static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
220static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); 221static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
221 222
@@ -223,14 +224,3 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
223 224
224static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 225static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
225static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 226static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
226
227#ifdef CONFIG_SMP
228static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
229{
230}
231#else
232static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
233{
234 cpu_set(0, cpu_sharedcore_mask[0]);
235}
236#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 55c831ed71ce..8d672ef162ce 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu)
323{ 323{
324 unsigned l, h; 324 unsigned l, h;
325 unsigned clock_freq; 325 unsigned clock_freq;
326 cpumask_t saved_mask;
327 326
328 saved_mask = current->cpus_allowed; 327 rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
329 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
330 if (smp_processor_id() != cpu)
331 return 0;
332
333 rdmsr(MSR_IA32_PERF_STATUS, l, h);
334 clock_freq = extract_clock(l, cpu, 0); 328 clock_freq = extract_clock(l, cpu, 0);
335 329
336 if (unlikely(clock_freq == 0)) { 330 if (unlikely(clock_freq == 0)) {
@@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu)
340 * P-state transition (like TM2). Get the last freq set 334 * P-state transition (like TM2). Get the last freq set
341 * in PERF_CTL. 335 * in PERF_CTL.
342 */ 336 */
343 rdmsr(MSR_IA32_PERF_CTL, l, h); 337 rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
344 clock_freq = extract_clock(l, cpu, 1); 338 clock_freq = extract_clock(l, cpu, 1);
345 } 339 }
346
347 set_cpus_allowed_ptr(current, &saved_mask);
348 return clock_freq; 340 return clock_freq;
349} 341}
350 342
@@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy,
467 struct cpufreq_freqs freqs; 459 struct cpufreq_freqs freqs;
468 int retval = 0; 460 int retval = 0;
469 unsigned int j, k, first_cpu, tmp; 461 unsigned int j, k, first_cpu, tmp;
470 cpumask_var_t saved_mask, covered_cpus; 462 cpumask_var_t covered_cpus;
471 463
472 if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) 464 if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
473 return -ENOMEM;
474 if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
475 free_cpumask_var(saved_mask);
476 return -ENOMEM; 465 return -ENOMEM;
477 }
478 cpumask_copy(saved_mask, &current->cpus_allowed);
479 466
480 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { 467 if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
481 retval = -ENODEV; 468 retval = -ENODEV;
@@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy,
493 480
494 first_cpu = 1; 481 first_cpu = 1;
495 for_each_cpu(j, policy->cpus) { 482 for_each_cpu(j, policy->cpus) {
496 const struct cpumask *mask; 483 int good_cpu;
497 484
498 /* cpufreq holds the hotplug lock, so we are safe here */ 485 /* cpufreq holds the hotplug lock, so we are safe here */
499 if (!cpu_online(j)) 486 if (!cpu_online(j))
@@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy,
504 * Make sure we are running on CPU that wants to change freq 491 * Make sure we are running on CPU that wants to change freq
505 */ 492 */
506 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) 493 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
507 mask = policy->cpus; 494 good_cpu = cpumask_any_and(policy->cpus,
495 cpu_online_mask);
508 else 496 else
509 mask = cpumask_of(j); 497 good_cpu = j;
510 498
511 set_cpus_allowed_ptr(current, mask); 499 if (good_cpu >= nr_cpu_ids) {
512 preempt_disable();
513 if (unlikely(!cpu_isset(smp_processor_id(), *mask))) {
514 dprintk("couldn't limit to CPUs in this domain\n"); 500 dprintk("couldn't limit to CPUs in this domain\n");
515 retval = -EAGAIN; 501 retval = -EAGAIN;
516 if (first_cpu) { 502 if (first_cpu) {
517 /* We haven't started the transition yet. */ 503 /* We haven't started the transition yet. */
518 goto migrate_end; 504 goto out;
519 } 505 }
520 preempt_enable();
521 break; 506 break;
522 } 507 }
523 508
524 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; 509 msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
525 510
526 if (first_cpu) { 511 if (first_cpu) {
527 rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); 512 rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
528 if (msr == (oldmsr & 0xffff)) { 513 if (msr == (oldmsr & 0xffff)) {
529 dprintk("no change needed - msr was and needs " 514 dprintk("no change needed - msr was and needs "
530 "to be %x\n", oldmsr); 515 "to be %x\n", oldmsr);
531 retval = 0; 516 retval = 0;
532 goto migrate_end; 517 goto out;
533 } 518 }
534 519
535 freqs.old = extract_clock(oldmsr, cpu, 0); 520 freqs.old = extract_clock(oldmsr, cpu, 0);
@@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy,
553 oldmsr |= msr; 538 oldmsr |= msr;
554 } 539 }
555 540
556 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); 541 wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
557 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { 542 if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
558 preempt_enable();
559 break; 543 break;
560 }
561 544
562 cpu_set(j, *covered_cpus); 545 cpumask_set_cpu(j, covered_cpus);
563 preempt_enable();
564 } 546 }
565 547
566 for_each_cpu(k, policy->cpus) { 548 for_each_cpu(k, policy->cpus) {
@@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy,
578 * Best effort undo.. 560 * Best effort undo..
579 */ 561 */
580 562
581 for_each_cpu_mask_nr(j, *covered_cpus) { 563 for_each_cpu(j, covered_cpus)
582 set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); 564 wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
583 wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
584 }
585 565
586 tmp = freqs.new; 566 tmp = freqs.new;
587 freqs.new = freqs.old; 567 freqs.new = freqs.old;
@@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy,
593 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 573 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
594 } 574 }
595 } 575 }
596 set_cpus_allowed_ptr(current, saved_mask);
597 retval = 0; 576 retval = 0;
598 goto out;
599 577
600migrate_end:
601 preempt_enable();
602 set_cpus_allowed_ptr(current, saved_mask);
603out: 578out:
604 free_cpumask_var(saved_mask);
605 free_cpumask_var(covered_cpus); 579 free_cpumask_var(covered_cpus);
606 return retval; 580 return retval;
607} 581}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 016c1a4fa3fc..6911e91fb4f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -89,7 +89,8 @@ static int speedstep_find_register(void)
89 * speedstep_set_state - set the SpeedStep state 89 * speedstep_set_state - set the SpeedStep state
90 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) 90 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
91 * 91 *
92 * Tries to change the SpeedStep state. 92 * Tries to change the SpeedStep state. Can be called from
93 * smp_call_function_single.
93 */ 94 */
94static void speedstep_set_state(unsigned int state) 95static void speedstep_set_state(unsigned int state)
95{ 96{
@@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state)
143 return; 144 return;
144} 145}
145 146
147/* Wrapper for smp_call_function_single. */
148static void _speedstep_set_state(void *_state)
149{
150 speedstep_set_state(*(unsigned int *)_state);
151}
146 152
147/** 153/**
148 * speedstep_activate - activate SpeedStep control in the chipset 154 * speedstep_activate - activate SpeedStep control in the chipset
@@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void)
226 return 0; 232 return 0;
227} 233}
228 234
229static unsigned int _speedstep_get(const struct cpumask *cpus) 235struct get_freq_data {
230{
231 unsigned int speed; 236 unsigned int speed;
232 cpumask_t cpus_allowed; 237 unsigned int processor;
233 238};
234 cpus_allowed = current->cpus_allowed; 239
235 set_cpus_allowed_ptr(current, cpus); 240static void get_freq_data(void *_data)
236 speed = speedstep_get_frequency(speedstep_processor); 241{
237 set_cpus_allowed_ptr(current, &cpus_allowed); 242 struct get_freq_data *data = _data;
238 dprintk("detected %u kHz as current frequency\n", speed); 243
239 return speed; 244 data->speed = speedstep_get_frequency(data->processor);
240} 245}
241 246
242static unsigned int speedstep_get(unsigned int cpu) 247static unsigned int speedstep_get(unsigned int cpu)
243{ 248{
244 return _speedstep_get(cpumask_of(cpu)); 249 struct get_freq_data data = { .processor = cpu };
250
251 /* You're supposed to ensure CPU is online. */
252 if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0)
253 BUG();
254
255 dprintk("detected %u kHz as current frequency\n", data.speed);
256 return data.speed;
245} 257}
246 258
247/** 259/**
@@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy,
257 unsigned int target_freq, 269 unsigned int target_freq,
258 unsigned int relation) 270 unsigned int relation)
259{ 271{
260 unsigned int newstate = 0; 272 unsigned int newstate = 0, policy_cpu;
261 struct cpufreq_freqs freqs; 273 struct cpufreq_freqs freqs;
262 cpumask_t cpus_allowed;
263 int i; 274 int i;
264 275
265 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], 276 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
266 target_freq, relation, &newstate)) 277 target_freq, relation, &newstate))
267 return -EINVAL; 278 return -EINVAL;
268 279
269 freqs.old = _speedstep_get(policy->cpus); 280 policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
281 freqs.old = speedstep_get(policy_cpu);
270 freqs.new = speedstep_freqs[newstate].frequency; 282 freqs.new = speedstep_freqs[newstate].frequency;
271 freqs.cpu = policy->cpu; 283 freqs.cpu = policy->cpu;
272 284
@@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy,
276 if (freqs.old == freqs.new) 288 if (freqs.old == freqs.new)
277 return 0; 289 return 0;
278 290
279 cpus_allowed = current->cpus_allowed;
280
281 for_each_cpu(i, policy->cpus) { 291 for_each_cpu(i, policy->cpus) {
282 freqs.cpu = i; 292 freqs.cpu = i;
283 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 293 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
284 } 294 }
285 295
286 /* switch to physical CPU where state is to be changed */ 296 smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
287 set_cpus_allowed_ptr(current, policy->cpus); 297 true);
288
289 speedstep_set_state(newstate);
290
291 /* allow to be run on all CPUs */
292 set_cpus_allowed_ptr(current, &cpus_allowed);
293 298
294 for_each_cpu(i, policy->cpus) { 299 for_each_cpu(i, policy->cpus) {
295 freqs.cpu = i; 300 freqs.cpu = i;
@@ -312,33 +317,43 @@ static int speedstep_verify(struct cpufreq_policy *policy)
312 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); 317 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
313} 318}
314 319
320struct get_freqs {
321 struct cpufreq_policy *policy;
322 int ret;
323};
324
325static void get_freqs_on_cpu(void *_get_freqs)
326{
327 struct get_freqs *get_freqs = _get_freqs;
328
329 get_freqs->ret =
330 speedstep_get_freqs(speedstep_processor,
331 &speedstep_freqs[SPEEDSTEP_LOW].frequency,
332 &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
333 &get_freqs->policy->cpuinfo.transition_latency,
334 &speedstep_set_state);
335}
315 336
316static int speedstep_cpu_init(struct cpufreq_policy *policy) 337static int speedstep_cpu_init(struct cpufreq_policy *policy)
317{ 338{
318 int result = 0; 339 int result;
319 unsigned int speed; 340 unsigned int policy_cpu, speed;
320 cpumask_t cpus_allowed; 341 struct get_freqs gf;
321 342
322 /* only run on CPU to be set, or on its sibling */ 343 /* only run on CPU to be set, or on its sibling */
323#ifdef CONFIG_SMP 344#ifdef CONFIG_SMP
324 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); 345 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
325#endif 346#endif
326 347 policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
327 cpus_allowed = current->cpus_allowed;
328 set_cpus_allowed_ptr(current, policy->cpus);
329 348
330 /* detect low and high frequency and transition latency */ 349 /* detect low and high frequency and transition latency */
331 result = speedstep_get_freqs(speedstep_processor, 350 gf.policy = policy;
332 &speedstep_freqs[SPEEDSTEP_LOW].frequency, 351 smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
333 &speedstep_freqs[SPEEDSTEP_HIGH].frequency, 352 if (gf.ret)
334 &policy->cpuinfo.transition_latency, 353 return gf.ret;
335 &speedstep_set_state);
336 set_cpus_allowed_ptr(current, &cpus_allowed);
337 if (result)
338 return result;
339 354
340 /* get current speed setting */ 355 /* get current speed setting */
341 speed = _speedstep_get(policy->cpus); 356 speed = speedstep_get(policy_cpu);
342 if (!speed) 357 if (!speed)
343 return -EIO; 358 return -EIO;
344 359
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 2e3c6862657b..f4c290b8482f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void)
226} 226}
227 227
228 228
229/* Warning: may get called from smp_call_function_single. */
229unsigned int speedstep_get_frequency(unsigned int processor) 230unsigned int speedstep_get_frequency(unsigned int processor)
230{ 231{
231 switch (processor) { 232 switch (processor) {
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 45004faf67ea..188a1ca5ad2b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,12 @@
1obj-y = mce.o therm_throt.o 1obj-y = mce.o
2 2
3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o 3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o 4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o 5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
6obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o 6obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
7obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o 7obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
8obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
9obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o 8obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
10obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o 9obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
11obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o 10obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
11
12obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index 89e510424152..b945d5dbc609 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -10,10 +10,9 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* Machine Check Handler For AMD Athlon/Duron: */ 16/* Machine Check Handler For AMD Athlon/Duron: */
18static void k7_machine_check(struct pt_regs *regs, long error_code) 17static void k7_machine_check(struct pt_regs *regs, long error_code)
19{ 18{
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fabba15e4558..01213048f62f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,7 +44,6 @@
44#include <asm/msr.h> 44#include <asm/msr.h>
45 45
46#include "mce-internal.h" 46#include "mce-internal.h"
47#include "mce.h"
48 47
49/* Handle unconfigured int18 (should never happen) */ 48/* Handle unconfigured int18 (should never happen) */
50static void unexpected_machine_check(struct pt_regs *regs, long error_code) 49static void unexpected_machine_check(struct pt_regs *regs, long error_code)
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
57void (*machine_check_vector)(struct pt_regs *, long error_code) = 56void (*machine_check_vector)(struct pt_regs *, long error_code) =
58 unexpected_machine_check; 57 unexpected_machine_check;
59 58
60int mce_disabled; 59int mce_disabled __read_mostly;
61 60
62#ifdef CONFIG_X86_NEW_MCE 61#ifdef CONFIG_X86_NEW_MCE
63 62
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
76 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors 75 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
77 * 3: never panic or SIGBUS, log all errors (for testing only) 76 * 3: never panic or SIGBUS, log all errors (for testing only)
78 */ 77 */
79static int tolerant = 1; 78static int tolerant __read_mostly = 1;
80static int banks; 79static int banks __read_mostly;
81static u64 *bank; 80static u64 *bank __read_mostly;
82static unsigned long notify_user; 81static int rip_msr __read_mostly;
83static int rip_msr; 82static int mce_bootlog __read_mostly = -1;
84static int mce_bootlog = -1; 83static int monarch_timeout __read_mostly = -1;
85static int monarch_timeout = -1; 84static int mce_panic_timeout __read_mostly;
86static int mce_panic_timeout; 85static int mce_dont_log_ce __read_mostly;
87static int mce_dont_log_ce; 86int mce_cmci_disabled __read_mostly;
88int mce_cmci_disabled; 87int mce_ignore_ce __read_mostly;
89int mce_ignore_ce; 88int mce_ser __read_mostly;
90int mce_ser; 89
91 90/* User mode helper program triggered by machine check event */
92static char trigger[128]; 91static unsigned long mce_need_notify;
93static char *trigger_argv[2] = { trigger, NULL }; 92static char mce_helper[128];
93static char *mce_helper_argv[2] = { mce_helper, NULL };
94 94
95static unsigned long dont_init_banks; 95static unsigned long dont_init_banks;
96 96
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
180 wmb(); 180 wmb();
181 181
182 mce->finished = 1; 182 mce->finished = 1;
183 set_bit(0, &notify_user); 183 set_bit(0, &mce_need_notify);
184} 184}
185 185
186static void print_mce(struct mce *m) 186static void print_mce(struct mce *m)
@@ -194,14 +194,14 @@ static void print_mce(struct mce *m)
194 m->cs, m->ip); 194 m->cs, m->ip);
195 if (m->cs == __KERNEL_CS) 195 if (m->cs == __KERNEL_CS)
196 print_symbol("{%s}", m->ip); 196 print_symbol("{%s}", m->ip);
197 printk("\n"); 197 printk(KERN_CONT "\n");
198 } 198 }
199 printk(KERN_EMERG "TSC %llx ", m->tsc); 199 printk(KERN_EMERG "TSC %llx ", m->tsc);
200 if (m->addr) 200 if (m->addr)
201 printk("ADDR %llx ", m->addr); 201 printk(KERN_CONT "ADDR %llx ", m->addr);
202 if (m->misc) 202 if (m->misc)
203 printk("MISC %llx ", m->misc); 203 printk(KERN_CONT "MISC %llx ", m->misc);
204 printk("\n"); 204 printk(KERN_CONT "\n");
205 printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", 205 printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
206 m->cpuvendor, m->cpuid, m->time, m->socketid, 206 m->cpuvendor, m->cpuid, m->time, m->socketid,
207 m->apicid); 207 m->apicid);
@@ -209,13 +209,13 @@ static void print_mce(struct mce *m)
209 209
210static void print_mce_head(void) 210static void print_mce_head(void)
211{ 211{
212 printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); 212 printk(KERN_EMERG "\nHARDWARE ERROR\n");
213} 213}
214 214
215static void print_mce_tail(void) 215static void print_mce_tail(void)
216{ 216{
217 printk(KERN_EMERG "This is not a software problem!\n" 217 printk(KERN_EMERG "This is not a software problem!\n"
218 KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); 218 "Run through mcelog --ascii to decode and contact your hardware vendor\n");
219} 219}
220 220
221#define PANIC_TIMEOUT 5 /* 5 seconds */ 221#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
691 * in the entry order. 691 * in the entry order.
692 * TBD double check parallel CPU hotunplug 692 * TBD double check parallel CPU hotunplug
693 */ 693 */
694static int mce_start(int no_way_out, int *order) 694static int mce_start(int *no_way_out)
695{ 695{
696 int nwo; 696 int order;
697 int cpus = num_online_cpus(); 697 int cpus = num_online_cpus();
698 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 698 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
699 699
700 if (!timeout) { 700 if (!timeout)
701 *order = -1; 701 return -1;
702 return no_way_out;
703 }
704 702
705 atomic_add(no_way_out, &global_nwo); 703 atomic_add(*no_way_out, &global_nwo);
704 /*
705 * global_nwo should be updated before mce_callin
706 */
707 smp_wmb();
708 order = atomic_add_return(1, &mce_callin);
706 709
707 /* 710 /*
708 * Wait for everyone. 711 * Wait for everyone.
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
710 while (atomic_read(&mce_callin) != cpus) { 713 while (atomic_read(&mce_callin) != cpus) {
711 if (mce_timed_out(&timeout)) { 714 if (mce_timed_out(&timeout)) {
712 atomic_set(&global_nwo, 0); 715 atomic_set(&global_nwo, 0);
713 *order = -1; 716 return -1;
714 return no_way_out;
715 } 717 }
716 ndelay(SPINUNIT); 718 ndelay(SPINUNIT);
717 } 719 }
718 720
719 /* 721 /*
720 * Cache the global no_way_out state. 722 * mce_callin should be read before global_nwo
721 */ 723 */
722 nwo = atomic_read(&global_nwo); 724 smp_rmb();
723 725
724 /* 726 if (order == 1) {
725 * Monarch starts executing now, the others wait. 727 /*
726 */ 728 * Monarch: Starts executing now, the others wait.
727 if (*order == 1) { 729 */
728 atomic_set(&mce_executing, 1); 730 atomic_set(&mce_executing, 1);
729 return nwo; 731 } else {
732 /*
733 * Subject: Now start the scanning loop one by one in
734 * the original callin order.
735 * This way when there are any shared banks it will be
736 * only seen by one CPU before cleared, avoiding duplicates.
737 */
738 while (atomic_read(&mce_executing) < order) {
739 if (mce_timed_out(&timeout)) {
740 atomic_set(&global_nwo, 0);
741 return -1;
742 }
743 ndelay(SPINUNIT);
744 }
730 } 745 }
731 746
732 /* 747 /*
733 * Now start the scanning loop one by one 748 * Cache the global no_way_out state.
734 * in the original callin order.
735 * This way when there are any shared banks it will
736 * be only seen by one CPU before cleared, avoiding duplicates.
737 */ 749 */
738 while (atomic_read(&mce_executing) < *order) { 750 *no_way_out = atomic_read(&global_nwo);
739 if (mce_timed_out(&timeout)) { 751
740 atomic_set(&global_nwo, 0); 752 return order;
741 *order = -1;
742 return no_way_out;
743 }
744 ndelay(SPINUNIT);
745 }
746 return nwo;
747} 753}
748 754
749/* 755/*
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
863 * check handler. 869 * check handler.
864 */ 870 */
865 int order; 871 int order;
866
867 /* 872 /*
868 * If no_way_out gets set, there is no safe way to recover from this 873 * If no_way_out gets set, there is no safe way to recover from this
869 * MCE. If tolerant is cranked up, we'll try anyway. 874 * MCE. If tolerant is cranked up, we'll try anyway.
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
887 if (!banks) 892 if (!banks)
888 goto out; 893 goto out;
889 894
890 order = atomic_add_return(1, &mce_callin);
891 mce_setup(&m); 895 mce_setup(&m);
892 896
893 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 897 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
909 * This way we don't report duplicated events on shared banks 913 * This way we don't report duplicated events on shared banks
910 * because the first one to see it will clear it. 914 * because the first one to see it will clear it.
911 */ 915 */
912 no_way_out = mce_start(no_way_out, &order); 916 order = mce_start(&no_way_out);
913 for (i = 0; i < banks; i++) { 917 for (i = 0; i < banks; i++) {
914 __clear_bit(i, toclear); 918 __clear_bit(i, toclear);
915 if (!bank[i]) 919 if (!bank[i])
@@ -1113,12 +1117,12 @@ static void mcheck_timer(unsigned long data)
1113 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); 1117 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
1114 1118
1115 t->expires = jiffies + *n; 1119 t->expires = jiffies + *n;
1116 add_timer(t); 1120 add_timer_on(t, smp_processor_id());
1117} 1121}
1118 1122
1119static void mce_do_trigger(struct work_struct *work) 1123static void mce_do_trigger(struct work_struct *work)
1120{ 1124{
1121 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); 1125 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
1122} 1126}
1123 1127
1124static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 1128static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
1135 1139
1136 clear_thread_flag(TIF_MCE_NOTIFY); 1140 clear_thread_flag(TIF_MCE_NOTIFY);
1137 1141
1138 if (test_and_clear_bit(0, &notify_user)) { 1142 if (test_and_clear_bit(0, &mce_need_notify)) {
1139 wake_up_interruptible(&mce_wait); 1143 wake_up_interruptible(&mce_wait);
1140 1144
1141 /* 1145 /*
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
1143 * work_pending is always cleared before the function is 1147 * work_pending is always cleared before the function is
1144 * executed. 1148 * executed.
1145 */ 1149 */
1146 if (trigger[0] && !work_pending(&mce_trigger_work)) 1150 if (mce_helper[0] && !work_pending(&mce_trigger_work))
1147 schedule_work(&mce_trigger_work); 1151 schedule_work(&mce_trigger_work);
1148 1152
1149 if (__ratelimit(&ratelimit)) 1153 if (__ratelimit(&ratelimit))
@@ -1222,8 +1226,13 @@ static void mce_init(void)
1222} 1226}
1223 1227
1224/* Add per CPU specific workarounds here */ 1228/* Add per CPU specific workarounds here */
1225static void mce_cpu_quirks(struct cpuinfo_x86 *c) 1229static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1226{ 1230{
1231 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1232 pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
1233 return -EOPNOTSUPP;
1234 }
1235
1227 /* This should be disabled by the BIOS, but isn't always */ 1236 /* This should be disabled by the BIOS, but isn't always */
1228 if (c->x86_vendor == X86_VENDOR_AMD) { 1237 if (c->x86_vendor == X86_VENDOR_AMD) {
1229 if (c->x86 == 15 && banks > 4) { 1238 if (c->x86 == 15 && banks > 4) {
@@ -1245,7 +1254,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
1245 * Various K7s with broken bank 0 around. Always disable 1254 * Various K7s with broken bank 0 around. Always disable
1246 * by default. 1255 * by default.
1247 */ 1256 */
1248 if (c->x86 == 6) 1257 if (c->x86 == 6 && banks > 0)
1249 bank[0] = 0; 1258 bank[0] = 0;
1250 } 1259 }
1251 1260
@@ -1269,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
1269 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && 1278 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
1270 monarch_timeout < 0) 1279 monarch_timeout < 0)
1271 monarch_timeout = USEC_PER_SEC; 1280 monarch_timeout = USEC_PER_SEC;
1281
1282 /*
1283 * There are also broken BIOSes on some Pentium M and
1284 * earlier systems:
1285 */
1286 if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
1287 mce_bootlog = 0;
1272 } 1288 }
1273 if (monarch_timeout < 0) 1289 if (monarch_timeout < 0)
1274 monarch_timeout = 0; 1290 monarch_timeout = 0;
1275 if (mce_bootlog != 0) 1291 if (mce_bootlog != 0)
1276 mce_panic_timeout = 30; 1292 mce_panic_timeout = 30;
1293
1294 return 0;
1277} 1295}
1278 1296
1279static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) 1297static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
@@ -1282,8 +1300,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
1282 return; 1300 return;
1283 switch (c->x86_vendor) { 1301 switch (c->x86_vendor) {
1284 case X86_VENDOR_INTEL: 1302 case X86_VENDOR_INTEL:
1285 if (mce_p5_enabled()) 1303 intel_p5_mcheck_init(c);
1286 intel_p5_mcheck_init(c);
1287 break; 1304 break;
1288 case X86_VENDOR_CENTAUR: 1305 case X86_VENDOR_CENTAUR:
1289 winchip_mcheck_init(c); 1306 winchip_mcheck_init(c);
@@ -1318,7 +1335,7 @@ static void mce_init_timer(void)
1318 return; 1335 return;
1319 setup_timer(t, mcheck_timer, smp_processor_id()); 1336 setup_timer(t, mcheck_timer, smp_processor_id());
1320 t->expires = round_jiffies(jiffies + *n); 1337 t->expires = round_jiffies(jiffies + *n);
1321 add_timer(t); 1338 add_timer_on(t, smp_processor_id());
1322} 1339}
1323 1340
1324/* 1341/*
@@ -1335,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
1335 if (!mce_available(c)) 1352 if (!mce_available(c))
1336 return; 1353 return;
1337 1354
1338 if (mce_cap_init() < 0) { 1355 if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
1339 mce_disabled = 1; 1356 mce_disabled = 1;
1340 return; 1357 return;
1341 } 1358 }
1342 mce_cpu_quirks(c);
1343 1359
1344 machine_check_vector = do_machine_check; 1360 machine_check_vector = do_machine_check;
1345 1361
@@ -1609,8 +1625,9 @@ static int mce_resume(struct sys_device *dev)
1609static void mce_cpu_restart(void *data) 1625static void mce_cpu_restart(void *data)
1610{ 1626{
1611 del_timer_sync(&__get_cpu_var(mce_timer)); 1627 del_timer_sync(&__get_cpu_var(mce_timer));
1612 if (mce_available(&current_cpu_data)) 1628 if (!mce_available(&current_cpu_data))
1613 mce_init(); 1629 return;
1630 mce_init();
1614 mce_init_timer(); 1631 mce_init_timer();
1615} 1632}
1616 1633
@@ -1620,6 +1637,26 @@ static void mce_restart(void)
1620 on_each_cpu(mce_cpu_restart, NULL, 1); 1637 on_each_cpu(mce_cpu_restart, NULL, 1);
1621} 1638}
1622 1639
1640/* Toggle features for corrected errors */
1641static void mce_disable_ce(void *all)
1642{
1643 if (!mce_available(&current_cpu_data))
1644 return;
1645 if (all)
1646 del_timer_sync(&__get_cpu_var(mce_timer));
1647 cmci_clear();
1648}
1649
1650static void mce_enable_ce(void *all)
1651{
1652 if (!mce_available(&current_cpu_data))
1653 return;
1654 cmci_reenable();
1655 cmci_recheck();
1656 if (all)
1657 mce_init_timer();
1658}
1659
1623static struct sysdev_class mce_sysclass = { 1660static struct sysdev_class mce_sysclass = {
1624 .suspend = mce_suspend, 1661 .suspend = mce_suspend,
1625 .shutdown = mce_shutdown, 1662 .shutdown = mce_shutdown,
@@ -1659,26 +1696,70 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1659static ssize_t 1696static ssize_t
1660show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) 1697show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
1661{ 1698{
1662 strcpy(buf, trigger); 1699 strcpy(buf, mce_helper);
1663 strcat(buf, "\n"); 1700 strcat(buf, "\n");
1664 return strlen(trigger) + 1; 1701 return strlen(mce_helper) + 1;
1665} 1702}
1666 1703
1667static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1704static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1668 const char *buf, size_t siz) 1705 const char *buf, size_t siz)
1669{ 1706{
1670 char *p; 1707 char *p;
1671 int len;
1672 1708
1673 strncpy(trigger, buf, sizeof(trigger)); 1709 strncpy(mce_helper, buf, sizeof(mce_helper));
1674 trigger[sizeof(trigger)-1] = 0; 1710 mce_helper[sizeof(mce_helper)-1] = 0;
1675 len = strlen(trigger); 1711 p = strchr(mce_helper, '\n');
1676 p = strchr(trigger, '\n');
1677 1712
1678 if (*p) 1713 if (p)
1679 *p = 0; 1714 *p = 0;
1680 1715
1681 return len; 1716 return strlen(mce_helper) + !!p;
1717}
1718
1719static ssize_t set_ignore_ce(struct sys_device *s,
1720 struct sysdev_attribute *attr,
1721 const char *buf, size_t size)
1722{
1723 u64 new;
1724
1725 if (strict_strtoull(buf, 0, &new) < 0)
1726 return -EINVAL;
1727
1728 if (mce_ignore_ce ^ !!new) {
1729 if (new) {
1730 /* disable ce features */
1731 on_each_cpu(mce_disable_ce, (void *)1, 1);
1732 mce_ignore_ce = 1;
1733 } else {
1734 /* enable ce features */
1735 mce_ignore_ce = 0;
1736 on_each_cpu(mce_enable_ce, (void *)1, 1);
1737 }
1738 }
1739 return size;
1740}
1741
1742static ssize_t set_cmci_disabled(struct sys_device *s,
1743 struct sysdev_attribute *attr,
1744 const char *buf, size_t size)
1745{
1746 u64 new;
1747
1748 if (strict_strtoull(buf, 0, &new) < 0)
1749 return -EINVAL;
1750
1751 if (mce_cmci_disabled ^ !!new) {
1752 if (new) {
1753 /* disable cmci */
1754 on_each_cpu(mce_disable_ce, NULL, 1);
1755 mce_cmci_disabled = 1;
1756 } else {
1757 /* enable cmci */
1758 mce_cmci_disabled = 0;
1759 on_each_cpu(mce_enable_ce, NULL, 1);
1760 }
1761 }
1762 return size;
1682} 1763}
1683 1764
1684static ssize_t store_int_with_restart(struct sys_device *s, 1765static ssize_t store_int_with_restart(struct sys_device *s,
@@ -1693,6 +1774,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
1693static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 1774static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
1694static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 1775static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
1695static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 1776static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
1777static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
1696 1778
1697static struct sysdev_ext_attribute attr_check_interval = { 1779static struct sysdev_ext_attribute attr_check_interval = {
1698 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, 1780 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
@@ -1700,9 +1782,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
1700 &check_interval 1782 &check_interval
1701}; 1783};
1702 1784
1785static struct sysdev_ext_attribute attr_ignore_ce = {
1786 _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
1787 &mce_ignore_ce
1788};
1789
1790static struct sysdev_ext_attribute attr_cmci_disabled = {
1791 _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
1792 &mce_cmci_disabled
1793};
1794
1703static struct sysdev_attribute *mce_attrs[] = { 1795static struct sysdev_attribute *mce_attrs[] = {
1704 &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, 1796 &attr_tolerant.attr,
1797 &attr_check_interval.attr,
1798 &attr_trigger,
1705 &attr_monarch_timeout.attr, 1799 &attr_monarch_timeout.attr,
1800 &attr_dont_log_ce.attr,
1801 &attr_ignore_ce.attr,
1802 &attr_cmci_disabled.attr,
1706 NULL 1803 NULL
1707}; 1804};
1708 1805
@@ -1712,7 +1809,7 @@ static cpumask_var_t mce_dev_initialized;
1712static __cpuinit int mce_create_device(unsigned int cpu) 1809static __cpuinit int mce_create_device(unsigned int cpu)
1713{ 1810{
1714 int err; 1811 int err;
1715 int i; 1812 int i, j;
1716 1813
1717 if (!mce_available(&boot_cpu_data)) 1814 if (!mce_available(&boot_cpu_data))
1718 return -EIO; 1815 return -EIO;
@@ -1730,9 +1827,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1730 if (err) 1827 if (err)
1731 goto error; 1828 goto error;
1732 } 1829 }
1733 for (i = 0; i < banks; i++) { 1830 for (j = 0; j < banks; j++) {
1734 err = sysdev_create_file(&per_cpu(mce_dev, cpu), 1831 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
1735 &bank_attrs[i]); 1832 &bank_attrs[j]);
1736 if (err) 1833 if (err)
1737 goto error2; 1834 goto error2;
1738 } 1835 }
@@ -1740,8 +1837,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1740 1837
1741 return 0; 1838 return 0;
1742error2: 1839error2:
1743 while (--i >= 0) 1840 while (--j >= 0)
1744 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); 1841 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
1745error: 1842error:
1746 while (--i >= 0) 1843 while (--i >= 0)
1747 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1844 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
@@ -1883,7 +1980,7 @@ static __init int mce_init_device(void)
1883 if (!mce_available(&boot_cpu_data)) 1980 if (!mce_available(&boot_cpu_data))
1884 return -EIO; 1981 return -EIO;
1885 1982
1886 alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); 1983 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
1887 1984
1888 err = mce_init_banks(); 1985 err = mce_init_banks();
1889 if (err) 1986 if (err)
@@ -1915,7 +2012,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
1915/* This has to be run for each processor */ 2012/* This has to be run for each processor */
1916void mcheck_init(struct cpuinfo_x86 *c) 2013void mcheck_init(struct cpuinfo_x86 *c)
1917{ 2014{
1918 if (mce_disabled == 1) 2015 if (mce_disabled)
1919 return; 2016 return;
1920 2017
1921 switch (c->x86_vendor) { 2018 switch (c->x86_vendor) {
@@ -1945,10 +2042,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
1945 2042
1946static int __init mcheck_enable(char *str) 2043static int __init mcheck_enable(char *str)
1947{ 2044{
1948 mce_disabled = -1; 2045 mce_p5_enabled = 1;
1949 return 1; 2046 return 1;
1950} 2047}
1951
1952__setup("mce", mcheck_enable); 2048__setup("mce", mcheck_enable);
1953 2049
1954#endif /* CONFIG_X86_OLD_MCE */ 2050#endif /* CONFIG_X86_OLD_MCE */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
deleted file mode 100644
index 84a552b458c8..000000000000
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ /dev/null
@@ -1,38 +0,0 @@
1#include <linux/init.h>
2#include <asm/mce.h>
3
4#ifdef CONFIG_X86_OLD_MCE
5void amd_mcheck_init(struct cpuinfo_x86 *c);
6void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
7void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
8#endif
9
10#ifdef CONFIG_X86_ANCIENT_MCE
11void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
12void winchip_mcheck_init(struct cpuinfo_x86 *c);
13extern int mce_p5_enable;
14static inline int mce_p5_enabled(void) { return mce_p5_enable; }
15static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
16#else
17static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
18static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
19static inline int mce_p5_enabled(void) { return 0; }
20static inline void enable_p5_mce(void) { }
21#endif
22
23/* Call the installed machine check handler for this CPU setup. */
24extern void (*machine_check_vector)(struct pt_regs *, long error_code);
25
26#ifdef CONFIG_X86_OLD_MCE
27
28extern int nr_mce_banks;
29
30void intel_set_thermal_handler(void);
31
32#else
33
34static inline void intel_set_thermal_handler(void) { }
35
36#endif
37
38void intel_init_thermal(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ddae21620bda..ddae21620bda 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 2b011d2d8579..e1acec0f7a32 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -1,74 +1,226 @@
1/* 1/*
2 * Common code for Intel machine checks 2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
3 */ 6 */
4#include <linux/interrupt.h>
5#include <linux/kernel.h>
6#include <linux/types.h>
7#include <linux/init.h>
8#include <linux/smp.h>
9 7
10#include <asm/therm_throt.h> 8#include <linux/init.h>
11#include <asm/processor.h> 9#include <linux/interrupt.h>
12#include <asm/system.h> 10#include <linux/percpu.h>
13#include <asm/apic.h> 11#include <asm/apic.h>
12#include <asm/processor.h>
14#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/mce.h>
15
16/*
17 * Support for Intel Correct Machine Check Interrupts. This allows
18 * the CPU to raise an interrupt when a corrected machine check happened.
19 * Normally we pick those up using a regular polling timer.
20 * Also supports reliable discovery of shared banks.
21 */
15 22
16#include "mce.h" 23static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
17 24
18void intel_init_thermal(struct cpuinfo_x86 *c) 25/*
26 * cmci_discover_lock protects against parallel discovery attempts
27 * which could race against each other.
28 */
29static DEFINE_SPINLOCK(cmci_discover_lock);
30
31#define CMCI_THRESHOLD 1
32
33static int cmci_supported(int *banks)
19{ 34{
20 unsigned int cpu = smp_processor_id(); 35 u64 cap;
21 int tm2 = 0;
22 u32 l, h;
23 36
24 /* Thermal monitoring depends on ACPI and clock modulation*/ 37 if (mce_cmci_disabled || mce_ignore_ce)
25 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) 38 return 0;
26 return;
27 39
28 /* 40 /*
29 * First check if its enabled already, in which case there might 41 * Vendor check is not strictly needed, but the initial
30 * be some SMM goo which handles it, so we can't even put a handler 42 * initialization is vendor keyed and this
31 * since it might be delivered via SMI already: 43 * makes sure none of the backdoors are entered otherwise.
32 */ 44 */
33 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 45 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
34 h = apic_read(APIC_LVTTHMR); 46 return 0;
35 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { 47 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
36 printk(KERN_DEBUG 48 return 0;
37 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 49 rdmsrl(MSR_IA32_MCG_CAP, cap);
38 return; 50 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
51 return !!(cap & MCG_CMCI_P);
52}
53
54/*
55 * The interrupt handler. This is called on every event.
56 * Just call the poller directly to log any events.
57 * This could in theory increase the threshold under high load,
58 * but doesn't for now.
59 */
60static void intel_threshold_interrupt(void)
61{
62 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
63 mce_notify_irq();
64}
65
66static void print_update(char *type, int *hdr, int num)
67{
68 if (*hdr == 0)
69 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
70 *hdr = 1;
71 printk(KERN_CONT " %s:%d", type, num);
72}
73
74/*
75 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
76 * on this CPU. Use the algorithm recommended in the SDM to discover shared
77 * banks.
78 */
79static void cmci_discover(int banks, int boot)
80{
81 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
82 unsigned long flags;
83 int hdr = 0;
84 int i;
85
86 spin_lock_irqsave(&cmci_discover_lock, flags);
87 for (i = 0; i < banks; i++) {
88 u64 val;
89
90 if (test_bit(i, owned))
91 continue;
92
93 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
94
95 /* Already owned by someone else? */
96 if (val & CMCI_EN) {
97 if (test_and_clear_bit(i, owned) || boot)
98 print_update("SHD", &hdr, i);
99 __clear_bit(i, __get_cpu_var(mce_poll_banks));
100 continue;
101 }
102
103 val |= CMCI_EN | CMCI_THRESHOLD;
104 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
105 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
106
107 /* Did the enable bit stick? -- the bank supports CMCI */
108 if (val & CMCI_EN) {
109 if (!test_and_set_bit(i, owned) || boot)
110 print_update("CMCI", &hdr, i);
111 __clear_bit(i, __get_cpu_var(mce_poll_banks));
112 } else {
113 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
114 }
39 } 115 }
116 spin_unlock_irqrestore(&cmci_discover_lock, flags);
117 if (hdr)
118 printk(KERN_CONT "\n");
119}
120
121/*
122 * Just in case we missed an event during initialization check
123 * all the CMCI owned banks.
124 */
125void cmci_recheck(void)
126{
127 unsigned long flags;
128 int banks;
129
130 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
131 return;
132 local_irq_save(flags);
133 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
134 local_irq_restore(flags);
135}
40 136
41 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) 137/*
42 tm2 = 1; 138 * Disable CMCI on this CPU for all banks it owns when it goes down.
139 * This allows other CPUs to claim the banks on rediscovery.
140 */
141void cmci_clear(void)
142{
143 unsigned long flags;
144 int i;
145 int banks;
146 u64 val;
43 147
44 /* Check whether a vector already exists */ 148 if (!cmci_supported(&banks))
45 if (h & APIC_VECTOR_MASK) {
46 printk(KERN_DEBUG
47 "CPU%d: Thermal LVT vector (%#x) already installed\n",
48 cpu, (h & APIC_VECTOR_MASK));
49 return; 149 return;
150 spin_lock_irqsave(&cmci_discover_lock, flags);
151 for (i = 0; i < banks; i++) {
152 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
153 continue;
154 /* Disable CMCI */
155 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
156 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
157 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
158 __clear_bit(i, __get_cpu_var(mce_banks_owned));
50 } 159 }
160 spin_unlock_irqrestore(&cmci_discover_lock, flags);
161}
162
163/*
164 * After a CPU went down cycle through all the others and rediscover
165 * Must run in process context.
166 */
167void cmci_rediscover(int dying)
168{
169 int banks;
170 int cpu;
171 cpumask_var_t old;
172
173 if (!cmci_supported(&banks))
174 return;
175 if (!alloc_cpumask_var(&old, GFP_KERNEL))
176 return;
177 cpumask_copy(old, &current->cpus_allowed);
51 178
52 /* We'll mask the thermal vector in the lapic till we're ready: */ 179 for_each_online_cpu(cpu) {
53 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; 180 if (cpu == dying)
54 apic_write(APIC_LVTTHMR, h); 181 continue;
182 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
183 continue;
184 /* Recheck banks in case CPUs don't all have the same */
185 if (cmci_supported(&banks))
186 cmci_discover(banks, 0);
187 }
55 188
56 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); 189 set_cpus_allowed_ptr(current, old);
57 wrmsr(MSR_IA32_THERM_INTERRUPT, 190 free_cpumask_var(old);
58 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); 191}
59 192
60 intel_set_thermal_handler(); 193/*
194 * Reenable CMCI on this CPU in case a CPU down failed.
195 */
196void cmci_reenable(void)
197{
198 int banks;
199 if (cmci_supported(&banks))
200 cmci_discover(banks, 0);
201}
61 202
62 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 203static void intel_init_cmci(void)
63 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); 204{
205 int banks;
64 206
65 /* Unmask the thermal vector: */ 207 if (!cmci_supported(&banks))
66 l = apic_read(APIC_LVTTHMR); 208 return;
67 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
68 209
69 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 210 mce_threshold_vector = intel_threshold_interrupt;
70 cpu, tm2 ? "TM2" : "TM1"); 211 cmci_discover(banks, 1);
212 /*
213 * For CPU #0 this runs with still disabled APIC, but that's
214 * ok because only the vector is set up. We still do another
215 * check for the banks later for CPU #0 just to make sure
216 * to not miss any events.
217 */
218 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
219 cmci_recheck();
220}
71 221
72 /* enable thermal throttle processing */ 222void mce_intel_feature_init(struct cpuinfo_x86 *c)
73 atomic_set(&therm_throt_en, 1); 223{
224 intel_init_thermal(c);
225 intel_init_cmci();
74} 226}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
deleted file mode 100644
index f2ef6952c400..000000000000
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ /dev/null
@@ -1,248 +0,0 @@
1/*
2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
6 */
7
8#include <linux/init.h>
9#include <linux/interrupt.h>
10#include <linux/percpu.h>
11#include <asm/processor.h>
12#include <asm/apic.h>
13#include <asm/msr.h>
14#include <asm/mce.h>
15#include <asm/hw_irq.h>
16#include <asm/idle.h>
17#include <asm/therm_throt.h>
18
19#include "mce.h"
20
21asmlinkage void smp_thermal_interrupt(void)
22{
23 __u64 msr_val;
24
25 ack_APIC_irq();
26
27 exit_idle();
28 irq_enter();
29
30 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
31 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
32 mce_log_therm_throt_event(msr_val);
33
34 inc_irq_stat(irq_thermal_count);
35 irq_exit();
36}
37
38/*
39 * Support for Intel Correct Machine Check Interrupts. This allows
40 * the CPU to raise an interrupt when a corrected machine check happened.
41 * Normally we pick those up using a regular polling timer.
42 * Also supports reliable discovery of shared banks.
43 */
44
45static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
46
47/*
48 * cmci_discover_lock protects against parallel discovery attempts
49 * which could race against each other.
50 */
51static DEFINE_SPINLOCK(cmci_discover_lock);
52
53#define CMCI_THRESHOLD 1
54
55static int cmci_supported(int *banks)
56{
57 u64 cap;
58
59 if (mce_cmci_disabled || mce_ignore_ce)
60 return 0;
61
62 /*
63 * Vendor check is not strictly needed, but the initial
64 * initialization is vendor keyed and this
65 * makes sure none of the backdoors are entered otherwise.
66 */
67 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
68 return 0;
69 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
70 return 0;
71 rdmsrl(MSR_IA32_MCG_CAP, cap);
72 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
73 return !!(cap & MCG_CMCI_P);
74}
75
76/*
77 * The interrupt handler. This is called on every event.
78 * Just call the poller directly to log any events.
79 * This could in theory increase the threshold under high load,
80 * but doesn't for now.
81 */
82static void intel_threshold_interrupt(void)
83{
84 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
85 mce_notify_irq();
86}
87
88static void print_update(char *type, int *hdr, int num)
89{
90 if (*hdr == 0)
91 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
92 *hdr = 1;
93 printk(KERN_CONT " %s:%d", type, num);
94}
95
96/*
97 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
98 * on this CPU. Use the algorithm recommended in the SDM to discover shared
99 * banks.
100 */
101static void cmci_discover(int banks, int boot)
102{
103 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
104 unsigned long flags;
105 int hdr = 0;
106 int i;
107
108 spin_lock_irqsave(&cmci_discover_lock, flags);
109 for (i = 0; i < banks; i++) {
110 u64 val;
111
112 if (test_bit(i, owned))
113 continue;
114
115 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
116
117 /* Already owned by someone else? */
118 if (val & CMCI_EN) {
119 if (test_and_clear_bit(i, owned) || boot)
120 print_update("SHD", &hdr, i);
121 __clear_bit(i, __get_cpu_var(mce_poll_banks));
122 continue;
123 }
124
125 val |= CMCI_EN | CMCI_THRESHOLD;
126 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
127 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
128
129 /* Did the enable bit stick? -- the bank supports CMCI */
130 if (val & CMCI_EN) {
131 if (!test_and_set_bit(i, owned) || boot)
132 print_update("CMCI", &hdr, i);
133 __clear_bit(i, __get_cpu_var(mce_poll_banks));
134 } else {
135 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
136 }
137 }
138 spin_unlock_irqrestore(&cmci_discover_lock, flags);
139 if (hdr)
140 printk(KERN_CONT "\n");
141}
142
143/*
144 * Just in case we missed an event during initialization check
145 * all the CMCI owned banks.
146 */
147void cmci_recheck(void)
148{
149 unsigned long flags;
150 int banks;
151
152 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
153 return;
154 local_irq_save(flags);
155 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
156 local_irq_restore(flags);
157}
158
159/*
160 * Disable CMCI on this CPU for all banks it owns when it goes down.
161 * This allows other CPUs to claim the banks on rediscovery.
162 */
163void cmci_clear(void)
164{
165 unsigned long flags;
166 int i;
167 int banks;
168 u64 val;
169
170 if (!cmci_supported(&banks))
171 return;
172 spin_lock_irqsave(&cmci_discover_lock, flags);
173 for (i = 0; i < banks; i++) {
174 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
175 continue;
176 /* Disable CMCI */
177 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
178 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
179 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
180 __clear_bit(i, __get_cpu_var(mce_banks_owned));
181 }
182 spin_unlock_irqrestore(&cmci_discover_lock, flags);
183}
184
185/*
186 * After a CPU went down cycle through all the others and rediscover
187 * Must run in process context.
188 */
189void cmci_rediscover(int dying)
190{
191 int banks;
192 int cpu;
193 cpumask_var_t old;
194
195 if (!cmci_supported(&banks))
196 return;
197 if (!alloc_cpumask_var(&old, GFP_KERNEL))
198 return;
199 cpumask_copy(old, &current->cpus_allowed);
200
201 for_each_online_cpu(cpu) {
202 if (cpu == dying)
203 continue;
204 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
205 continue;
206 /* Recheck banks in case CPUs don't all have the same */
207 if (cmci_supported(&banks))
208 cmci_discover(banks, 0);
209 }
210
211 set_cpus_allowed_ptr(current, old);
212 free_cpumask_var(old);
213}
214
215/*
216 * Reenable CMCI on this CPU in case a CPU down failed.
217 */
218void cmci_reenable(void)
219{
220 int banks;
221 if (cmci_supported(&banks))
222 cmci_discover(banks, 0);
223}
224
225static void intel_init_cmci(void)
226{
227 int banks;
228
229 if (!cmci_supported(&banks))
230 return;
231
232 mce_threshold_vector = intel_threshold_interrupt;
233 cmci_discover(banks, 1);
234 /*
235 * For CPU #0 this runs with still disabled APIC, but that's
236 * ok because only the vector is set up. We still do another
237 * check for the banks later for CPU #0 just to make sure
238 * to not miss any events.
239 */
240 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
241 cmci_recheck();
242}
243
244void mce_intel_feature_init(struct cpuinfo_x86 *c)
245{
246 intel_init_thermal(c);
247 intel_init_cmci();
248}
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 70b710420f74..f5f2d6f71fb6 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -17,10 +17,9 @@
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/mce.h>
20#include <asm/msr.h> 21#include <asm/msr.h>
21 22
22#include "mce.h"
23
24static int firstbank; 23static int firstbank;
25 24
26#define MCE_RATE (15*HZ) /* timer rate is 15s */ 25#define MCE_RATE (15*HZ) /* timer rate is 15s */
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 82cee108a2d3..4482aea9aa2e 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -1,21 +1,15 @@
1/* 1/*
2 * P4 specific Machine Check Exception Reporting 2 * P4 specific Machine Check Exception Reporting
3 */ 3 */
4
5#include <linux/interrupt.h>
6#include <linux/kernel.h> 4#include <linux/kernel.h>
7#include <linux/types.h> 5#include <linux/types.h>
8#include <linux/init.h> 6#include <linux/init.h>
9#include <linux/smp.h> 7#include <linux/smp.h>
10 8
11#include <asm/therm_throt.h>
12#include <asm/processor.h> 9#include <asm/processor.h>
13#include <asm/system.h> 10#include <asm/mce.h>
14#include <asm/apic.h>
15#include <asm/msr.h> 11#include <asm/msr.h>
16 12
17#include "mce.h"
18
19/* as supported by the P4/Xeon family */ 13/* as supported by the P4/Xeon family */
20struct intel_mce_extended_msrs { 14struct intel_mce_extended_msrs {
21 u32 eax; 15 u32 eax;
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
33 27
34static int mce_num_extended_msrs; 28static int mce_num_extended_msrs;
35 29
36
37#ifdef CONFIG_X86_MCE_P4THERMAL
38
39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK);
44}
45
46/* P4/Xeon Thermal transition interrupt handler: */
47static void intel_thermal_interrupt(struct pt_regs *regs)
48{
49 __u64 msr_val;
50
51 ack_APIC_irq();
52
53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
54 therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
55}
56
57/* Thermal interrupt handler for this CPU setup: */
58static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
59 unexpected_thermal_interrupt;
60
61void smp_thermal_interrupt(struct pt_regs *regs)
62{
63 irq_enter();
64 vendor_thermal_interrupt(regs);
65 __get_cpu_var(irq_stat).irq_thermal_count++;
66 irq_exit();
67}
68
69void intel_set_thermal_handler(void)
70{
71 vendor_thermal_interrupt = intel_thermal_interrupt;
72}
73
74#endif /* CONFIG_X86_MCE_P4THERMAL */
75
76/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ 30/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
77static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) 31static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
78{ 32{
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 015f481ab1b0..5c0e6533d9bc 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -10,12 +10,11 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* By default disabled */ 16/* By default disabled */
18int mce_p5_enable; 17int mce_p5_enabled __read_mostly;
19 18
20/* Machine check handler for Pentium class Intel CPUs: */ 19/* Machine check handler for Pentium class Intel CPUs: */
21static void pentium_machine_check(struct pt_regs *regs, long error_code) 20static void pentium_machine_check(struct pt_regs *regs, long error_code)
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
43{ 42{
44 u32 l, h; 43 u32 l, h;
45 44
46 /* Check for MCE support: */ 45 /* Default P5 to off as its often misconnected: */
47 if (!cpu_has(c, X86_FEATURE_MCE)) 46 if (!mce_p5_enabled)
48 return; 47 return;
49 48
50#ifdef CONFIG_X86_OLD_MCE 49 /* Check for MCE support: */
51 /* Default P5 to off as its often misconnected: */ 50 if (!cpu_has(c, X86_FEATURE_MCE))
52 if (mce_disabled != -1)
53 return; 51 return;
54#endif
55 52
56 machine_check_vector = pentium_machine_check; 53 machine_check_vector = pentium_machine_check;
57 /* Make sure the vector pointer is visible before we enable MCEs: */ 54 /* Make sure the vector pointer is visible before we enable MCEs: */
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 43c24e667457..01e4f8178183 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -10,10 +10,9 @@
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/mce.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14 15
15#include "mce.h"
16
17/* Machine Check Handler For PII/PIII */ 16/* Machine Check Handler For PII/PIII */
18static void intel_machine_check(struct pt_regs *regs, long error_code) 17static void intel_machine_check(struct pt_regs *regs, long error_code)
19{ 18{
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 7b1ae2e20ba5..5957a93e5173 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -13,21 +13,32 @@
13 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. 13 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
14 * Inspired by Ross Biro's and Al Borchers' counter code. 14 * Inspired by Ross Biro's and Al Borchers' counter code.
15 */ 15 */
16#include <linux/interrupt.h>
16#include <linux/notifier.h> 17#include <linux/notifier.h>
17#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/kernel.h>
18#include <linux/percpu.h> 20#include <linux/percpu.h>
19#include <linux/sysdev.h> 21#include <linux/sysdev.h>
22#include <linux/types.h>
23#include <linux/init.h>
24#include <linux/smp.h>
20#include <linux/cpu.h> 25#include <linux/cpu.h>
21 26
22#include <asm/therm_throt.h> 27#include <asm/processor.h>
28#include <asm/system.h>
29#include <asm/apic.h>
30#include <asm/idle.h>
31#include <asm/mce.h>
32#include <asm/msr.h>
23 33
24/* How long to wait between reporting thermal events */ 34/* How long to wait between reporting thermal events */
25#define CHECK_INTERVAL (300 * HZ) 35#define CHECK_INTERVAL (300 * HZ)
26 36
27static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
28static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
39static DEFINE_PER_CPU(bool, thermal_throttle_active);
29 40
30atomic_t therm_throt_en = ATOMIC_INIT(0); 41static atomic_t therm_throt_en = ATOMIC_INIT(0);
31 42
32#ifdef CONFIG_SYSFS 43#ifdef CONFIG_SYSFS
33#define define_therm_throt_sysdev_one_ro(_name) \ 44#define define_therm_throt_sysdev_one_ro(_name) \
@@ -82,31 +93,37 @@ static struct attribute_group thermal_throttle_attr_group = {
82 * 1 : Event should be logged further, and a message has been 93 * 1 : Event should be logged further, and a message has been
83 * printed to the syslog. 94 * printed to the syslog.
84 */ 95 */
85int therm_throt_process(int curr) 96static int therm_throt_process(int curr)
86{ 97{
87 unsigned int cpu = smp_processor_id(); 98 unsigned int cpu = smp_processor_id();
88 __u64 tmp_jiffs = get_jiffies_64(); 99 __u64 tmp_jiffs = get_jiffies_64();
100 bool was_throttled = __get_cpu_var(thermal_throttle_active);
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
89 102
90 if (curr) 103 if (is_throttled)
91 __get_cpu_var(thermal_throttle_count)++; 104 __get_cpu_var(thermal_throttle_count)++;
92 105
93 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 106 if (!(was_throttled ^ is_throttled) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check)))
94 return 0; 108 return 0;
95 109
96 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
97 111
98 /* if we just entered the thermal event */ 112 /* if we just entered the thermal event */
99 if (curr) { 113 if (is_throttled) {
100 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 114 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
101 "cpu clock throttled (total events = %lu)\n", cpu, 115 "cpu clock throttled (total events = %lu)\n",
102 __get_cpu_var(thermal_throttle_count)); 116 cpu, __get_cpu_var(thermal_throttle_count));
103 117
104 add_taint(TAINT_MACHINE_CHECK); 118 add_taint(TAINT_MACHINE_CHECK);
105 } else { 119 return 1;
106 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 120 }
121 if (was_throttled) {
122 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
123 return 1;
107 } 124 }
108 125
109 return 1; 126 return 0;
110} 127}
111 128
112#ifdef CONFIG_SYSFS 129#ifdef CONFIG_SYSFS
@@ -186,6 +203,94 @@ static __init int thermal_throttle_init_device(void)
186 203
187 return 0; 204 return 0;
188} 205}
189
190device_initcall(thermal_throttle_init_device); 206device_initcall(thermal_throttle_init_device);
207
191#endif /* CONFIG_SYSFS */ 208#endif /* CONFIG_SYSFS */
209
210/* Thermal transition interrupt handler */
211static void intel_thermal_interrupt(void)
212{
213 __u64 msr_val;
214
215 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
216 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
217 mce_log_therm_throt_event(msr_val);
218}
219
220static void unexpected_thermal_interrupt(void)
221{
222 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
223 smp_processor_id());
224 add_taint(TAINT_MACHINE_CHECK);
225}
226
227static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
228
229asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
230{
231 exit_idle();
232 irq_enter();
233 inc_irq_stat(irq_thermal_count);
234 smp_thermal_vector();
235 irq_exit();
236 /* Ack only at the end to avoid potential reentry */
237 ack_APIC_irq();
238}
239
240void intel_init_thermal(struct cpuinfo_x86 *c)
241{
242 unsigned int cpu = smp_processor_id();
243 int tm2 = 0;
244 u32 l, h;
245
246 /* Thermal monitoring depends on ACPI and clock modulation*/
247 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
248 return;
249
250 /*
251 * First check if its enabled already, in which case there might
252 * be some SMM goo which handles it, so we can't even put a handler
253 * since it might be delivered via SMI already:
254 */
255 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
256 h = apic_read(APIC_LVTTHMR);
257 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
258 printk(KERN_DEBUG
259 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
260 return;
261 }
262
263 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
264 tm2 = 1;
265
266 /* Check whether a vector already exists */
267 if (h & APIC_VECTOR_MASK) {
268 printk(KERN_DEBUG
269 "CPU%d: Thermal LVT vector (%#x) already installed\n",
270 cpu, (h & APIC_VECTOR_MASK));
271 return;
272 }
273
274 /* We'll mask the thermal vector in the lapic till we're ready: */
275 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
276 apic_write(APIC_LVTTHMR, h);
277
278 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
279 wrmsr(MSR_IA32_THERM_INTERRUPT,
280 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
281
282 smp_thermal_vector = intel_thermal_interrupt;
283
284 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
285 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
286
287 /* Unmask the thermal vector: */
288 l = apic_read(APIC_LVTTHMR);
289 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
290
291 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
292 cpu, tm2 ? "TM2" : "TM1");
293
294 /* enable thermal throttle processing */
295 atomic_set(&therm_throt_en, 1);
296}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 81b02487090b..54060f565974 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -9,10 +9,9 @@
9 9
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/mce.h>
12#include <asm/msr.h> 13#include <asm/msr.h>
13 14
14#include "mce.h"
15
16/* Machine check handler for WinChip C6: */ 15/* Machine check handler for WinChip C6: */
17static void winchip_machine_check(struct pt_regs *regs, long error_code) 16static void winchip_machine_check(struct pt_regs *regs, long error_code)
18{ 17{
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 275bc142cd5d..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -19,6 +19,7 @@
19#include <linux/kdebug.h> 19#include <linux/kdebug.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/highmem.h>
22 23
23#include <asm/apic.h> 24#include <asm/apic.h>
24#include <asm/stacktrace.h> 25#include <asm/stacktrace.h>
@@ -54,6 +55,7 @@ struct x86_pmu {
54 int num_counters_fixed; 55 int num_counters_fixed;
55 int counter_bits; 56 int counter_bits;
56 u64 counter_mask; 57 u64 counter_mask;
58 int apic;
57 u64 max_period; 59 u64 max_period;
58 u64 intel_ctrl; 60 u64 intel_ctrl;
59}; 61};
@@ -65,6 +67,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
65}; 67};
66 68
67/* 69/*
70 * Not sure about some of these
71 */
72static const u64 p6_perfmon_event_map[] =
73{
74 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
75 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
76 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
77 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
78 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
79 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
80 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
81};
82
83static u64 p6_pmu_event_map(int event)
84{
85 return p6_perfmon_event_map[event];
86}
87
88/*
89 * Counter setting that is specified not to count anything.
90 * We use this to effectively disable a counter.
91 *
92 * L2_RQSTS with 0 MESI unit mask.
93 */
94#define P6_NOP_COUNTER 0x0000002EULL
95
96static u64 p6_pmu_raw_event(u64 event)
97{
98#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
99#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
100#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
101#define P6_EVNTSEL_INV_MASK 0x00800000ULL
102#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
103
104#define P6_EVNTSEL_MASK \
105 (P6_EVNTSEL_EVENT_MASK | \
106 P6_EVNTSEL_UNIT_MASK | \
107 P6_EVNTSEL_EDGE_MASK | \
108 P6_EVNTSEL_INV_MASK | \
109 P6_EVNTSEL_COUNTER_MASK)
110
111 return event & P6_EVNTSEL_MASK;
112}
113
114
115/*
68 * Intel PerfMon v3. Used on Core2 and later. 116 * Intel PerfMon v3. Used on Core2 and later.
69 */ 117 */
70static const u64 intel_perfmon_event_map[] = 118static const u64 intel_perfmon_event_map[] =
@@ -389,23 +437,23 @@ static u64 intel_pmu_raw_event(u64 event)
389 return event & CORE_EVNTSEL_MASK; 437 return event & CORE_EVNTSEL_MASK;
390} 438}
391 439
392static const u64 amd_0f_hw_cache_event_ids 440static const u64 amd_hw_cache_event_ids
393 [PERF_COUNT_HW_CACHE_MAX] 441 [PERF_COUNT_HW_CACHE_MAX]
394 [PERF_COUNT_HW_CACHE_OP_MAX] 442 [PERF_COUNT_HW_CACHE_OP_MAX]
395 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 443 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
396{ 444{
397 [ C(L1D) ] = { 445 [ C(L1D) ] = {
398 [ C(OP_READ) ] = { 446 [ C(OP_READ) ] = {
399 [ C(RESULT_ACCESS) ] = 0, 447 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
400 [ C(RESULT_MISS) ] = 0, 448 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
401 }, 449 },
402 [ C(OP_WRITE) ] = { 450 [ C(OP_WRITE) ] = {
403 [ C(RESULT_ACCESS) ] = 0, 451 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
404 [ C(RESULT_MISS) ] = 0, 452 [ C(RESULT_MISS) ] = 0,
405 }, 453 },
406 [ C(OP_PREFETCH) ] = { 454 [ C(OP_PREFETCH) ] = {
407 [ C(RESULT_ACCESS) ] = 0, 455 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
408 [ C(RESULT_MISS) ] = 0, 456 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
409 }, 457 },
410 }, 458 },
411 [ C(L1I ) ] = { 459 [ C(L1I ) ] = {
@@ -418,17 +466,17 @@ static const u64 amd_0f_hw_cache_event_ids
418 [ C(RESULT_MISS) ] = -1, 466 [ C(RESULT_MISS) ] = -1,
419 }, 467 },
420 [ C(OP_PREFETCH) ] = { 468 [ C(OP_PREFETCH) ] = {
421 [ C(RESULT_ACCESS) ] = 0, 469 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
422 [ C(RESULT_MISS) ] = 0, 470 [ C(RESULT_MISS) ] = 0,
423 }, 471 },
424 }, 472 },
425 [ C(LL ) ] = { 473 [ C(LL ) ] = {
426 [ C(OP_READ) ] = { 474 [ C(OP_READ) ] = {
427 [ C(RESULT_ACCESS) ] = 0, 475 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
428 [ C(RESULT_MISS) ] = 0, 476 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
429 }, 477 },
430 [ C(OP_WRITE) ] = { 478 [ C(OP_WRITE) ] = {
431 [ C(RESULT_ACCESS) ] = 0, 479 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
432 [ C(RESULT_MISS) ] = 0, 480 [ C(RESULT_MISS) ] = 0,
433 }, 481 },
434 [ C(OP_PREFETCH) ] = { 482 [ C(OP_PREFETCH) ] = {
@@ -438,8 +486,8 @@ static const u64 amd_0f_hw_cache_event_ids
438 }, 486 },
439 [ C(DTLB) ] = { 487 [ C(DTLB) ] = {
440 [ C(OP_READ) ] = { 488 [ C(OP_READ) ] = {
441 [ C(RESULT_ACCESS) ] = 0, 489 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
442 [ C(RESULT_MISS) ] = 0, 490 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
443 }, 491 },
444 [ C(OP_WRITE) ] = { 492 [ C(OP_WRITE) ] = {
445 [ C(RESULT_ACCESS) ] = 0, 493 [ C(RESULT_ACCESS) ] = 0,
@@ -566,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
566 614
567static bool reserve_pmc_hardware(void) 615static bool reserve_pmc_hardware(void)
568{ 616{
617#ifdef CONFIG_X86_LOCAL_APIC
569 int i; 618 int i;
570 619
571 if (nmi_watchdog == NMI_LOCAL_APIC) 620 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -580,9 +629,11 @@ static bool reserve_pmc_hardware(void)
580 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 629 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
581 goto eventsel_fail; 630 goto eventsel_fail;
582 } 631 }
632#endif
583 633
584 return true; 634 return true;
585 635
636#ifdef CONFIG_X86_LOCAL_APIC
586eventsel_fail: 637eventsel_fail:
587 for (i--; i >= 0; i--) 638 for (i--; i >= 0; i--)
588 release_evntsel_nmi(x86_pmu.eventsel + i); 639 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -597,10 +648,12 @@ perfctr_fail:
597 enable_lapic_nmi_watchdog(); 648 enable_lapic_nmi_watchdog();
598 649
599 return false; 650 return false;
651#endif
600} 652}
601 653
602static void release_pmc_hardware(void) 654static void release_pmc_hardware(void)
603{ 655{
656#ifdef CONFIG_X86_LOCAL_APIC
604 int i; 657 int i;
605 658
606 for (i = 0; i < x86_pmu.num_counters; i++) { 659 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -610,6 +663,7 @@ static void release_pmc_hardware(void)
610 663
611 if (nmi_watchdog == NMI_LOCAL_APIC) 664 if (nmi_watchdog == NMI_LOCAL_APIC)
612 enable_lapic_nmi_watchdog(); 665 enable_lapic_nmi_watchdog();
666#endif
613} 667}
614 668
615static void hw_perf_counter_destroy(struct perf_counter *counter) 669static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -665,6 +719,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
665{ 719{
666 struct perf_counter_attr *attr = &counter->attr; 720 struct perf_counter_attr *attr = &counter->attr;
667 struct hw_perf_counter *hwc = &counter->hw; 721 struct hw_perf_counter *hwc = &counter->hw;
722 u64 config;
668 int err; 723 int err;
669 724
670 if (!x86_pmu_initialized()) 725 if (!x86_pmu_initialized())
@@ -700,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
700 hwc->sample_period = x86_pmu.max_period; 755 hwc->sample_period = x86_pmu.max_period;
701 hwc->last_period = hwc->sample_period; 756 hwc->last_period = hwc->sample_period;
702 atomic64_set(&hwc->period_left, hwc->sample_period); 757 atomic64_set(&hwc->period_left, hwc->sample_period);
758 } else {
759 /*
760 * If we have a PMU initialized but no APIC
761 * interrupts, we cannot sample hardware
762 * counters (user-space has to fall back and
763 * sample via a hrtimer based software counter):
764 */
765 if (!x86_pmu.apic)
766 return -EOPNOTSUPP;
703 } 767 }
704 768
705 counter->destroy = hw_perf_counter_destroy; 769 counter->destroy = hw_perf_counter_destroy;
@@ -717,14 +781,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
717 781
718 if (attr->config >= x86_pmu.max_events) 782 if (attr->config >= x86_pmu.max_events)
719 return -EINVAL; 783 return -EINVAL;
784
720 /* 785 /*
721 * The generic map: 786 * The generic map:
722 */ 787 */
723 hwc->config |= x86_pmu.event_map(attr->config); 788 config = x86_pmu.event_map(attr->config);
789
790 if (config == 0)
791 return -ENOENT;
792
793 if (config == -1LL)
794 return -EINVAL;
795
796 hwc->config |= config;
724 797
725 return 0; 798 return 0;
726} 799}
727 800
801static void p6_pmu_disable_all(void)
802{
803 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
804 u64 val;
805
806 if (!cpuc->enabled)
807 return;
808
809 cpuc->enabled = 0;
810 barrier();
811
812 /* p6 only has one enable register */
813 rdmsrl(MSR_P6_EVNTSEL0, val);
814 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
815 wrmsrl(MSR_P6_EVNTSEL0, val);
816}
817
728static void intel_pmu_disable_all(void) 818static void intel_pmu_disable_all(void)
729{ 819{
730 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 820 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -766,6 +856,23 @@ void hw_perf_disable(void)
766 return x86_pmu.disable_all(); 856 return x86_pmu.disable_all();
767} 857}
768 858
859static void p6_pmu_enable_all(void)
860{
861 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
862 unsigned long val;
863
864 if (cpuc->enabled)
865 return;
866
867 cpuc->enabled = 1;
868 barrier();
869
870 /* p6 only has one enable register */
871 rdmsrl(MSR_P6_EVNTSEL0, val);
872 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
873 wrmsrl(MSR_P6_EVNTSEL0, val);
874}
875
769static void intel_pmu_enable_all(void) 876static void intel_pmu_enable_all(void)
770{ 877{
771 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 878 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -783,13 +890,13 @@ static void amd_pmu_enable_all(void)
783 barrier(); 890 barrier();
784 891
785 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 892 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
893 struct perf_counter *counter = cpuc->counters[idx];
786 u64 val; 894 u64 val;
787 895
788 if (!test_bit(idx, cpuc->active_mask)) 896 if (!test_bit(idx, cpuc->active_mask))
789 continue; 897 continue;
790 rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 898
791 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) 899 val = counter->hw.config;
792 continue;
793 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 900 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
794 wrmsrl(MSR_K7_EVNTSEL0 + idx, val); 901 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
795 } 902 }
@@ -818,16 +925,13 @@ static inline void intel_pmu_ack_status(u64 ack)
818 925
819static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 926static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
820{ 927{
821 int err; 928 (void)checking_wrmsrl(hwc->config_base + idx,
822 err = checking_wrmsrl(hwc->config_base + idx,
823 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); 929 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
824} 930}
825 931
826static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 932static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
827{ 933{
828 int err; 934 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
829 err = checking_wrmsrl(hwc->config_base + idx,
830 hwc->config);
831} 935}
832 936
833static inline void 937static inline void
@@ -835,13 +939,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
835{ 939{
836 int idx = __idx - X86_PMC_IDX_FIXED; 940 int idx = __idx - X86_PMC_IDX_FIXED;
837 u64 ctrl_val, mask; 941 u64 ctrl_val, mask;
838 int err;
839 942
840 mask = 0xfULL << (idx * 4); 943 mask = 0xfULL << (idx * 4);
841 944
842 rdmsrl(hwc->config_base, ctrl_val); 945 rdmsrl(hwc->config_base, ctrl_val);
843 ctrl_val &= ~mask; 946 ctrl_val &= ~mask;
844 err = checking_wrmsrl(hwc->config_base, ctrl_val); 947 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
948}
949
950static inline void
951p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
952{
953 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
954 u64 val = P6_NOP_COUNTER;
955
956 if (cpuc->enabled)
957 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
958
959 (void)checking_wrmsrl(hwc->config_base + idx, val);
845} 960}
846 961
847static inline void 962static inline void
@@ -911,6 +1026,8 @@ x86_perf_counter_set_period(struct perf_counter *counter,
911 err = checking_wrmsrl(hwc->counter_base + idx, 1026 err = checking_wrmsrl(hwc->counter_base + idx,
912 (u64)(-left) & x86_pmu.counter_mask); 1027 (u64)(-left) & x86_pmu.counter_mask);
913 1028
1029 perf_counter_update_userpage(counter);
1030
914 return ret; 1031 return ret;
915} 1032}
916 1033
@@ -940,6 +1057,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
940 err = checking_wrmsrl(hwc->config_base, ctrl_val); 1057 err = checking_wrmsrl(hwc->config_base, ctrl_val);
941} 1058}
942 1059
1060static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1061{
1062 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1063 u64 val;
1064
1065 val = hwc->config;
1066 if (cpuc->enabled)
1067 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1068
1069 (void)checking_wrmsrl(hwc->config_base + idx, val);
1070}
1071
1072
943static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 1073static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
944{ 1074{
945 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 1075 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -956,8 +1086,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
956 1086
957 if (cpuc->enabled) 1087 if (cpuc->enabled)
958 x86_pmu_enable_counter(hwc, idx); 1088 x86_pmu_enable_counter(hwc, idx);
959 else
960 x86_pmu_disable_counter(hwc, idx);
961} 1089}
962 1090
963static int 1091static int
@@ -968,13 +1096,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
968 if (!x86_pmu.num_counters_fixed) 1096 if (!x86_pmu.num_counters_fixed)
969 return -1; 1097 return -1;
970 1098
971 /*
972 * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
973 */
974 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
975 boot_cpu_data.x86_model == 28)
976 return -1;
977
978 event = hwc->config & ARCH_PERFMON_EVENT_MASK; 1099 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
979 1100
980 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) 1101 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
@@ -1040,6 +1161,8 @@ try_generic:
1040 x86_perf_counter_set_period(counter, hwc, idx); 1161 x86_perf_counter_set_period(counter, hwc, idx);
1041 x86_pmu.enable(hwc, idx); 1162 x86_pmu.enable(hwc, idx);
1042 1163
1164 perf_counter_update_userpage(counter);
1165
1043 return 0; 1166 return 0;
1044} 1167}
1045 1168
@@ -1132,6 +1255,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
1132 x86_perf_counter_update(counter, hwc, idx); 1255 x86_perf_counter_update(counter, hwc, idx);
1133 cpuc->counters[idx] = NULL; 1256 cpuc->counters[idx] = NULL;
1134 clear_bit(idx, cpuc->used_mask); 1257 clear_bit(idx, cpuc->used_mask);
1258
1259 perf_counter_update_userpage(counter);
1135} 1260}
1136 1261
1137/* 1262/*
@@ -1176,6 +1301,49 @@ static void intel_pmu_reset(void)
1176 local_irq_restore(flags); 1301 local_irq_restore(flags);
1177} 1302}
1178 1303
1304static int p6_pmu_handle_irq(struct pt_regs *regs)
1305{
1306 struct perf_sample_data data;
1307 struct cpu_hw_counters *cpuc;
1308 struct perf_counter *counter;
1309 struct hw_perf_counter *hwc;
1310 int idx, handled = 0;
1311 u64 val;
1312
1313 data.regs = regs;
1314 data.addr = 0;
1315
1316 cpuc = &__get_cpu_var(cpu_hw_counters);
1317
1318 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1319 if (!test_bit(idx, cpuc->active_mask))
1320 continue;
1321
1322 counter = cpuc->counters[idx];
1323 hwc = &counter->hw;
1324
1325 val = x86_perf_counter_update(counter, hwc, idx);
1326 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1327 continue;
1328
1329 /*
1330 * counter overflow
1331 */
1332 handled = 1;
1333 data.period = counter->hw.last_period;
1334
1335 if (!x86_perf_counter_set_period(counter, hwc, idx))
1336 continue;
1337
1338 if (perf_counter_overflow(counter, 1, &data))
1339 p6_pmu_disable_counter(hwc, idx);
1340 }
1341
1342 if (handled)
1343 inc_irq_stat(apic_perf_irqs);
1344
1345 return handled;
1346}
1179 1347
1180/* 1348/*
1181 * This handler is triggered by the local APIC, so the APIC IRQ handling 1349 * This handler is triggered by the local APIC, so the APIC IRQ handling
@@ -1185,14 +1353,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1185{ 1353{
1186 struct perf_sample_data data; 1354 struct perf_sample_data data;
1187 struct cpu_hw_counters *cpuc; 1355 struct cpu_hw_counters *cpuc;
1188 int bit, cpu, loops; 1356 int bit, loops;
1189 u64 ack, status; 1357 u64 ack, status;
1190 1358
1191 data.regs = regs; 1359 data.regs = regs;
1192 data.addr = 0; 1360 data.addr = 0;
1193 1361
1194 cpu = smp_processor_id(); 1362 cpuc = &__get_cpu_var(cpu_hw_counters);
1195 cpuc = &per_cpu(cpu_hw_counters, cpu);
1196 1363
1197 perf_disable(); 1364 perf_disable();
1198 status = intel_pmu_get_status(); 1365 status = intel_pmu_get_status();
@@ -1223,6 +1390,8 @@ again:
1223 if (!intel_pmu_save_and_restart(counter)) 1390 if (!intel_pmu_save_and_restart(counter))
1224 continue; 1391 continue;
1225 1392
1393 data.period = counter->hw.last_period;
1394
1226 if (perf_counter_overflow(counter, 1, &data)) 1395 if (perf_counter_overflow(counter, 1, &data))
1227 intel_pmu_disable_counter(&counter->hw, bit); 1396 intel_pmu_disable_counter(&counter->hw, bit);
1228 } 1397 }
@@ -1247,14 +1416,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1247 struct cpu_hw_counters *cpuc; 1416 struct cpu_hw_counters *cpuc;
1248 struct perf_counter *counter; 1417 struct perf_counter *counter;
1249 struct hw_perf_counter *hwc; 1418 struct hw_perf_counter *hwc;
1250 int cpu, idx, handled = 0; 1419 int idx, handled = 0;
1251 u64 val; 1420 u64 val;
1252 1421
1253 data.regs = regs; 1422 data.regs = regs;
1254 data.addr = 0; 1423 data.addr = 0;
1255 1424
1256 cpu = smp_processor_id(); 1425 cpuc = &__get_cpu_var(cpu_hw_counters);
1257 cpuc = &per_cpu(cpu_hw_counters, cpu);
1258 1426
1259 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1427 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1260 if (!test_bit(idx, cpuc->active_mask)) 1428 if (!test_bit(idx, cpuc->active_mask))
@@ -1297,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1297 1465
1298void set_perf_counter_pending(void) 1466void set_perf_counter_pending(void)
1299{ 1467{
1468#ifdef CONFIG_X86_LOCAL_APIC
1300 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1469 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1470#endif
1301} 1471}
1302 1472
1303void perf_counters_lapic_init(void) 1473void perf_counters_lapic_init(void)
1304{ 1474{
1305 if (!x86_pmu_initialized()) 1475#ifdef CONFIG_X86_LOCAL_APIC
1476 if (!x86_pmu.apic || !x86_pmu_initialized())
1306 return; 1477 return;
1307 1478
1308 /* 1479 /*
1309 * Always use NMI for PMU 1480 * Always use NMI for PMU
1310 */ 1481 */
1311 apic_write(APIC_LVTPC, APIC_DM_NMI); 1482 apic_write(APIC_LVTPC, APIC_DM_NMI);
1483#endif
1312} 1484}
1313 1485
1314static int __kprobes 1486static int __kprobes
@@ -1332,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1332 1504
1333 regs = args->regs; 1505 regs = args->regs;
1334 1506
1507#ifdef CONFIG_X86_LOCAL_APIC
1335 apic_write(APIC_LVTPC, APIC_DM_NMI); 1508 apic_write(APIC_LVTPC, APIC_DM_NMI);
1509#endif
1336 /* 1510 /*
1337 * Can't rely on the handled return value to say it was our NMI, two 1511 * Can't rely on the handled return value to say it was our NMI, two
1338 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1512 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1351,6 +1525,33 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
1351 .priority = 1 1525 .priority = 1
1352}; 1526};
1353 1527
1528static struct x86_pmu p6_pmu = {
1529 .name = "p6",
1530 .handle_irq = p6_pmu_handle_irq,
1531 .disable_all = p6_pmu_disable_all,
1532 .enable_all = p6_pmu_enable_all,
1533 .enable = p6_pmu_enable_counter,
1534 .disable = p6_pmu_disable_counter,
1535 .eventsel = MSR_P6_EVNTSEL0,
1536 .perfctr = MSR_P6_PERFCTR0,
1537 .event_map = p6_pmu_event_map,
1538 .raw_event = p6_pmu_raw_event,
1539 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1540 .apic = 1,
1541 .max_period = (1ULL << 31) - 1,
1542 .version = 0,
1543 .num_counters = 2,
1544 /*
1545 * Counters have 40 bits implemented. However they are designed such
1546 * that bits [32-39] are sign extensions of bit 31. As such the
1547 * effective width of a counter for P6-like PMU is 32 bits only.
1548 *
1549 * See IA-32 Intel Architecture Software developer manual Vol 3B
1550 */
1551 .counter_bits = 32,
1552 .counter_mask = (1ULL << 32) - 1,
1553};
1554
1354static struct x86_pmu intel_pmu = { 1555static struct x86_pmu intel_pmu = {
1355 .name = "Intel", 1556 .name = "Intel",
1356 .handle_irq = intel_pmu_handle_irq, 1557 .handle_irq = intel_pmu_handle_irq,
@@ -1363,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
1363 .event_map = intel_pmu_event_map, 1564 .event_map = intel_pmu_event_map,
1364 .raw_event = intel_pmu_raw_event, 1565 .raw_event = intel_pmu_raw_event,
1365 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1566 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1567 .apic = 1,
1366 /* 1568 /*
1367 * Intel PMCs cannot be accessed sanely above 32 bit width, 1569 * Intel PMCs cannot be accessed sanely above 32 bit width,
1368 * so we install an artificial 1<<31 period regardless of 1570 * so we install an artificial 1<<31 period regardless of
@@ -1386,10 +1588,43 @@ static struct x86_pmu amd_pmu = {
1386 .num_counters = 4, 1588 .num_counters = 4,
1387 .counter_bits = 48, 1589 .counter_bits = 48,
1388 .counter_mask = (1ULL << 48) - 1, 1590 .counter_mask = (1ULL << 48) - 1,
1591 .apic = 1,
1389 /* use highest bit to detect overflow */ 1592 /* use highest bit to detect overflow */
1390 .max_period = (1ULL << 47) - 1, 1593 .max_period = (1ULL << 47) - 1,
1391}; 1594};
1392 1595
1596static int p6_pmu_init(void)
1597{
1598 switch (boot_cpu_data.x86_model) {
1599 case 1:
1600 case 3: /* Pentium Pro */
1601 case 5:
1602 case 6: /* Pentium II */
1603 case 7:
1604 case 8:
1605 case 11: /* Pentium III */
1606 break;
1607 case 9:
1608 case 13:
1609 /* Pentium M */
1610 break;
1611 default:
1612 pr_cont("unsupported p6 CPU model %d ",
1613 boot_cpu_data.x86_model);
1614 return -ENODEV;
1615 }
1616
1617 x86_pmu = p6_pmu;
1618
1619 if (!cpu_has_apic) {
1620 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1621 pr_info("no hardware sampling interrupt available.\n");
1622 x86_pmu.apic = 0;
1623 }
1624
1625 return 0;
1626}
1627
1393static int intel_pmu_init(void) 1628static int intel_pmu_init(void)
1394{ 1629{
1395 union cpuid10_edx edx; 1630 union cpuid10_edx edx;
@@ -1398,8 +1633,14 @@ static int intel_pmu_init(void)
1398 unsigned int ebx; 1633 unsigned int ebx;
1399 int version; 1634 int version;
1400 1635
1401 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 1636 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1637 /* check for P6 processor family */
1638 if (boot_cpu_data.x86 == 6) {
1639 return p6_pmu_init();
1640 } else {
1402 return -ENODEV; 1641 return -ENODEV;
1642 }
1643 }
1403 1644
1404 /* 1645 /*
1405 * Check whether the Architectural PerfMon supports 1646 * Check whether the Architectural PerfMon supports
@@ -1425,8 +1666,6 @@ static int intel_pmu_init(void)
1425 */ 1666 */
1426 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 1667 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1427 1668
1428 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1429
1430 /* 1669 /*
1431 * Install the hw-cache-events table: 1670 * Install the hw-cache-events table:
1432 */ 1671 */
@@ -1459,18 +1698,16 @@ static int intel_pmu_init(void)
1459 1698
1460static int amd_pmu_init(void) 1699static int amd_pmu_init(void)
1461{ 1700{
1701 /* Performance-monitoring supported from K7 and later: */
1702 if (boot_cpu_data.x86 < 6)
1703 return -ENODEV;
1704
1462 x86_pmu = amd_pmu; 1705 x86_pmu = amd_pmu;
1463 1706
1464 switch (boot_cpu_data.x86) { 1707 /* Events are common for all AMDs */
1465 case 0x0f: 1708 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
1466 case 0x10: 1709 sizeof(hw_cache_event_ids));
1467 case 0x11:
1468 memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids,
1469 sizeof(hw_cache_event_ids));
1470 1710
1471 pr_cont("AMD Family 0f/10/11 events, ");
1472 break;
1473 }
1474 return 0; 1711 return 0;
1475} 1712}
1476 1713
@@ -1498,21 +1735,22 @@ void __init init_hw_perf_counters(void)
1498 pr_cont("%s PMU driver.\n", x86_pmu.name); 1735 pr_cont("%s PMU driver.\n", x86_pmu.name);
1499 1736
1500 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1737 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1501 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1502 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", 1738 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1503 x86_pmu.num_counters, X86_PMC_MAX_GENERIC); 1739 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1740 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1504 } 1741 }
1505 perf_counter_mask = (1 << x86_pmu.num_counters) - 1; 1742 perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1506 perf_max_counters = x86_pmu.num_counters; 1743 perf_max_counters = x86_pmu.num_counters;
1507 1744
1508 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1745 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1509 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1510 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", 1746 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1511 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); 1747 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1748 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1512 } 1749 }
1513 1750
1514 perf_counter_mask |= 1751 perf_counter_mask |=
1515 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; 1752 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1753 x86_pmu.intel_ctrl = perf_counter_mask;
1516 1754
1517 perf_counters_lapic_init(); 1755 perf_counters_lapic_init();
1518 register_die_notifier(&perf_counter_nmi_notifier); 1756 register_die_notifier(&perf_counter_nmi_notifier);
@@ -1554,14 +1792,15 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1554 */ 1792 */
1555 1793
1556static inline 1794static inline
1557void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) 1795void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1558{ 1796{
1559 if (entry->nr < MAX_STACK_DEPTH) 1797 if (entry->nr < PERF_MAX_STACK_DEPTH)
1560 entry->ip[entry->nr++] = ip; 1798 entry->ip[entry->nr++] = ip;
1561} 1799}
1562 1800
1563static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); 1801static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1564static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); 1802static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1803static DEFINE_PER_CPU(int, in_nmi_frame);
1565 1804
1566 1805
1567static void 1806static void
@@ -1577,14 +1816,19 @@ static void backtrace_warning(void *data, char *msg)
1577 1816
1578static int backtrace_stack(void *data, char *name) 1817static int backtrace_stack(void *data, char *name)
1579{ 1818{
1580 /* Don't bother with IRQ stacks for now */ 1819 per_cpu(in_nmi_frame, smp_processor_id()) =
1581 return -1; 1820 x86_is_stack_id(NMI_STACK, name);
1821
1822 return 0;
1582} 1823}
1583 1824
1584static void backtrace_address(void *data, unsigned long addr, int reliable) 1825static void backtrace_address(void *data, unsigned long addr, int reliable)
1585{ 1826{
1586 struct perf_callchain_entry *entry = data; 1827 struct perf_callchain_entry *entry = data;
1587 1828
1829 if (per_cpu(in_nmi_frame, smp_processor_id()))
1830 return;
1831
1588 if (reliable) 1832 if (reliable)
1589 callchain_store(entry, addr); 1833 callchain_store(entry, addr);
1590} 1834}
@@ -1596,47 +1840,59 @@ static const struct stacktrace_ops backtrace_ops = {
1596 .address = backtrace_address, 1840 .address = backtrace_address,
1597}; 1841};
1598 1842
1843#include "../dumpstack.h"
1844
1599static void 1845static void
1600perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1846perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1601{ 1847{
1602 unsigned long bp; 1848 callchain_store(entry, PERF_CONTEXT_KERNEL);
1603 char *stack; 1849 callchain_store(entry, regs->ip);
1604 int nr = entry->nr;
1605 1850
1606 callchain_store(entry, instruction_pointer(regs)); 1851 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1852}
1607 1853
1608 stack = ((char *)regs + sizeof(struct pt_regs)); 1854/*
1609#ifdef CONFIG_FRAME_POINTER 1855 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1610 bp = frame_pointer(regs); 1856 */
1611#else 1857static unsigned long
1612 bp = 0; 1858copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1613#endif 1859{
1860 unsigned long offset, addr = (unsigned long)from;
1861 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1862 unsigned long size, len = 0;
1863 struct page *page;
1864 void *map;
1865 int ret;
1614 1866
1615 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); 1867 do {
1868 ret = __get_user_pages_fast(addr, 1, 0, &page);
1869 if (!ret)
1870 break;
1616 1871
1617 entry->kernel = entry->nr - nr; 1872 offset = addr & (PAGE_SIZE - 1);
1618} 1873 size = min(PAGE_SIZE - offset, n - len);
1619 1874
1875 map = kmap_atomic(page, type);
1876 memcpy(to, map+offset, size);
1877 kunmap_atomic(map, type);
1878 put_page(page);
1620 1879
1621struct stack_frame { 1880 len += size;
1622 const void __user *next_fp; 1881 to += size;
1623 unsigned long return_address; 1882 addr += size;
1624}; 1883
1884 } while (len < n);
1885
1886 return len;
1887}
1625 1888
1626static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 1889static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1627{ 1890{
1628 int ret; 1891 unsigned long bytes;
1629
1630 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
1631 return 0;
1632 1892
1633 ret = 1; 1893 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1634 pagefault_disable();
1635 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
1636 ret = 0;
1637 pagefault_enable();
1638 1894
1639 return ret; 1895 return bytes == sizeof(*frame);
1640} 1896}
1641 1897
1642static void 1898static void
@@ -1644,28 +1900,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1644{ 1900{
1645 struct stack_frame frame; 1901 struct stack_frame frame;
1646 const void __user *fp; 1902 const void __user *fp;
1647 int nr = entry->nr;
1648 1903
1649 regs = (struct pt_regs *)current->thread.sp0 - 1; 1904 if (!user_mode(regs))
1650 fp = (void __user *)regs->bp; 1905 regs = task_pt_regs(current);
1906
1907 fp = (void __user *)regs->bp;
1651 1908
1909 callchain_store(entry, PERF_CONTEXT_USER);
1652 callchain_store(entry, regs->ip); 1910 callchain_store(entry, regs->ip);
1653 1911
1654 while (entry->nr < MAX_STACK_DEPTH) { 1912 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1655 frame.next_fp = NULL; 1913 frame.next_frame = NULL;
1656 frame.return_address = 0; 1914 frame.return_address = 0;
1657 1915
1658 if (!copy_stack_frame(fp, &frame)) 1916 if (!copy_stack_frame(fp, &frame))
1659 break; 1917 break;
1660 1918
1661 if ((unsigned long)fp < user_stack_pointer(regs)) 1919 if ((unsigned long)fp < regs->sp)
1662 break; 1920 break;
1663 1921
1664 callchain_store(entry, frame.return_address); 1922 callchain_store(entry, frame.return_address);
1665 fp = frame.next_fp; 1923 fp = frame.next_frame;
1666 } 1924 }
1667
1668 entry->user = entry->nr - nr;
1669} 1925}
1670 1926
1671static void 1927static void
@@ -1701,9 +1957,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1701 entry = &__get_cpu_var(irq_entry); 1957 entry = &__get_cpu_var(irq_entry);
1702 1958
1703 entry->nr = 0; 1959 entry->nr = 0;
1704 entry->hv = 0;
1705 entry->kernel = 0;
1706 entry->user = 0;
1707 1960
1708 perf_do_callchain(regs, entry); 1961 perf_do_callchain(regs, entry);
1709 1962
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d6f5b9fbde32..e60ed740d2b3 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void)
716 wd_ops = &k7_wd_ops; 716 wd_ops = &k7_wd_ops;
717 break; 717 break;
718 case X86_VENDOR_INTEL: 718 case X86_VENDOR_INTEL:
719 /* 719 /* Work around where perfctr1 doesn't have a working enable
720 * Work around Core Duo (Yonah) errata AE49 where perfctr1 720 * bit as described in the following errata:
721 * doesn't have a working enable bit. 721 * AE49 Core Duo and Intel Core Solo 65 nm
722 * AN49 Intel Pentium Dual-Core
723 * AF49 Dual-Core Intel Xeon Processor LV
722 */ 724 */
723 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { 725 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
726 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
727 boot_cpu_data.x86_mask == 4))) {
724 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; 728 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
725 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; 729 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
726 } 730 }
@@ -799,8 +803,3 @@ int __kprobes lapic_wd_event(unsigned nmi_hz)
799 wd_ops->rearm(wd, nmi_hz); 803 wd_ops->rearm(wd, nmi_hz);
800 return 1; 804 return 1;
801} 805}
802
803int lapic_watchdog_ok(void)
804{
805 return wd_ops != NULL;
806}