Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce-inject.c		4
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce-severity.c	15
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce.c		5
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce_intel.c		12
-rw-r--r--	arch/x86/kernel/cpu/mcheck/therm_throt.c	67
-rw-r--r--	arch/x86/kernel/cpu/mcheck/threshold.c		24
6 files changed, 85 insertions, 42 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index ddc72f839332..5ac2d1fb28bc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -153,7 +153,7 @@ static void raise_mce(struct mce *m)
 		return;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
+	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
 		unsigned long start;
 		int cpu;
 
@@ -167,7 +167,7 @@ static void raise_mce(struct mce *m)
 			cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	}
 	if (!cpumask_empty(mce_inject_cpumask)) {
-		if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+		if (m->inject_flags & MCJ_IRQ_BROADCAST) {
 			/*
 			 * don't wait because mce_irq_ipi is necessary
 			 * to be sync with following raise_local
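The two hunks above fix a long-standing misspelling, MCJ_IRQ_BRAODCAST -> MCJ_IRQ_BROADCAST, at its call sites. The flag itself is defined in arch/x86/include/asm/mce.h, which falls outside this diffstat's filter and is presumably renamed by the same commit. As a sketch, the corrected inject-flag block would look roughly like this (bit values quoted from memory from the 3.10-era header, so treat them as illustrative):

	#define MCJ_CTX_MASK		3
	#define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
	#define MCJ_CTX_RANDOM		0	/* inject context: random */
	#define MCJ_CTX_PROCESS		0x1	/* inject context: process */
	#define MCJ_CTX_IRQ		0x2	/* inject context: IRQ */
	#define MCJ_NMI_BROADCAST	0x4	/* do NMI broadcasting */
	#define MCJ_EXCEPTION		0x8	/* raise as exception */
	#define MCJ_IRQ_BROADCAST	0x10	/* do IRQ broadcasting (was "BRAODCAST") */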
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f1689e52..e2703520d120 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
 	/* known AR MCACODs: */
 #ifdef CONFIG_MEMORY_FAILURE
 	MCESEV(
-		KEEP, "HT thread notices Action required: data load error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
-		MCGMASK(MCG_STATUS_EIPV, 0)
+		KEEP, "Action required but unaffected thread is continuable",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
+		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
 	),
 	MCESEV(
-		AR, "Action required: data load error",
+		AR, "Action required: data load error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
 		USER
 	),
 	MCESEV(
-		KEEP, "HT thread notices Action required: instruction fetch error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
-		MCGMASK(MCG_STATUS_EIPV, 0)
-	),
-	MCESEV(
-		AR, "Action required: instruction fetch error",
+		AR, "Action required: instruction fetch error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
 		USER
 	),
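For context: a severity-table entry applies when (status & mask) == result and (mcgstatus & mcgmask) == mcgres, and the first matching entry wins. The old table recognized the sibling hyper-thread by EIPV being clear; the reworked KEEP entry instead requires RIPV to be set, i.e. the unaffected thread may continue because its return IP is valid, and it no longer insists on MCACOD_DATA, so it also covers the instruction-fetch case (which is why the second "HT thread" entry could be dropped). A condensed sketch of the matcher in mce_severity(), simplified from the same file with the SER and context checks elided:

	for (s = severities;; s++) {
		if ((m->status & s->mask) != s->result)
			continue;
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
			continue;
		/* SER-only entries are skipped on non-SER CPUs, etc. */
		return s->sev;
	}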
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504b41cb..bf49cdbb010f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/* MCA banks polled by the period polling timer for corrected events */
+/*
+ * MCA banks polled by the period polling timer for corrected events.
+ * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
+ */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
 };
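The expanded comment is backed by the CMCI discovery logic in mce_intel.c: once a CPU successfully claims a bank for CMCI delivery, it drops that bank from its polling mask, leaving only non-CMCI banks to the timer. Roughly, condensed from the 3.10-era cmci_discover() (locking and shared-bank bookkeeping omitted):

	rdmsrl(MSR_IA32_MCx_CTL2(i), val);
	val |= MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(i), val);
	rdmsrl(MSR_IA32_MCx_CTL2(i), val);

	/* Did the enable bit stick? -- the bank supports CMCI */
	if (val & MCI_CTL2_CMCI_EN) {
		set_bit(i, owned);
		__clear_bit(i, __get_cpu_var(mce_poll_banks));
	}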
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index ae1697c2afe3..d56405309dc1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -24,6 +24,18 @@
  * Also supports reliable discovery of shared banks.
  */
 
+/*
+ * CMCI can be delivered to multiple cpus that share a machine check bank
+ * so we need to designate a single cpu to process errors logged in each bank
+ * in the interrupt handler (otherwise we would have many races and potential
+ * double reporting of the same error).
+ * Note that this can change when a cpu is offlined or brought online since
+ * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
+ * disables CMCI on all banks owned by the cpu and clears this bitfield. At
+ * this point, cmci_rediscover() kicks in and a different cpu may end up
+ * taking ownership of some of the shared MCA banks that were previously
+ * owned by the offlined cpu.
+ */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
 /*
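The offline half of the story told by this new comment is cmci_clear(): the departing CPU undoes its MSR-level claim on every bank it owns and empties mce_banks_owned, after which cmci_rediscover() lets a surviving sharer win those banks. Approximately, condensed from the same file (the cmci_supported() check and threshold handling omitted):

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
			continue;
		/* Disable CMCI */
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
		val &= ~MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		__clear_bit(i, __get_cpu_var(mce_banks_owned));
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);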
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4131c0393594..41e8e00a6637 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -29,6 +29,7 @@
 #include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
 
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
@@ -193,11 +194,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-		else
-			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
-				this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package",
-				state->count);
 		return 1;
 	}
 	if (old_event) {
@@ -205,10 +201,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package");
-		else
-			printk(KERN_INFO "CPU%d: %s power limit normal\n",
-				this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package");
 		return 1;
 	}
 
@@ -237,6 +229,15 @@ static int thresh_event_valid(int level, int event)
 	return 1;
 }
 
+static bool int_pln_enable;
+static int __init int_pln_enable_setup(char *s)
+{
+	int_pln_enable = true;
+
+	return 1;
+}
+__setup("int_pln_enable", int_pln_enable_setup);
+
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct device *dev,
@@ -249,7 +250,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
 	if (err)
 		return err;
 
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_core_power_limit_count.attr,
 					      thermal_attr_group.name);
@@ -257,7 +258,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_package_throttle_count.attr,
 					      thermal_attr_group.name);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 			err = sysfs_add_file_to_group(&dev->kobj,
 				&dev_attr_package_power_limit_count.attr,
 				thermal_attr_group.name);
@@ -405,7 +406,7 @@ static void intel_thermal_interrupt(void)
 				CORE_LEVEL) != 0)
 		mce_log_therm_throt_event(msr_val);
 
-	if (this_cpu_has(X86_FEATURE_PLN))
+	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					CORE_LEVEL);
@@ -417,7 +418,7 @@ static void intel_thermal_interrupt(void)
 		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL);
-		if (this_cpu_has(X86_FEATURE_PLN))
+		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 			therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
@@ -433,15 +434,26 @@ static void unexpected_thermal_interrupt(void)
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
 
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+static inline void __smp_thermal_interrupt(void)
 {
-	irq_enter();
-	exit_idle();
 	inc_irq_stat(irq_thermal_count);
 	smp_thermal_vector();
-	irq_exit();
-	/* Ack only at the end to avoid potential reentry */
-	ack_APIC_irq();
+}
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	__smp_thermal_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
+	__smp_thermal_interrupt();
+	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
+	exiting_ack_irq();
+}
 
 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
@@ -525,9 +537,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+		wrmsr(MSR_IA32_THERM_INTERRUPT,
+			(l | (THERM_INT_LOW_ENABLE
+			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
+	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 		wrmsr(MSR_IA32_THERM_INTERRUPT,
 			l | (THERM_INT_LOW_ENABLE
 			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
 	else
 		wrmsr(MSR_IA32_THERM_INTERRUPT,
@@ -535,9 +551,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_PTS)) {
 		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
 			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
-			      l | (PACKAGE_THERM_INT_LOW_ENABLE
+				(l | (PACKAGE_THERM_INT_LOW_ENABLE
+				| PACKAGE_THERM_INT_HIGH_ENABLE))
+				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
+		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+				l | (PACKAGE_THERM_INT_LOW_ENABLE
 				| PACKAGE_THERM_INT_HIGH_ENABLE
 				| PACKAGE_THERM_INT_PLN_ENABLE), h);
 		else
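Two independent changes meet in this file. First, power limit notification (PLN) interrupts become opt-in: unless int_pln_enable is passed on the kernel command line, intel_init_thermal() now masks THERM_INT_PLN_ENABLE / PACKAGE_THERM_INT_PLN_ENABLE, the sysfs counters are not created, and the formerly noisy power-limit printks are removed outright. Second, the irq entry/exit boilerplate moves into helpers so a tracing variant can wrap the shared handler body. Those helpers live in arch/x86/include/asm/apic.h as of this series; from memory they expand roughly to the following, preserving the ordering of the open-coded sequence they replace:

	static inline void entering_irq(void)
	{
		irq_enter();
		exit_idle();
	}

	static inline void exiting_ack_irq(void)
	{
		irq_exit();
		/* Ack only at the end to avoid potential reentry */
		ack_APIC_irq();
	}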
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index aa578cadb940..fe6b1c86645b 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -8,6 +8,7 @@
 #include <asm/apic.h>
 #include <asm/idle.h>
 #include <asm/mce.h>
+#include <asm/trace/irq_vectors.h>
 
 static void default_threshold_interrupt(void)
 {
@@ -17,13 +18,24 @@ static void default_threshold_interrupt(void)
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
-asmlinkage void smp_threshold_interrupt(void)
+static inline void __smp_threshold_interrupt(void)
 {
-	irq_enter();
-	exit_idle();
 	inc_irq_stat(irq_threshold_count);
 	mce_threshold_vector();
-	irq_exit();
-	/* Ack only at the end to avoid potential reentry */
-	ack_APIC_irq();
+}
+
+asmlinkage void smp_threshold_interrupt(void)
+{
+	entering_irq();
+	__smp_threshold_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_threshold_interrupt(void)
+{
+	entering_irq();
+	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
+	__smp_threshold_interrupt();
+	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
+	exiting_ack_irq();
+}
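threshold.c gets the same treatment as the thermal vector: a shared __smp_threshold_interrupt() body, with the smp_trace_* variant bracketing it in trace_threshold_apic_entry()/_exit(). The tracepoints come from the newly included asm/trace/irq_vectors.h; a sketch of its shape, reconstructed from memory (all vector events share a single class keyed on the vector number):

	DECLARE_EVENT_CLASS(x86_irq_vector,
		TP_PROTO(int vector),
		TP_ARGS(vector),
		TP_STRUCT__entry(__field(int, vector)),
		TP_fast_assign(__entry->vector = vector;),
		TP_printk("vector=%d", __entry->vector)
	);

	#define DEFINE_IRQ_VECTOR_EVENT(name)			\
	DEFINE_EVENT(x86_irq_vector, name##_entry,		\
		TP_PROTO(int vector), TP_ARGS(vector));		\
	DEFINE_EVENT(x86_irq_vector, name##_exit,		\
		TP_PROTO(int vector), TP_ARGS(vector));

	DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
	DEFINE_IRQ_VECTOR_EVENT(threshold_apic);

Assuming the usual tracefs layout, the events then appear as irq_vectors:threshold_apic_entry and friends under /sys/kernel/debug/tracing/events/irq_vectors/.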