Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
 arch/x86/kernel/cpu/mcheck/mce-inject.c   |  4
 arch/x86/kernel/cpu/mcheck/mce-severity.c | 15
 arch/x86/kernel/cpu/mcheck/mce.c          |  5
 arch/x86/kernel/cpu/mcheck/mce_intel.c    | 12
 arch/x86/kernel/cpu/mcheck/therm_throt.c  | 67
 arch/x86/kernel/cpu/mcheck/threshold.c    | 24
 6 files changed, 85 insertions(+), 42 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index ddc72f839332..5ac2d1fb28bc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -153,7 +153,7 @@ static void raise_mce(struct mce *m)
 		return;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
+	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
 		unsigned long start;
 		int cpu;
 
@@ -167,7 +167,7 @@ static void raise_mce(struct mce *m)
 			cpumask_clear_cpu(cpu, mce_inject_cpumask);
 		}
 		if (!cpumask_empty(mce_inject_cpumask)) {
-			if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
 				/*
 				 * don't wait because mce_irq_ipi is necessary
 				 * to be sync with following raise_local
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f1689e52..e2703520d120 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
 	/* known AR MCACODs: */
 #ifdef CONFIG_MEMORY_FAILURE
 	MCESEV(
-		KEEP, "HT thread notices Action required: data load error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
-		MCGMASK(MCG_STATUS_EIPV, 0)
+		KEEP, "Action required but unaffected thread is continuable",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
+		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
 	),
 	MCESEV(
-		AR, "Action required: data load error",
+		AR, "Action required: data load error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
 		USER
 	),
 	MCESEV(
-		KEEP, "HT thread notices Action required: instruction fetch error",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
-		MCGMASK(MCG_STATUS_EIPV, 0)
-	),
-	MCESEV(
-		AR, "Action required: instruction fetch error",
+		AR, "Action required: instruction fetch error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
 		USER
 	),
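The severity table above is order-sensitive: mce_severity() walks it top to bottom and the first entry whose masked MCi_STATUS and MCG_STATUS bits match wins, which is why the new RIPV-based KEEP rule has to sit before the AR rules. Below is a minimal user-space model of that first-match walk; the bit values and entry contents are illustrative stand-ins, not the kernel's real layout.

/* Illustrative model of mce-severity.c's first-match table walk.   */
/* Bit values below are stand-ins, NOT the kernel's real layout.    */
#include <stdint.h>
#include <stdio.h>

#define MCI_UC_SAR      0x4ULL          /* stand-in status bits */
#define MCI_ADDR        0x2ULL
#define MCG_STATUS_RIPV 0x1ULL          /* "return IP valid" */

struct rule {
        const char *msg;
        uint64_t mask, result;          /* test on MCi_STATUS */
        uint64_t mcgmask, mcgres;       /* test on MCG_STATUS */
};

/* Order matters: the RIPV "continuable" rule must precede the AR rules. */
static const struct rule table[] = {
        { "KEEP: unaffected thread is continuable",
          MCI_UC_SAR | MCI_ADDR, MCI_UC_SAR | MCI_ADDR,
          MCG_STATUS_RIPV, MCG_STATUS_RIPV },
        { "AR: action required in a user process",
          MCI_UC_SAR | MCI_ADDR, MCI_UC_SAR | MCI_ADDR, 0, 0 },
        { "fallback", 0, 0, 0, 0 },     /* always matches, ends the walk */
};

static const char *classify(uint64_t status, uint64_t mcgstatus)
{
        for (const struct rule *r = table; ; r++) {
                if ((status & r->mask) != r->result)
                        continue;
                if ((mcgstatus & r->mcgmask) != r->mcgres)
                        continue;
                return r->msg;          /* first match wins */
        }
}

int main(void)
{
        /* RIPV set: the other thread can continue -> KEEP rule fires */
        printf("%s\n", classify(MCI_UC_SAR | MCI_ADDR, MCG_STATUS_RIPV));
        /* RIPV clear: falls through to the AR rule */
        printf("%s\n", classify(MCI_UC_SAR | MCI_ADDR, 0));
        return 0;
}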
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504b41cb..bf49cdbb010f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int cpu_missing;
 
-/* MCA banks polled by the period polling timer for corrected events */
+/*
+ * MCA banks polled by the period polling timer for corrected events.
+ * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
+ */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
 };
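The expanded comment codifies the division of labor: banks that can raise CMCI are serviced from the interrupt handler, and only the remainder stays in mce_poll_banks for the periodic timer. A toy user-space sketch of that split follows; the 16-bank size and the per-bank capabilities are made up for illustration.

/* Toy model of the poll/CMCI split; sizes and capabilities are invented. */
#include <stdio.h>

#define NR_BANKS 16

static unsigned long mce_poll_banks = (1UL << NR_BANKS) - 1; /* poll all */
static unsigned long mce_banks_owned;                        /* CMCI-owned */

/* Stand-in for probing MSR_IA32_MCx_CTL2 for CMCI capability. */
static int bank_supports_cmci(int bank)
{
        return bank < 4;        /* pretend only banks 0-3 do CMCI */
}

static void cmci_discover_model(void)
{
        for (int bank = 0; bank < NR_BANKS; bank++) {
                if (!bank_supports_cmci(bank))
                        continue;
                mce_banks_owned |= 1UL << bank;   /* interrupt handles it */
                mce_poll_banks &= ~(1UL << bank); /* timer stops polling it */
        }
}

int main(void)
{
        cmci_discover_model();
        printf("timer still polls banks: %#lx\n", mce_poll_banks);
        return 0;
}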
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index ae1697c2afe3..d56405309dc1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -24,6 +24,18 @@
  * Also supports reliable discovery of shared banks.
  */
 
+/*
+ * CMCI can be delivered to multiple cpus that share a machine check bank
+ * so we need to designate a single cpu to process errors logged in each bank
+ * in the interrupt handler (otherwise we would have many races and potential
+ * double reporting of the same error).
+ * Note that this can change when a cpu is offlined or brought online since
+ * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
+ * disables CMCI on all banks owned by the cpu and clears this bitfield. At
+ * this point, cmci_rediscover() kicks in and a different cpu may end up
+ * taking ownership of some of the shared MCA banks that were previously
+ * owned by the offlined cpu.
+ */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
 /*
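The ownership hand-off the new comment describes can be modeled in a few lines. The sketch below mirrors only the idea of cmci_clear()/cmci_discover(); the real functions also program MSR_IA32_MCx_CTL2 and serialize on a spinlock, none of which is shown here.

/* User-space model of the CMCI bank-ownership hand-off (simplified). */
#include <stdio.h>

#define NR_BANKS 4

static int owner[NR_BANKS] = { -1, -1, -1, -1 };  /* -1: unowned */

static void cmci_discover_model(int cpu)
{
        for (int b = 0; b < NR_BANKS; b++)
                if (owner[b] == -1)
                        owner[b] = cpu;   /* first CPU to look claims it */
}

static void cmci_clear_model(int cpu)
{
        for (int b = 0; b < NR_BANKS; b++)
                if (owner[b] == cpu)
                        owner[b] = -1;    /* offlining CPU drops ownership */
}

int main(void)
{
        cmci_discover_model(0);           /* CPU 0 owns the shared banks */
        cmci_clear_model(0);              /* CPU 0 goes offline */
        cmci_discover_model(1);           /* rediscovery: CPU 1 takes over */
        printf("bank 0 owner: CPU %d\n", owner[0]);
        return 0;
}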
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4131c0393594..41e8e00a6637 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -29,6 +29,7 @@
 #include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
 
 /* How long to wait between reporting thermal events */
 #define CHECK_INTERVAL		(300 * HZ)
@@ -193,11 +194,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-		else
-			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
-				this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package",
-				state->count);
 		return 1;
 	}
 	if (old_event) {
@@ -205,10 +201,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package");
-		else
-			printk(KERN_INFO "CPU%d: %s power limit normal\n",
-				this_cpu,
-				level == CORE_LEVEL ? "Core" : "Package");
 		return 1;
 	}
 
@@ -237,6 +229,15 @@ static int thresh_event_valid(int level, int event)
 	return 1;
 }
 
+static bool int_pln_enable;
+static int __init int_pln_enable_setup(char *s)
+{
+	int_pln_enable = true;
+
+	return 1;
+}
+__setup("int_pln_enable", int_pln_enable_setup);
+
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct device *dev,
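The new int_pln_enable flag defaults to false, so after this change power limit notification interrupts (and their sysfs counters, wired up in the hunks below) stay off unless the machine is booted with int_pln_enable on the kernel command line. __setup() registers int_pln_enable_setup() to run when that token is seen during early command-line parsing; returning 1 marks the option as handled. A user-space model of that dispatch, with argv standing in for the boot command line:

/* Sketch of the mechanics behind __setup("int_pln_enable", ...).   */
/* This models the command-line dispatch only, not kernel code.     */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool int_pln_enable;

static int int_pln_enable_setup(char *s)
{
        (void)s;                /* the flag takes no argument */
        int_pln_enable = true;
        return 1;               /* 1: option consumed */
}

int main(int argc, char **argv)
{
        for (int i = 1; i < argc; i++)  /* stand-in for the boot command line */
                if (strcmp(argv[i], "int_pln_enable") == 0)
                        int_pln_enable_setup(argv[i]);

        printf("power limit notifications %s\n",
               int_pln_enable ? "enabled" : "disabled (default)");
        return 0;
}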
@@ -249,7 +250,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
 	if (err)
 		return err;
 
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_core_power_limit_count.attr,
 					      thermal_attr_group.name);
@@ -257,7 +258,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
 		err = sysfs_add_file_to_group(&dev->kobj,
 					      &dev_attr_package_throttle_count.attr,
 					      thermal_attr_group.name);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 			err = sysfs_add_file_to_group(&dev->kobj,
 				&dev_attr_package_power_limit_count.attr,
 				thermal_attr_group.name);
@@ -405,7 +406,7 @@ static void intel_thermal_interrupt(void)
 				CORE_LEVEL) != 0)
 		mce_log_therm_throt_event(msr_val);
 
-	if (this_cpu_has(X86_FEATURE_PLN))
+	if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					CORE_LEVEL);
@@ -417,7 +418,7 @@ static void intel_thermal_interrupt(void)
 		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL);
-		if (this_cpu_has(X86_FEATURE_PLN))
+		if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
 			therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
@@ -433,15 +434,26 @@ static void unexpected_thermal_interrupt(void)
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
 
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+static inline void __smp_thermal_interrupt(void)
 {
-	irq_enter();
-	exit_idle();
 	inc_irq_stat(irq_thermal_count);
 	smp_thermal_vector();
-	irq_exit();
-	/* Ack only at the end to avoid potential reentry */
-	ack_APIC_irq();
+}
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	__smp_thermal_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
+	__smp_thermal_interrupt();
+	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
+	exiting_ack_irq();
 }
 
 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
@@ -525,9 +537,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	apic_write(APIC_LVTTHMR, h);
 
 	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+		wrmsr(MSR_IA32_THERM_INTERRUPT,
+			(l | (THERM_INT_LOW_ENABLE
+			| THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
+	else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
 		wrmsr(MSR_IA32_THERM_INTERRUPT,
 			l | (THERM_INT_LOW_ENABLE
 			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
 	else
 		wrmsr(MSR_IA32_THERM_INTERRUPT,
@@ -535,9 +551,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 
 	if (cpu_has(c, X86_FEATURE_PTS)) {
 		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
 			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
-				l | (PACKAGE_THERM_INT_LOW_ENABLE
+				(l | (PACKAGE_THERM_INT_LOW_ENABLE
+				| PACKAGE_THERM_INT_HIGH_ENABLE))
+				& ~PACKAGE_THERM_INT_PLN_ENABLE, h);
+		else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+				l | (PACKAGE_THERM_INT_LOW_ENABLE
 				| PACKAGE_THERM_INT_HIGH_ENABLE
 				| PACKAGE_THERM_INT_PLN_ENABLE), h);
 		else
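In this file (and in threshold.c below) the open-coded irq_enter()/exit_idle() prologue and irq_exit()/ack_APIC_irq() epilogue are replaced by entering_irq() and exiting_ack_irq(). Their bodies can be read off the lines the hunks delete; the stand-alone sketch below reconstructs them over stubbed primitives, so treat the helper bodies as an inference from this diff and the real definitions added by this series (in <asm/apic.h>, if memory serves) as authoritative.

/* Stand-alone sketch: helper bodies inferred from the deleted lines. */
/* The puts() stubs stand in for the real kernel primitives.          */
#include <stdio.h>

static void irq_enter(void)    { puts("irq_enter");    }
static void exit_idle(void)    { puts("exit_idle");    }
static void irq_exit(void)     { puts("irq_exit");     }
static void ack_APIC_irq(void) { puts("ack_APIC_irq"); }

static inline void entering_irq(void)
{
        irq_enter();
        exit_idle();
}

static inline void exiting_ack_irq(void)
{
        irq_exit();
        /* Ack only at the end to avoid potential reentry */
        ack_APIC_irq();
}

int main(void)
{
        entering_irq();
        puts("  ...handler body...");
        exiting_ack_irq();
        return 0;
}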
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index aa578cadb940..fe6b1c86645b 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -8,6 +8,7 @@
 #include <asm/apic.h>
 #include <asm/idle.h>
 #include <asm/mce.h>
+#include <asm/trace/irq_vectors.h>
 
 static void default_threshold_interrupt(void)
 {
@@ -17,13 +18,24 @@ static void default_threshold_interrupt(void)
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
-asmlinkage void smp_threshold_interrupt(void)
+static inline void __smp_threshold_interrupt(void)
 {
-	irq_enter();
-	exit_idle();
 	inc_irq_stat(irq_threshold_count);
 	mce_threshold_vector();
-	irq_exit();
-	/* Ack only at the end to avoid potential reentry */
-	ack_APIC_irq();
+}
+
+asmlinkage void smp_threshold_interrupt(void)
+{
+	entering_irq();
+	__smp_threshold_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_threshold_interrupt(void)
+{
+	entering_irq();
+	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
+	__smp_threshold_interrupt();
+	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
+	exiting_ack_irq();
 }
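Both converted handlers follow the same shape: the real work is factored into a __smp_*_interrupt() body and then wrapped twice, once plainly and once bracketed by tracepoints, so the ordinary vector pays no tracing cost. A compact user-space illustration of that factoring, with puts() standing in for the trace_*_apic_entry/exit events and the interrupt plumbing:

/* Illustration of the factor-once, wrap-twice handler pattern.      */
#include <stdio.h>

static void __smp_threshold_interrupt(void)     /* shared body */
{
        puts("handle threshold event");
}

static void smp_threshold_interrupt(void)       /* normal vector */
{
        __smp_threshold_interrupt();            /* no tracing overhead */
}

static void smp_trace_threshold_interrupt(void) /* trace vector */
{
        puts("trace: THRESHOLD_APIC_VECTOR entry");
        __smp_threshold_interrupt();
        puts("trace: THRESHOLD_APIC_VECTOR exit");
}

int main(void)
{
        smp_threshold_interrupt();
        smp_trace_threshold_interrupt();
        return 0;
}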