author    Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>  2015-05-06 07:58:56 -0400
committer Borislav Petkov <bp@suse.de>  2015-05-07 04:23:32 -0400
commit    24fd78a81f6d3fe7f7a440c8629f9c52cd5f830e (patch)
tree      9c32cc0f0aa92425ebf780a9d93e3517b2f36ea7
parent    7559e13fb4abe7880dfaf985d6a1630ca90a67ce (diff)
x86/mce/amd: Introduce deferred error interrupt handler
Deferred errors indicate error conditions that were not corrected, but require no action from software (or the action is optional). These errors provide information about a latent uncorrectable (UC) MCE that can occur when poisoned data is consumed by the processor.

Processors that report these errors can be configured to generate APIC interrupts to notify the OS about the error.

Provide an interrupt handler in this patch so that the OS can catch these errors as and when they happen. Currently, we simply log the errors and exit the handler, as no software action is mandated.

Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: x86-ml <x86@kernel.org>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1430913538-1415-5-git-send-email-Aravind.Gopalakrishnan@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>
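For reference, the heart of the new handler (amd_deferred_error_interrupt() in the mce_amd.c hunk below) is a scan over the MCA banks for a status value with both the VAL and DEFERRED bits set; the first matching bank is logged and its status cleared. The stand-alone C sketch below only restates that scan logic for illustration: the bit positions follow the kernel's MCi_STATUS definitions (VAL = bit 63, DEFERRED = bit 44), while NUM_BANKS, bank_status[] and scan_for_deferred_error() are made-up names for the example, not part of the patch.

    #include <stdint.h>
    #include <stdio.h>

    /* MCi_STATUS bits the handler tests (as defined in asm/mce.h). */
    #define MCI_STATUS_VAL      (1ULL << 63)  /* register contains valid data */
    #define MCI_STATUS_DEFERRED (1ULL << 44)  /* deferred (uncorrected) error */

    #define NUM_BANKS 6

    /* Simulated per-bank MCi_STATUS values; in the kernel these come from
     * rdmsrl(MSR_IA32_MCx_STATUS(bank), status). */
    static uint64_t bank_status[NUM_BANKS];

    /* Mirror of the scan in amd_deferred_error_interrupt(): find the first
     * bank whose status is valid and marked deferred, log it, then stop. */
    static void scan_for_deferred_error(void)
    {
            unsigned int bank;

            for (bank = 0; bank < NUM_BANKS; ++bank) {
                    uint64_t status = bank_status[bank];

                    if (!(status & MCI_STATUS_VAL) ||
                        !(status & MCI_STATUS_DEFERRED))
                            continue;

                    printf("deferred error in bank %u, status 0x%llx\n",
                           bank, (unsigned long long)status);
                    bank_status[bank] = 0; /* kernel: wrmsrl(..., 0) in __log_error() */
                    break;
            }
    }

    int main(void)
    {
            /* Pretend bank 4 latched a deferred error. */
            bank_status[4] = MCI_STATUS_VAL | MCI_STATUS_DEFERRED | 0x1234;
            scan_for_deferred_error();
            return 0;
    }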
-rw-r--r--  arch/x86/include/asm/entry_arch.h         |  3
-rw-r--r--  arch/x86/include/asm/hardirq.h            |  3
-rw-r--r--  arch/x86/include/asm/hw_irq.h             |  2
-rw-r--r--  arch/x86/include/asm/irq_vectors.h        |  1
-rw-r--r--  arch/x86/include/asm/mce.h                |  3
-rw-r--r--  arch/x86/include/asm/trace/irq_vectors.h  |  6
-rw-r--r--  arch/x86/include/asm/traps.h              |  3
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c      | 93
-rw-r--r--  arch/x86/kernel/entry_64.S                |  5
-rw-r--r--  arch/x86/kernel/irq.c                     |  6
-rw-r--r--  arch/x86/kernel/irqinit.c                 |  4
-rw-r--r--  arch/x86/kernel/traps.c                   |  5
12 files changed, 133 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index dc5fa661465f..6da46dbaac87 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -50,4 +50,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
 #endif
 
+#ifdef CONFIG_X86_MCE_AMD
+BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR)
+#endif
 #endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 0f5fb6b6567e..db9f536f482f 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -33,6 +33,9 @@ typedef struct {
 #ifdef CONFIG_X86_MCE_THRESHOLD
 	unsigned int irq_threshold_count;
 #endif
+#ifdef CONFIG_X86_MCE_AMD
+	unsigned int irq_deferred_error_count;
+#endif
 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
 	unsigned int irq_hv_callback_count;
 #endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index e9571ddabc4f..f71e489d7537 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -73,6 +73,7 @@ extern asmlinkage void invalidate_interrupt31(void);
 extern asmlinkage void irq_move_cleanup_interrupt(void);
 extern asmlinkage void reboot_interrupt(void);
 extern asmlinkage void threshold_interrupt(void);
+extern asmlinkage void deferred_error_interrupt(void);
 
 extern asmlinkage void call_function_interrupt(void);
 extern asmlinkage void call_function_single_interrupt(void);
@@ -87,6 +88,7 @@ extern void trace_spurious_interrupt(void);
 extern void trace_thermal_interrupt(void);
 extern void trace_reschedule_interrupt(void);
 extern void trace_threshold_interrupt(void);
+extern void trace_deferred_error_interrupt(void);
 extern void trace_call_function_interrupt(void);
 extern void trace_call_function_single_interrupt(void);
 #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 666c89ec4bd7..026fc1e1599c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -113,6 +113,7 @@
 #define IRQ_WORK_VECTOR			0xf6
 
 #define UV_BAU_MESSAGE			0xf5
+#define DEFERRED_ERROR_VECTOR		0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */
 #define HYPERVISOR_CALLBACK_VECTOR	0xf3
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 407ced642ac1..6a3034a0a072 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -234,6 +234,9 @@ void do_machine_check(struct pt_regs *, long);
 extern void (*mce_threshold_vector)(void);
 extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
+/* Deferred error interrupt handler */
+extern void (*deferred_error_int_vector)(void);
+
 /*
  * Thermal handler
  */
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 4cab890007a7..38a09a13a9bc 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -101,6 +101,12 @@ DEFINE_IRQ_VECTOR_EVENT(call_function_single);
 DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
 
 /*
+ * deferred_error_apic - called when entering/exiting a deferred apic interrupt
+ * vector handler
+ */
+DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
+
+/*
  * thermal_apic - called when entering/exiting a thermal apic interrupt
  * vector handler
  */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 4e49d7dff78e..c5380bea2a36 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -108,7 +108,8 @@ extern int panic_on_unrecovered_nmi;
 void math_emulate(struct math_emu_info *);
 #ifndef CONFIG_X86_32
 asmlinkage void smp_thermal_interrupt(void);
-asmlinkage void mce_threshold_interrupt(void);
+asmlinkage void smp_threshold_interrupt(void);
+asmlinkage void smp_deferred_error_interrupt(void);
 #endif
 
 extern enum ctx_state ist_enter(struct pt_regs *regs);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 607075726e10..2e7ebe7e1e80 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -12,6 +12,8 @@
  *	- added support for AMD Family 0x10 processors
  *  May 2012
  *	- major scrubbing
+ *  May 2015
+ *	- add support for deferred error interrupts (Aravind Gopalakrishnan)
  *
  *  All MC4_MISCi registers are shared between multi-cores
  */
@@ -32,6 +34,7 @@
 #include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
 
 #define NR_BLOCKS         9
 #define THRESHOLD_MAX     0xFFF
@@ -47,6 +50,13 @@
 #define MASK_BLKPTR_LO    0xFF000000
 #define MCG_XBLK_ADDR     0xC0000400
 
+/* Deferred error settings */
+#define MSR_CU_DEF_ERR		0xC0000410
+#define MASK_DEF_LVTOFF		0x000000F0
+#define MASK_DEF_INT_TYPE	0x00000006
+#define DEF_LVT_OFF		0x2
+#define DEF_INT_TYPE_APIC	0x2
+
 static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
@@ -60,6 +70,13 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
 
 static void amd_threshold_interrupt(void);
+static void amd_deferred_error_interrupt(void);
+
+static void default_deferred_error_interrupt(void)
+{
+	pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
+}
+void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
 
 /*
  * CPU Initialization
@@ -205,6 +222,39 @@ static int setup_APIC_mce(int reserved, int new)
 	return reserved;
 }
 
+static int setup_APIC_deferred_error(int reserved, int new)
+{
+	if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
+					      APIC_EILVT_MSG_FIX, 0))
+		return new;
+
+	return reserved;
+}
+
+static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
+{
+	u32 low = 0, high = 0;
+	int def_offset = -1, def_new;
+
+	if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
+		return;
+
+	def_new = (low & MASK_DEF_LVTOFF) >> 4;
+	if (!(low & MASK_DEF_LVTOFF)) {
+		pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
+		def_new = DEF_LVT_OFF;
+		low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
+	}
+
+	def_offset = setup_APIC_deferred_error(def_offset, def_new);
+	if ((def_offset == def_new) &&
+	    (deferred_error_int_vector != amd_deferred_error_interrupt))
+		deferred_error_int_vector = amd_deferred_error_interrupt;
+
+	low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
+	wrmsr(MSR_CU_DEF_ERR, low, high);
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
@@ -262,6 +312,9 @@ init:
 			mce_threshold_block_init(&b, offset);
 		}
 	}
+
+	if (mce_flags.succor)
+		deferred_error_interrupt_enable(c);
 }
 
 static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
@@ -288,6 +341,46 @@ static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
 	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
 }
 
+static inline void __smp_deferred_error_interrupt(void)
+{
+	inc_irq_stat(irq_deferred_error_count);
+	deferred_error_int_vector();
+}
+
+asmlinkage __visible void smp_deferred_error_interrupt(void)
+{
+	entering_irq();
+	__smp_deferred_error_interrupt();
+	exiting_ack_irq();
+}
+
+asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+{
+	entering_irq();
+	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
+	__smp_deferred_error_interrupt();
+	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
+	exiting_ack_irq();
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
+{
+	u64 status;
+	unsigned int bank;
+
+	for (bank = 0; bank < mca_cfg.banks; ++bank) {
+		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+
+		if (!(status & MCI_STATUS_VAL) ||
+		    !(status & MCI_STATUS_DEFERRED))
+			continue;
+
+		__log_error(bank, false, 0);
+		break;
+	}
+}
+
 /*
  * APIC Interrupt Handler
  */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 02c2eff7478d..12aea85fe738 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -935,6 +935,11 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
 #endif
 
+#ifdef CONFIG_X86_MCE_AMD
+apicinterrupt DEFERRED_ERROR_VECTOR \
+	deferred_error_interrupt smp_deferred_error_interrupt
+#endif
+
 #ifdef CONFIG_X86_THERMAL_VECTOR
 apicinterrupt THERMAL_APIC_VECTOR \
 	thermal_interrupt smp_thermal_interrupt
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e5952c225532..590ed6c1bf51 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -116,6 +116,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_puts(p, "  Threshold APIC interrupts\n");
 #endif
+#ifdef CONFIG_X86_MCE_AMD
+	seq_printf(p, "%*s: ", prec, "DFR");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
+	seq_puts(p, "  Deferred Error APIC interrupts\n");
+#endif
 #ifdef CONFIG_X86_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
 	for_each_online_cpu(j)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index cd10a6437264..d7ec6e7b2b5b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -135,6 +135,10 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
 
+#ifdef CONFIG_X86_MCE_AMD
+	alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
+#endif
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 324ab5247687..68b1d5979a46 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -827,6 +827,11 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
 
+asmlinkage __visible void __attribute__((weak))
+smp_deferred_error_interrupt(void)
+{
+}
+
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task