x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries

Extend the severity checking code to add a new context IN_KERNEL_RECOV which is used to indicate that the machine check was triggered by code in the kernel tagged with _ASM_EXTABLE_FAULT() so that the ex_handler_fault() handler will provide the fixup code with the trap number. Major re-work to the tail code in do_machine_check() to make all this readable/maintainable. One functional change is that tolerant=3 no longer stops recovery actions; it reverts to only skipping sending SIGBUS to the current process. Signed-off-by: Tony Luck <tony.luck@intel.com> Reviewed-by: Borislav Petkov <bp@suse.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/89d243d05a7943bb187d1074bb30d9c4f482d5f5.1455732970.git.tony.luck@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Tony Luck <tony.luck@intel.com> 2016-02-17 13:20:13 -0500
committer: Ingo Molnar <mingo@kernel.org> 2016-02-18 03:22:42 -0500
commit: b2f9d678e28ca71ce650eac82f26dd287b47e89a (patch)
tree: 78559fcff433de13a8412e4e726abd8ff5cc0548 /arch/x86/kernel
parent: 548acf19234dbda5a52d5a8e7e205af46e9da840 (diff)
2 files changed, 56 insertions, 36 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c222071..5119766d9889 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <asm/mce.h>
+#include <asm/uaccess.h>
 #include "mce-internal.h"
@@ -29,7 +30,7 @@
 * panic situations)
 */
-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
 enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
@@ -48,6 +49,7 @@ static struct severity {
 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
 #define  KERNEL         .context = IN_KERNEL
 #define  USER           .context = IN_USER
+#define  KERNEL_RECOV   .context = IN_KERNEL_RECOV
 #define  SER            .ser = SER_REQUIRED
 #define  NOSER          .ser = NO_SER
 #define  EXCP           .excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@ static struct severity {
                EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
                ),
        MCESEV(
+                PANIC, "In kernel and no restart IP",
+                EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+                ),
+        MCESEV(
                DEFERRED, "Deferred error",
                NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
                ),
@@ -123,6 +129,11 @@ static struct severity {
                MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
                ),
        MCESEV(
+                AR, "Action required: data load in error recoverable area of kernel",
+                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+                KERNEL_RECOV
+                ),
+        MCESEV(
                AR, "Action required: data load error in a user process",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
                USER
@@ -170,6 +181,9 @@ static struct severity {
                )       /* always matches. keep at end */
 };
+#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
+                                (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
 /*
 * If mcgstatus indicated that ip/cs on the stack were
 * no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@ static struct severity {
 */
 static int error_context(struct mce *m)
 {
-        return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+        if ((m->cs & 3) == 3)
+                return IN_USER;
+        if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
+                return IN_KERNEL_RECOV;
+        return IN_KERNEL;
 }
 /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b7180801ea33..524f2a8492d7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
        }
 }
+static int do_memory_failure(struct mce *m)
+{
+        int flags = MF_ACTION_REQUIRED;
+        int ret;
+        pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
+        if (!(m->mcgstatus & MCG_STATUS_RIPV))
+                flags |= MF_MUST_KILL;
+        ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+        if (ret)
+                pr_err("Memory error not recovered");
+        return ret;
+}
 /*
 * The actual machine check handler. This only handles real
 * exceptions when something got corrupted coming in through int 18.
@@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
        DECLARE_BITMAP(toclear, MAX_NR_BANKS);
        DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
        char *msg = "Unknown";
-        u64 recover_paddr = ~0ull;
-        int flags = MF_ACTION_REQUIRED;
        int lmce = 0;
        /* If this CPU is offline, just bail out. */
@@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
        }
        /*
-         * At insane "tolerant" levels we take no action. Otherwise
+         * If tolerant is at an insane level we drop requests to kill
-         * we only die if we have no other choice. For less serious
+         * processes and continue even when there is no way out.
-         * issues we try to recover, or limit damage to the current
-         * process.
         */
-        if (cfg->tolerant < 3) {
+        if (cfg->tolerant == 3)
-                if (no_way_out)
+                kill_it = 0;
-                        mce_panic("Fatal machine check on current CPU", &m, msg);
+        else if (no_way_out)
-                if (worst == MCE_AR_SEVERITY) {
+                mce_panic("Fatal machine check on current CPU", &m, msg);
-                        recover_paddr = m.addr;
-                        if (!(m.mcgstatus & MCG_STATUS_RIPV))
-                                flags |= MF_MUST_KILL;
-                } else if (kill_it) {
-                        force_sig(SIGBUS, current);
-                }
-        }
        if (worst > 0)
                mce_report_event(regs);
@@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 out:
        sync_core();
-        if (recover_paddr == ~0ull)
+        if (worst != MCE_AR_SEVERITY && !kill_it)
-                goto done;
+                goto out_ist;
-        pr_err("Uncorrected hardware memory error in user-access at %llx",
+        /* Fault was in user mode and we need to take some action */
-                 recover_paddr);
+        if ((m.cs & 3) == 3) {
-        /*
+                ist_begin_non_atomic(regs);
-         * We must call memory_failure() here even if the current process is
+                local_irq_enable();
-         * doomed. We still need to mark the page as poisoned and alert any
-         * other users of the page.
+                if (kill_it || do_memory_failure(&m))
-         */
+                        force_sig(SIGBUS, current);
-        ist_begin_non_atomic(regs);
+                local_irq_disable();
-        local_irq_enable();
+                ist_end_non_atomic();
-        if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
+        } else {
-                pr_err("Memory error not recovered");
+                if (!fixup_exception(regs, X86_TRAP_MC))
-                force_sig(SIGBUS, current);
+                        mce_panic("Failed kernel mode recovery", &m, NULL);
        }
-        local_irq_disable();
-        ist_end_non_atomic();
+out_ist:
-done:
        ist_exit(regs);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
author	Tony Luck <tony.luck@intel.com>	2016-02-17 13:20:13 -0500
committer	Ingo Molnar <mingo@kernel.org>	2016-02-18 03:22:42 -0500
commit	b2f9d678e28ca71ce650eac82f26dd287b47e89a (patch)
tree	78559fcff433de13a8412e4e726abd8ff5cc0548 /arch/x86/kernel
parent	548acf19234dbda5a52d5a8e7e205af46e9da840 (diff)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 9c682c222071..5119766d9889 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -14,6 +14,7 @@
14	#include <linux/init.h>	14	#include <linux/init.h>
15	#include <linux/debugfs.h>	15	#include <linux/debugfs.h>
16	#include <asm/mce.h>	16	#include <asm/mce.h>
		17	#include <asm/uaccess.h>
17		18
18	#include "mce-internal.h"	19	#include "mce-internal.h"
19		20
@@ -29,7 +30,7 @@
29	* panic situations)	30	* panic situations)
30	*/	31	*/
31		32
32	enum context { IN_KERNEL = 1, IN_USER = 2 };	33	enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
33	enum ser { SER_REQUIRED = 1, NO_SER = 2 };	34	enum ser { SER_REQUIRED = 1, NO_SER = 2 };
34	enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };	35	enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
35		36
@@ -48,6 +49,7 @@ static struct severity {
48	#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }	49	#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
49	#define KERNEL .context = IN_KERNEL	50	#define KERNEL .context = IN_KERNEL
50	#define USER .context = IN_USER	51	#define USER .context = IN_USER
		52	#define KERNEL_RECOV .context = IN_KERNEL_RECOV
51	#define SER .ser = SER_REQUIRED	53	#define SER .ser = SER_REQUIRED
52	#define NOSER .ser = NO_SER	54	#define NOSER .ser = NO_SER
53	#define EXCP .excp = EXCP_CONTEXT	55	#define EXCP .excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@ static struct severity {
87	EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)	89	EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
88	),	90	),
89	MCESEV(	91	MCESEV(
		92	PANIC, "In kernel and no restart IP",
		93	EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
		94	),
		95	MCESEV(
90	DEFERRED, "Deferred error",	96	DEFERRED, "Deferred error",
91	NOSER, MASK(MCI_STATUS_UC\|MCI_STATUS_DEFERRED\|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)	97	NOSER, MASK(MCI_STATUS_UC\|MCI_STATUS_DEFERRED\|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
92	),	98	),
@@ -123,6 +129,11 @@ static struct severity {
123	MCGMASK(MCG_STATUS_RIPV\|MCG_STATUS_EIPV, MCG_STATUS_RIPV)	129	MCGMASK(MCG_STATUS_RIPV\|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
124	),	130	),
125	MCESEV(	131	MCESEV(
		132	AR, "Action required: data load in error recoverable area of kernel",
		133	SER, MASK(MCI_STATUS_OVER\|MCI_UC_SAR\|MCI_ADDR\|MCACOD, MCI_UC_SAR\|MCI_ADDR\|MCACOD_DATA),
		134	KERNEL_RECOV
		135	),
		136	MCESEV(
126	AR, "Action required: data load error in a user process",	137	AR, "Action required: data load error in a user process",
127	SER, MASK(MCI_STATUS_OVER\|MCI_UC_SAR\|MCI_ADDR\|MCACOD, MCI_UC_SAR\|MCI_ADDR\|MCACOD_DATA),	138	SER, MASK(MCI_STATUS_OVER\|MCI_UC_SAR\|MCI_ADDR\|MCACOD, MCI_UC_SAR\|MCI_ADDR\|MCACOD_DATA),
128	USER	139	USER
@@ -170,6 +181,9 @@ static struct severity {
170	) /* always matches. keep at end */	181	) /* always matches. keep at end */
171	};	182	};
172		183
		184	#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV\|MCG_STATUS_EIPV)) == \
		185	(MCG_STATUS_RIPV\|MCG_STATUS_EIPV))
		186
173	/*	187	/*
174	* If mcgstatus indicated that ip/cs on the stack were	188	* If mcgstatus indicated that ip/cs on the stack were
175	* no good, then "m->cs" will be zero and we will have	189	* no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@ static struct severity {
183	*/	197	*/
184	static int error_context(struct mce *m)	198	static int error_context(struct mce *m)
185	{	199	{
186	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;	200	if ((m->cs & 3) == 3)
		201	return IN_USER;
		202	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
		203	return IN_KERNEL_RECOV;
		204	return IN_KERNEL;
187	}	205	}
188		206
189	/*	207	/*


diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b7180801ea33..524f2a8492d7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
961	}	961	}
962	}	962	}
963		963
		964	static int do_memory_failure(struct mce *m)
		965	{
		966	int flags = MF_ACTION_REQUIRED;
		967	int ret;
		968
		969	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
		970	if (!(m->mcgstatus & MCG_STATUS_RIPV))
		971	flags \|= MF_MUST_KILL;
		972	ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
		973	if (ret)
		974	pr_err("Memory error not recovered");
		975	return ret;
		976	}
		977
964	/*	978	/*
965	* The actual machine check handler. This only handles real	979	* The actual machine check handler. This only handles real
966	* exceptions when something got corrupted coming in through int 18.	980	* exceptions when something got corrupted coming in through int 18.
@@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
998	DECLARE_BITMAP(toclear, MAX_NR_BANKS);	1012	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
999	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);	1013	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
1000	char *msg = "Unknown";	1014	char *msg = "Unknown";
1001	u64 recover_paddr = ~0ull;
1002	int flags = MF_ACTION_REQUIRED;
1003	int lmce = 0;	1015	int lmce = 0;
1004		1016
1005	/* If this CPU is offline, just bail out. */	1017	/* If this CPU is offline, just bail out. */
@@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1136	}	1148	}
1137		1149
1138	/*	1150	/*
1139	* At insane "tolerant" levels we take no action. Otherwise	1151	* If tolerant is at an insane level we drop requests to kill
1140	* we only die if we have no other choice. For less serious	1152	* processes and continue even when there is no way out.
1141	* issues we try to recover, or limit damage to the current
1142	* process.
1143	*/	1153	*/
1144	if (cfg->tolerant < 3) {	1154	if (cfg->tolerant == 3)
1145	if (no_way_out)	1155	kill_it = 0;
1146	mce_panic("Fatal machine check on current CPU", &m, msg);	1156	else if (no_way_out)
1147	if (worst == MCE_AR_SEVERITY) {	1157	mce_panic("Fatal machine check on current CPU", &m, msg);
1148	recover_paddr = m.addr;
1149	if (!(m.mcgstatus & MCG_STATUS_RIPV))
1150	flags \|= MF_MUST_KILL;
1151	} else if (kill_it) {
1152	force_sig(SIGBUS, current);
1153	}
1154	}
1155		1158
1156	if (worst > 0)	1159	if (worst > 0)
1157	mce_report_event(regs);	1160	mce_report_event(regs);
@@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1159	out:	1162	out:
1160	sync_core();	1163	sync_core();
1161		1164
1162	if (recover_paddr == ~0ull)	1165	if (worst != MCE_AR_SEVERITY && !kill_it)
1163	goto done;	1166	goto out_ist;
1164		1167
1165	pr_err("Uncorrected hardware memory error in user-access at %llx",	1168	/* Fault was in user mode and we need to take some action */
1166	recover_paddr);	1169	if ((m.cs & 3) == 3) {
1167	/*	1170	ist_begin_non_atomic(regs);
1168	* We must call memory_failure() here even if the current process is	1171	local_irq_enable();
1169	* doomed. We still need to mark the page as poisoned and alert any	1172
1170	* other users of the page.	1173	if (kill_it \|\| do_memory_failure(&m))
1171	*/	1174	force_sig(SIGBUS, current);
1172	ist_begin_non_atomic(regs);	1175	local_irq_disable();
1173	local_irq_enable();	1176	ist_end_non_atomic();
1174	if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {	1177	} else {
1175	pr_err("Memory error not recovered");	1178	if (!fixup_exception(regs, X86_TRAP_MC))
1176	force_sig(SIGBUS, current);	1179	mce_panic("Failed kernel mode recovery", &m, NULL);
1177	}	1180	}
1178	local_irq_disable();	1181
1179	ist_end_non_atomic();	1182	out_ist:
1180	done:
1181	ist_exit(regs);	1183	ist_exit(regs);
1182	}	1184	}
1183	EXPORT_SYMBOL_GPL(do_machine_check);	1185	EXPORT_SYMBOL_GPL(do_machine_check);