aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-24 07:22:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-24 07:22:19 -0400
commita43de489934cadcbc4cc08a6590fdcc833768461 (patch)
tree61b688a0017e696bd1133d48a7c302d66bfa8ae2
parent6242258b6b472f8fdd8ed9b735cc1190c185d16d (diff)
parent40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 (diff)
Merge branch 'ras-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull ras fixes from Thomas Gleixner: "A set of fixes for RAS/MCE: - Improve the error message when the kernel cannot recover from a MCE so the maximum amount of information gets provided. - Individually check MCE recovery features on SkyLake CPUs instead of assuming none when the CAPID0 register does not advertise the general ability for recovery. - Prevent MCE to output inconsistent messages which first show an error location and then claim that the source is unknown. - Prevent overwriting MCi_STATUS in the attempt to gather more information when a fatal MCE has already been detected. This leads to empty status values in the printout and failing to react promptly on the fatal event" * 'ras-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mce: Fix incorrect "Machine check from unknown source" message x86/mce: Do not overwrite MCi_STATUS in mce_no_way_out() x86/mce: Check for alternate indication of machine check recovery on Skylake x86/mce: Improve error message when kernel cannot recover
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c44
-rw-r--r--arch/x86/kernel/quirks.c11
3 files changed, 42 insertions, 18 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5bbd06f38ff6..f34d89c01edc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -160,6 +160,11 @@ static struct severity {
160 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), 160 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
161 USER 161 USER
162 ), 162 ),
163 MCESEV(
164 PANIC, "Data load in unrecoverable area of kernel",
165 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
166 KERNEL
167 ),
163#endif 168#endif
164 MCESEV( 169 MCESEV(
165 PANIC, "Action required: unknown MCACOD", 170 PANIC, "Action required: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e4cf6ff1c2e1..c102ad51025e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
772static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, 772static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
773 struct pt_regs *regs) 773 struct pt_regs *regs)
774{ 774{
775 int i, ret = 0;
776 char *tmp; 775 char *tmp;
776 int i;
777 777
778 for (i = 0; i < mca_cfg.banks; i++) { 778 for (i = 0; i < mca_cfg.banks; i++) {
779 m->status = mce_rdmsrl(msr_ops.status(i)); 779 m->status = mce_rdmsrl(msr_ops.status(i));
780 if (m->status & MCI_STATUS_VAL) { 780 if (!(m->status & MCI_STATUS_VAL))
781 __set_bit(i, validp); 781 continue;
782 if (quirk_no_way_out) 782
783 quirk_no_way_out(i, m, regs); 783 __set_bit(i, validp);
784 } 784 if (quirk_no_way_out)
785 quirk_no_way_out(i, m, regs);
785 786
786 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { 787 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
788 mce_read_aux(m, i);
787 *msg = tmp; 789 *msg = tmp;
788 ret = 1; 790 return 1;
789 } 791 }
790 } 792 }
791 return ret; 793 return 0;
792} 794}
793 795
794/* 796/*
@@ -1205,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1205 lmce = m.mcgstatus & MCG_STATUS_LMCES; 1207 lmce = m.mcgstatus & MCG_STATUS_LMCES;
1206 1208
1207 /* 1209 /*
1210 * Local machine check may already know that we have to panic.
1211 * Broadcast machine check begins rendezvous in mce_start()
1208 * Go through all banks in exclusion of the other CPUs. This way we 1212 * Go through all banks in exclusion of the other CPUs. This way we
1209 * don't report duplicated events on shared banks because the first one 1213 * don't report duplicated events on shared banks because the first one
1210 * to see it will clear it. If this is a Local MCE, then no need to 1214 * to see it will clear it.
1211 * perform rendezvous.
1212 */ 1215 */
1213 if (!lmce) 1216 if (lmce) {
1217 if (no_way_out)
1218 mce_panic("Fatal local machine check", &m, msg);
1219 } else {
1214 order = mce_start(&no_way_out); 1220 order = mce_start(&no_way_out);
1221 }
1215 1222
1216 for (i = 0; i < cfg->banks; i++) { 1223 for (i = 0; i < cfg->banks; i++) {
1217 __clear_bit(i, toclear); 1224 __clear_bit(i, toclear);
@@ -1287,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1287 no_way_out = worst >= MCE_PANIC_SEVERITY; 1294 no_way_out = worst >= MCE_PANIC_SEVERITY;
1288 } else { 1295 } else {
1289 /* 1296 /*
1290 * Local MCE skipped calling mce_reign() 1297 * If there was a fatal machine check we should have
1291 * If we found a fatal error, we need to panic here. 1298 * already called mce_panic earlier in this function.
1299 * Since we re-read the banks, we might have found
1300 * something new. Check again to see if we found a
1301 * fatal error. We call "mce_severity()" again to
1302 * make sure we have the right "msg".
1292 */ 1303 */
1293 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) 1304 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
1294 mce_panic("Machine check from unknown source", 1305 mce_severity(&m, cfg->tolerant, &msg, true);
1295 NULL, NULL); 1306 mce_panic("Local fatal machine check!", &m, msg);
1307 }
1296 } 1308 }
1297 1309
1298 /* 1310 /*
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 697a4ce04308..736348ead421 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
645/* Skylake */ 645/* Skylake */
646static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) 646static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
647{ 647{
648 u32 capid0; 648 u32 capid0, capid5;
649 649
650 pci_read_config_dword(pdev, 0x84, &capid0); 650 pci_read_config_dword(pdev, 0x84, &capid0);
651 pci_read_config_dword(pdev, 0x98, &capid5);
651 652
652 if ((capid0 & 0xc0) == 0xc0) 653 /*
654 * CAPID0{7:6} indicate whether this is an advanced RAS SKU
655 * CAPID5{8:5} indicate that various NVDIMM usage modes are
656 * enabled, so memory machine check recovery is also enabled.
657 */
658 if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
653 static_branch_inc(&mcsafe_key); 659 static_branch_inc(&mcsafe_key);
660
654} 661}
655DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); 662DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
656DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); 663DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);