aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-07-02 19:30:46 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-07-02 19:30:46 -0400
commit 3045f94a20cc54e3e5b20a843701eeab86f57163 (patch)
tree 3f5e4fd6ed396f73ce2120a22ce93df94163fadb
parent 52e8ad9066b57510e600acc4bbc4455a81732c6c (diff)
parent fb476cffd5e345434c03f3c0e82a7e8d87f98ab0 (diff)
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS update from Ingo Molnar:
 "The changes in this tree are:

   - ACPI APEI (ACPI Platform Error Interface) improvements, by Chen Gong
   - misc MCE fixes/cleanups"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Update MCE severity condition check
  mce: acpi/apei: Add comments to clarify usage of the various bitfields in the MCA subsystem
  ACPI/APEI: Update einj documentation for param1/param2
  ACPI/APEI: Add parameter check before error injection
  ACPI, APEI, EINJ: Fix error return code in einj_init()
  x86, mce: Fix "braodcast" typo
-rw-r--r-- Documentation/acpi/apei/einj.txt 9
-rw-r--r-- arch/x86/include/asm/mce.h 2
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-inject.c 4
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-severity.c 15
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 5
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_intel.c 12
-rw-r--r-- drivers/acpi/apei/einj.c 39
-rw-r--r-- kernel/resource.c 1
8 files changed, 68 insertions, 19 deletions
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index e20b6daaced4..a58b63da1a36 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -47,11 +47,16 @@ directory apei/einj. The following files are provided.
47 47
48- param1 48- param1
49 This file is used to set the first error parameter value. Effect of 49 This file is used to set the first error parameter value. Effect of
50 parameter depends on error_type specified. 50 parameter depends on error_type specified. For example, if error
51 type is memory related type, the param1 should be a valid physical
52 memory address.
51 53
52- param2 54- param2
53 This file is used to set the second error parameter value. Effect of 55 This file is used to set the second error parameter value. Effect of
54 parameter depends on error_type specified. 56 parameter depends on error_type specified. For example, if error
57 type is memory related type, the param2 should be a physical memory
58 address mask. Linux requires page or narrower granularity, say,
59 0xfffffffffffff000.
55 60
56- notrigger 61- notrigger
57 The EINJ mechanism is a two step process. First inject the error, then 62 The EINJ mechanism is a two step process. First inject the error, then
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fa5f71e021d5..6b52980c29c1 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -61,7 +61,7 @@
61#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ 61#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
62#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ 62#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
63#define MCJ_EXCEPTION 0x8 /* raise as exception */ 63#define MCJ_EXCEPTION 0x8 /* raise as exception */
64#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ 64#define MCJ_IRQ_BROADCAST 0x10 /* do IRQ broadcasting */
65 65
66#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ 66#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
67 67
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index ddc72f839332..5ac2d1fb28bc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -153,7 +153,7 @@ static void raise_mce(struct mce *m)
153 return; 153 return;
154 154
155#ifdef CONFIG_X86_LOCAL_APIC 155#ifdef CONFIG_X86_LOCAL_APIC
156 if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) { 156 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
157 unsigned long start; 157 unsigned long start;
158 int cpu; 158 int cpu;
159 159
@@ -167,7 +167,7 @@ static void raise_mce(struct mce *m)
167 cpumask_clear_cpu(cpu, mce_inject_cpumask); 167 cpumask_clear_cpu(cpu, mce_inject_cpumask);
168 } 168 }
169 if (!cpumask_empty(mce_inject_cpumask)) { 169 if (!cpumask_empty(mce_inject_cpumask)) {
170 if (m->inject_flags & MCJ_IRQ_BRAODCAST) { 170 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
171 /* 171 /*
172 * don't wait because mce_irq_ipi is necessary 172 * don't wait because mce_irq_ipi is necessary
173 * to be sync with following raise_local 173 * to be sync with following raise_local
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f1689e52..e2703520d120 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
110 /* known AR MCACODs: */ 110 /* known AR MCACODs: */
111#ifdef CONFIG_MEMORY_FAILURE 111#ifdef CONFIG_MEMORY_FAILURE
112 MCESEV( 112 MCESEV(
113 KEEP, "HT thread notices Action required: data load error", 113 KEEP, "Action required but unaffected thread is continuable",
114 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), 114 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
115 MCGMASK(MCG_STATUS_EIPV, 0) 115 MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
116 ), 116 ),
117 MCESEV( 117 MCESEV(
118 AR, "Action required: data load error", 118 AR, "Action required: data load error in a user process",
119 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), 119 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
120 USER 120 USER
121 ), 121 ),
122 MCESEV( 122 MCESEV(
123 KEEP, "HT thread notices Action required: instruction fetch error", 123 AR, "Action required: instruction fetch error in a user process",
124 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
125 MCGMASK(MCG_STATUS_EIPV, 0)
126 ),
127 MCESEV(
128 AR, "Action required: instruction fetch error",
129 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), 124 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
130 USER 125 USER
131 ), 126 ),
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504b41cb..bf49cdbb010f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,7 +89,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
89static DEFINE_PER_CPU(struct mce, mces_seen); 89static DEFINE_PER_CPU(struct mce, mces_seen);
90static int cpu_missing; 90static int cpu_missing;
91 91
92/* MCA banks polled by the period polling timer for corrected events */ 92/*
93 * MCA banks polled by the period polling timer for corrected events.
94 * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
95 */
93DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 96DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
94 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 97 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
95}; 98};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index ae1697c2afe3..d56405309dc1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -24,6 +24,18 @@
24 * Also supports reliable discovery of shared banks. 24 * Also supports reliable discovery of shared banks.
25 */ 25 */
26 26
27/*
28 * CMCI can be delivered to multiple cpus that share a machine check bank
29 * so we need to designate a single cpu to process errors logged in each bank
30 * in the interrupt handler (otherwise we would have many races and potential
31 * double reporting of the same error).
32 * Note that this can change when a cpu is offlined or brought online since
33 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
34 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
35 * this point, cmci_rediscover() kicks in and a different cpu may end up
36 * taking ownership of some of the shared MCA banks that were previously
37 * owned by the offlined cpu.
38 */
27static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); 39static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
28 40
29/* 41/*
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 8d457b55c55a..fb57d03e698b 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -32,6 +32,7 @@
32#include <linux/seq_file.h> 32#include <linux/seq_file.h>
33#include <linux/nmi.h> 33#include <linux/nmi.h>
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/mm.h>
35#include <acpi/acpi.h> 36#include <acpi/acpi.h>
36 37
37#include "apei-internal.h" 38#include "apei-internal.h"
@@ -41,6 +42,10 @@
41#define SPIN_UNIT 100 /* 100ns */ 42#define SPIN_UNIT 100 /* 100ns */
42/* Firmware should respond within 1 milliseconds */ 43/* Firmware should respond within 1 milliseconds */
43#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC) 44#define FIRMWARE_TIMEOUT (1 * NSEC_PER_MSEC)
45#define ACPI5_VENDOR_BIT BIT(31)
46#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
47 ACPI_EINJ_MEMORY_UNCORRECTABLE | \
48 ACPI_EINJ_MEMORY_FATAL)
44 49
45/* 50/*
46 * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action. 51 * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
@@ -367,7 +372,7 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
367 * This will cause resource conflict with regular memory. So 372 * This will cause resource conflict with regular memory. So
368 * remove it from trigger table resources. 373 * remove it from trigger table resources.
369 */ 374 */
370 if ((param_extension || acpi5) && (type & 0x0038) && param2) { 375 if ((param_extension || acpi5) && (type & MEM_ERROR_MASK) && param2) {
371 struct apei_resources addr_resources; 376 struct apei_resources addr_resources;
372 apei_resources_init(&addr_resources); 377 apei_resources_init(&addr_resources);
373 trigger_param_region = einj_get_trigger_parameter_region( 378 trigger_param_region = einj_get_trigger_parameter_region(
@@ -427,7 +432,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
427 struct set_error_type_with_address *v5param = einj_param; 432 struct set_error_type_with_address *v5param = einj_param;
428 433
429 v5param->type = type; 434 v5param->type = type;
430 if (type & 0x80000000) { 435 if (type & ACPI5_VENDOR_BIT) {
431 switch (vendor_flags) { 436 switch (vendor_flags) {
432 case SETWA_FLAGS_APICID: 437 case SETWA_FLAGS_APICID:
433 v5param->apicid = param1; 438 v5param->apicid = param1;
@@ -512,7 +517,34 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
512static int einj_error_inject(u32 type, u64 param1, u64 param2) 517static int einj_error_inject(u32 type, u64 param1, u64 param2)
513{ 518{
514 int rc; 519 int rc;
520 unsigned long pfn;
515 521
522 /*
523 * We need extra sanity checks for memory errors.
524 * Other types leap directly to injection.
525 */
526
527 /* ensure param1/param2 existed */
528 if (!(param_extension || acpi5))
529 goto inject;
530
531 /* ensure injection is memory related */
532 if (type & ACPI5_VENDOR_BIT) {
533 if (vendor_flags != SETWA_FLAGS_MEM)
534 goto inject;
535 } else if (!(type & MEM_ERROR_MASK))
536 goto inject;
537
538 /*
539 * Disallow crazy address masks that give BIOS leeway to pick
540 * injection address almost anywhere. Insist on page or
541 * better granularity and that target address is normal RAM.
542 */
543 pfn = PFN_DOWN(param1 & param2);
544 if (!page_is_ram(pfn) || ((param2 & PAGE_MASK) != PAGE_MASK))
545 return -EINVAL;
546
547inject:
516 mutex_lock(&einj_mutex); 548 mutex_lock(&einj_mutex);
517 rc = __einj_error_inject(type, param1, param2); 549 rc = __einj_error_inject(type, param1, param2);
518 mutex_unlock(&einj_mutex); 550 mutex_unlock(&einj_mutex);
@@ -590,7 +622,7 @@ static int error_type_set(void *data, u64 val)
590 * Vendor defined types have 0x80000000 bit set, and 622 * Vendor defined types have 0x80000000 bit set, and
591 * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE 623 * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
592 */ 624 */
593 vendor = val & 0x80000000; 625 vendor = val & ACPI5_VENDOR_BIT;
594 tval = val & 0x7fffffff; 626 tval = val & 0x7fffffff;
595 627
596 /* Only one error type can be specified */ 628 /* Only one error type can be specified */
@@ -694,6 +726,7 @@ static int __init einj_init(void)
694 if (rc) 726 if (rc)
695 goto err_release; 727 goto err_release;
696 728
729 rc = -ENOMEM;
697 einj_param = einj_get_parameter_address(); 730 einj_param = einj_get_parameter_address();
698 if ((param_extension || acpi5) && einj_param) { 731 if ((param_extension || acpi5) && einj_param) {
699 fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, 732 fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
diff --git a/kernel/resource.c b/kernel/resource.c
index d7386986e10e..77bf11a86c7d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -409,6 +409,7 @@ int __weak page_is_ram(unsigned long pfn)
409{ 409{
410 return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; 410 return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
411} 411}
412EXPORT_SYMBOL_GPL(page_is_ram);
412 413
413void __weak arch_remove_reservations(struct resource *avail) 414void __weak arch_remove_reservations(struct resource *avail)
414{ 415{