aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/acpi
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 14:07:04 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 14:07:04 -0400
commit b20c99eb668f10b855a9fd87e0a2f5db3fb3637d (patch)
tree 87cb380f2006a1c5ee2c612fead142d261c64c4e /drivers/acpi
parent bb8c4701704d81ef98657dc51adb99aa5a0c5ac9 (diff)
parent ead6fa95b7e9d38b4526503403ba1c029b03dd72 (diff)
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:

 "[ The reason for drivers/ updates is that Boris asked for the
    drivers/edac/ changes to go via x86/ras in this cycle ]

  Main changes:

   - AMD CPUs:
      . Add ECC event decoding support for new F15h models
      . Various erratum fixes
      . Fix single-channel on dual-channel-controllers bug.

   - Intel CPUs:
      . UC uncorrectable memory error parsing fix
      . Add support for CMC (Corrected Machine Check) 'FF' (Firmware
        First) flag in the APEI HEST

   - Various cleanups and fixes"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  amd64_edac: Fix incorrect wraparounds
  amd64_edac: Correct erratum 505 range
  cpc925_edac: Use proper array termination
  x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
  amd64_edac: Get rid of boot_cpu_data accesses
  amd64_edac: Add ECC decoding support for newer F15h models
  x86, amd_nb: Clarify F15h, model 30h GART and L3 support
  pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
  x38_edac: Make a local function static
  i3200_edac: Make a local function static
  x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
  APEI/ERST: Fix error message formatting
  amd64_edac: Fix single-channel setups
  EDAC: Replace strict_strtol() with kstrtol()
  mce: acpi/apei: Soft-offline a page on firmware GHES notification
  mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
  mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
Diffstat (limited to 'drivers/acpi')
-rw-r--r-- drivers/acpi/apei/erst.c 51
-rw-r--r-- drivers/acpi/apei/ghes.c 38
-rw-r--r-- drivers/acpi/apei/hest.c 39
3 files changed, 91 insertions, 37 deletions
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 822b1ed3b00f..26311f23c824 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -39,7 +39,8 @@
39 39
40#include "apei-internal.h" 40#include "apei-internal.h"
41 41
42#define ERST_PFX "ERST: " 42#undef pr_fmt
43#define pr_fmt(fmt) "ERST: " fmt
43 44
44/* ERST command status */ 45/* ERST command status */
45#define ERST_STATUS_SUCCESS 0x0 46#define ERST_STATUS_SUCCESS 0x0
@@ -109,8 +110,7 @@ static inline int erst_errno(int command_status)
109static int erst_timedout(u64 *t, u64 spin_unit) 110static int erst_timedout(u64 *t, u64 spin_unit)
110{ 111{
111 if ((s64)*t < spin_unit) { 112 if ((s64)*t < spin_unit) {
112 pr_warning(FW_WARN ERST_PFX 113 pr_warn(FW_WARN "Firmware does not respond in time.\n");
113 "Firmware does not respond in time\n");
114 return 1; 114 return 1;
115 } 115 }
116 *t -= spin_unit; 116 *t -= spin_unit;
@@ -186,8 +186,8 @@ static int erst_exec_stall(struct apei_exec_context *ctx,
186 186
187 if (ctx->value > FIRMWARE_MAX_STALL) { 187 if (ctx->value > FIRMWARE_MAX_STALL) {
188 if (!in_nmi()) 188 if (!in_nmi())
189 pr_warning(FW_WARN ERST_PFX 189 pr_warn(FW_WARN
190 "Too long stall time for stall instruction: %llx.\n", 190 "Too long stall time for stall instruction: 0x%llx.\n",
191 ctx->value); 191 ctx->value);
192 stall_time = FIRMWARE_MAX_STALL; 192 stall_time = FIRMWARE_MAX_STALL;
193 } else 193 } else
@@ -206,8 +206,8 @@ static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
206 206
207 if (ctx->var1 > FIRMWARE_MAX_STALL) { 207 if (ctx->var1 > FIRMWARE_MAX_STALL) {
208 if (!in_nmi()) 208 if (!in_nmi())
209 pr_warning(FW_WARN ERST_PFX 209 pr_warn(FW_WARN
210 "Too long stall time for stall while true instruction: %llx.\n", 210 "Too long stall time for stall while true instruction: 0x%llx.\n",
211 ctx->var1); 211 ctx->var1);
212 stall_time = FIRMWARE_MAX_STALL; 212 stall_time = FIRMWARE_MAX_STALL;
213 } else 213 } else
@@ -271,8 +271,7 @@ static int erst_exec_move_data(struct apei_exec_context *ctx,
271 271
272 /* ioremap does not work in interrupt context */ 272 /* ioremap does not work in interrupt context */
273 if (in_interrupt()) { 273 if (in_interrupt()) {
274 pr_warning(ERST_PFX 274 pr_warn("MOVE_DATA can not be used in interrupt context.\n");
275 "MOVE_DATA can not be used in interrupt context");
276 return -EBUSY; 275 return -EBUSY;
277 } 276 }
278 277
@@ -524,8 +523,7 @@ retry:
524 ERST_RECORD_ID_CACHE_SIZE_MAX); 523 ERST_RECORD_ID_CACHE_SIZE_MAX);
525 if (new_size <= erst_record_id_cache.size) { 524 if (new_size <= erst_record_id_cache.size) {
526 if (printk_ratelimit()) 525 if (printk_ratelimit())
527 pr_warning(FW_WARN ERST_PFX 526 pr_warn(FW_WARN "too many record IDs!\n");
528 "too many record ID!\n");
529 return 0; 527 return 0;
530 } 528 }
531 alloc_size = new_size * sizeof(entries[0]); 529 alloc_size = new_size * sizeof(entries[0]);
@@ -761,8 +759,7 @@ static int __erst_clear_from_storage(u64 record_id)
761static void pr_unimpl_nvram(void) 759static void pr_unimpl_nvram(void)
762{ 760{
763 if (printk_ratelimit()) 761 if (printk_ratelimit())
764 pr_warning(ERST_PFX 762 pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
765 "NVRAM ERST Log Address Range is not implemented yet\n");
766} 763}
767 764
768static int __erst_write_to_nvram(const struct cper_record_header *record) 765static int __erst_write_to_nvram(const struct cper_record_header *record)
@@ -1133,7 +1130,7 @@ static int __init erst_init(void)
1133 goto err; 1130 goto err;
1134 1131
1135 if (erst_disable) { 1132 if (erst_disable) {
1136 pr_info(ERST_PFX 1133 pr_info(
1137 "Error Record Serialization Table (ERST) support is disabled.\n"); 1134 "Error Record Serialization Table (ERST) support is disabled.\n");
1138 goto err; 1135 goto err;
1139 } 1136 }
@@ -1144,14 +1141,14 @@ static int __init erst_init(void)
1144 goto err; 1141 goto err;
1145 else if (ACPI_FAILURE(status)) { 1142 else if (ACPI_FAILURE(status)) {
1146 const char *msg = acpi_format_exception(status); 1143 const char *msg = acpi_format_exception(status);
1147 pr_err(ERST_PFX "Failed to get table, %s\n", msg); 1144 pr_err("Failed to get table, %s\n", msg);
1148 rc = -EINVAL; 1145 rc = -EINVAL;
1149 goto err; 1146 goto err;
1150 } 1147 }
1151 1148
1152 rc = erst_check_table(erst_tab); 1149 rc = erst_check_table(erst_tab);
1153 if (rc) { 1150 if (rc) {
1154 pr_err(FW_BUG ERST_PFX "ERST table is invalid\n"); 1151 pr_err(FW_BUG "ERST table is invalid.\n");
1155 goto err; 1152 goto err;
1156 } 1153 }
1157 1154
@@ -1169,21 +1166,19 @@ static int __init erst_init(void)
1169 rc = erst_get_erange(&erst_erange); 1166 rc = erst_get_erange(&erst_erange);
1170 if (rc) { 1167 if (rc) {
1171 if (rc == -ENODEV) 1168 if (rc == -ENODEV)
1172 pr_info(ERST_PFX 1169 pr_info(
1173 "The corresponding hardware device or firmware implementation " 1170 "The corresponding hardware device or firmware implementation "
1174 "is not available.\n"); 1171 "is not available.\n");
1175 else 1172 else
1176 pr_err(ERST_PFX 1173 pr_err("Failed to get Error Log Address Range.\n");
1177 "Failed to get Error Log Address Range.\n");
1178 goto err_unmap_reg; 1174 goto err_unmap_reg;
1179 } 1175 }
1180 1176
1181 r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST"); 1177 r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
1182 if (!r) { 1178 if (!r) {
1183 pr_err(ERST_PFX 1179 pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
1184 "Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n", 1180 (unsigned long long)erst_erange.base,
1185 (unsigned long long)erst_erange.base, 1181 (unsigned long long)erst_erange.base + erst_erange.size - 1);
1186 (unsigned long long)erst_erange.base + erst_erange.size);
1187 rc = -EIO; 1182 rc = -EIO;
1188 goto err_unmap_reg; 1183 goto err_unmap_reg;
1189 } 1184 }
@@ -1193,7 +1188,7 @@ static int __init erst_init(void)
1193 if (!erst_erange.vaddr) 1188 if (!erst_erange.vaddr)
1194 goto err_release_erange; 1189 goto err_release_erange;
1195 1190
1196 pr_info(ERST_PFX 1191 pr_info(
1197 "Error Record Serialization Table (ERST) support is initialized.\n"); 1192 "Error Record Serialization Table (ERST) support is initialized.\n");
1198 1193
1199 buf = kmalloc(erst_erange.size, GFP_KERNEL); 1194 buf = kmalloc(erst_erange.size, GFP_KERNEL);
@@ -1205,15 +1200,15 @@ static int __init erst_init(void)
1205 rc = pstore_register(&erst_info); 1200 rc = pstore_register(&erst_info);
1206 if (rc) { 1201 if (rc) {
1207 if (rc != -EPERM) 1202 if (rc != -EPERM)
1208 pr_info(ERST_PFX 1203 pr_info(
1209 "Could not register with persistent store\n"); 1204 "Could not register with persistent store.\n");
1210 erst_info.buf = NULL; 1205 erst_info.buf = NULL;
1211 erst_info.bufsize = 0; 1206 erst_info.bufsize = 0;
1212 kfree(buf); 1207 kfree(buf);
1213 } 1208 }
1214 } else 1209 } else
1215 pr_err(ERST_PFX 1210 pr_err(
1216 "Failed to allocate %lld bytes for persistent store error log\n", 1211 "Failed to allocate %lld bytes for persistent store error log.\n",
1217 erst_erange.size); 1212 erst_erange.size);
1218 1213
1219 return 0; 1214 return 0;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ec9b57d428a1..8ec37bbdd699 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes)
409 ghes->flags &= ~GHES_TO_CLEAR; 409 ghes->flags &= ~GHES_TO_CLEAR;
410} 410}
411 411
412static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
413{
414#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
415 unsigned long pfn;
416 int sec_sev = ghes_severity(gdata->error_severity);
417 struct cper_sec_mem_err *mem_err;
418 mem_err = (struct cper_sec_mem_err *)(gdata + 1);
419
420 if (sec_sev == GHES_SEV_CORRECTED &&
421 (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
422 (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) {
423 pfn = mem_err->physical_addr >> PAGE_SHIFT;
424 if (pfn_valid(pfn))
425 memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
426 else if (printk_ratelimit())
427 pr_warn(FW_WARN GHES_PFX
428 "Invalid address in generic error data: %#llx\n",
429 mem_err->physical_addr);
430 }
431 if (sev == GHES_SEV_RECOVERABLE &&
432 sec_sev == GHES_SEV_RECOVERABLE &&
433 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
434 pfn = mem_err->physical_addr >> PAGE_SHIFT;
435 memory_failure_queue(pfn, 0, 0);
436 }
437#endif
438}
439
412static void ghes_do_proc(struct ghes *ghes, 440static void ghes_do_proc(struct ghes *ghes,
413 const struct acpi_hest_generic_status *estatus) 441 const struct acpi_hest_generic_status *estatus)
414{ 442{
@@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes,
428 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, 456 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
429 mem_err); 457 mem_err);
430#endif 458#endif
431#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE 459 ghes_handle_memory_failure(gdata, sev);
432 if (sev == GHES_SEV_RECOVERABLE &&
433 sec_sev == GHES_SEV_RECOVERABLE &&
434 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
435 unsigned long pfn;
436 pfn = mem_err->physical_addr >> PAGE_SHIFT;
437 memory_failure_queue(pfn, 0, 0);
438 }
439#endif
440 } 460 }
441#ifdef CONFIG_ACPI_APEI_PCIEAER 461#ifdef CONFIG_ACPI_APEI_PCIEAER
442 else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 462 else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index f5ef5d54e4ac..f5e37f32c71f 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -36,6 +36,7 @@
36#include <linux/io.h> 36#include <linux/io.h>
37#include <linux/platform_device.h> 37#include <linux/platform_device.h>
38#include <acpi/apei.h> 38#include <acpi/apei.h>
39#include <asm/mce.h>
39 40
40#include "apei-internal.h" 41#include "apei-internal.h"
41 42
@@ -121,6 +122,41 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
121} 122}
122EXPORT_SYMBOL_GPL(apei_hest_parse); 123EXPORT_SYMBOL_GPL(apei_hest_parse);
123 124
125/*
126 * Check if firmware advertises firmware first mode. We need FF bit to be set
127 * along with a set of MC banks which work in FF mode.
128 */
129static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
130{
131#ifdef CONFIG_X86_MCE
132 int i;
133 struct acpi_hest_ia_corrected *cmc;
134 struct acpi_hest_ia_error_bank *mc_bank;
135
136 if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
137 return 0;
138
139 cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
140 if (!cmc->enabled)
141 return 0;
142
143 /*
144 * We expect HEST to provide a list of MC banks that report errors
145 * in firmware first mode. Otherwise, return non-zero value to
146 * indicate that we are done parsing HEST.
147 */
148 if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
149 return 1;
150
151 pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
152
153 mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
154 for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
155 mce_disable_bank(mc_bank->bank_number);
156#endif
157 return 1;
158}
159
124struct ghes_arr { 160struct ghes_arr {
125 struct platform_device **ghes_devs; 161 struct platform_device **ghes_devs;
126 unsigned int count; 162 unsigned int count;
@@ -227,6 +263,9 @@ void __init acpi_hest_init(void)
227 goto err; 263 goto err;
228 } 264 }
229 265
266 if (!acpi_disable_cmcff)
267 apei_hest_parse(hest_parse_cmc, NULL);
268
230 if (!ghes_disable) { 269 if (!ghes_disable) {
231 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); 270 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
232 if (rc) 271 if (rc)