diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 14:07:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 14:07:04 -0400 |
commit | b20c99eb668f10b855a9fd87e0a2f5db3fb3637d (patch) | |
tree | 87cb380f2006a1c5ee2c612fead142d261c64c4e /drivers/acpi | |
parent | bb8c4701704d81ef98657dc51adb99aa5a0c5ac9 (diff) | |
parent | ead6fa95b7e9d38b4526503403ba1c029b03dd72 (diff) |
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:
"[ The reason for drivers/ updates is that Boris asked for the
drivers/edac/ changes to go via x86/ras in this cycle ]
Main changes:
- AMD CPUs:
. Add ECC event decoding support for new F15h models
. Various erratum fixes
. Fix single-channel on dual-channel-controllers bug.
- Intel CPUs:
. UC uncorrectable memory error parsing fix
. Add support for CMC (Corrected Machine Check) 'FF' (Firmware
First) flag in the APEI HEST
- Various cleanups and fixes"
* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
amd64_edac: Fix incorrect wraparounds
amd64_edac: Correct erratum 505 range
cpc925_edac: Use proper array termination
x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
amd64_edac: Get rid of boot_cpu_data accesses
amd64_edac: Add ECC decoding support for newer F15h models
x86, amd_nb: Clarify F15h, model 30h GART and L3 support
pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
x38_edac: Make a local function static
i3200_edac: Make a local function static
x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
APEI/ERST: Fix error message formatting
amd64_edac: Fix single-channel setups
EDAC: Replace strict_strtol() with kstrtol()
mce: acpi/apei: Soft-offline a page on firmware GHES notification
mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
Diffstat (limited to 'drivers/acpi')
-rw-r--r-- | drivers/acpi/apei/erst.c | 51 | ||||
-rw-r--r-- | drivers/acpi/apei/ghes.c | 38 | ||||
-rw-r--r-- | drivers/acpi/apei/hest.c | 39 |
3 files changed, 91 insertions, 37 deletions
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c | |||
index 822b1ed3b00f..26311f23c824 100644 | |||
--- a/drivers/acpi/apei/erst.c | |||
+++ b/drivers/acpi/apei/erst.c | |||
@@ -39,7 +39,8 @@ | |||
39 | 39 | ||
40 | #include "apei-internal.h" | 40 | #include "apei-internal.h" |
41 | 41 | ||
42 | #define ERST_PFX "ERST: " | 42 | #undef pr_fmt |
43 | #define pr_fmt(fmt) "ERST: " fmt | ||
43 | 44 | ||
44 | /* ERST command status */ | 45 | /* ERST command status */ |
45 | #define ERST_STATUS_SUCCESS 0x0 | 46 | #define ERST_STATUS_SUCCESS 0x0 |
@@ -109,8 +110,7 @@ static inline int erst_errno(int command_status) | |||
109 | static int erst_timedout(u64 *t, u64 spin_unit) | 110 | static int erst_timedout(u64 *t, u64 spin_unit) |
110 | { | 111 | { |
111 | if ((s64)*t < spin_unit) { | 112 | if ((s64)*t < spin_unit) { |
112 | pr_warning(FW_WARN ERST_PFX | 113 | pr_warn(FW_WARN "Firmware does not respond in time.\n"); |
113 | "Firmware does not respond in time\n"); | ||
114 | return 1; | 114 | return 1; |
115 | } | 115 | } |
116 | *t -= spin_unit; | 116 | *t -= spin_unit; |
@@ -186,8 +186,8 @@ static int erst_exec_stall(struct apei_exec_context *ctx, | |||
186 | 186 | ||
187 | if (ctx->value > FIRMWARE_MAX_STALL) { | 187 | if (ctx->value > FIRMWARE_MAX_STALL) { |
188 | if (!in_nmi()) | 188 | if (!in_nmi()) |
189 | pr_warning(FW_WARN ERST_PFX | 189 | pr_warn(FW_WARN |
190 | "Too long stall time for stall instruction: %llx.\n", | 190 | "Too long stall time for stall instruction: 0x%llx.\n", |
191 | ctx->value); | 191 | ctx->value); |
192 | stall_time = FIRMWARE_MAX_STALL; | 192 | stall_time = FIRMWARE_MAX_STALL; |
193 | } else | 193 | } else |
@@ -206,8 +206,8 @@ static int erst_exec_stall_while_true(struct apei_exec_context *ctx, | |||
206 | 206 | ||
207 | if (ctx->var1 > FIRMWARE_MAX_STALL) { | 207 | if (ctx->var1 > FIRMWARE_MAX_STALL) { |
208 | if (!in_nmi()) | 208 | if (!in_nmi()) |
209 | pr_warning(FW_WARN ERST_PFX | 209 | pr_warn(FW_WARN |
210 | "Too long stall time for stall while true instruction: %llx.\n", | 210 | "Too long stall time for stall while true instruction: 0x%llx.\n", |
211 | ctx->var1); | 211 | ctx->var1); |
212 | stall_time = FIRMWARE_MAX_STALL; | 212 | stall_time = FIRMWARE_MAX_STALL; |
213 | } else | 213 | } else |
@@ -271,8 +271,7 @@ static int erst_exec_move_data(struct apei_exec_context *ctx, | |||
271 | 271 | ||
272 | /* ioremap does not work in interrupt context */ | 272 | /* ioremap does not work in interrupt context */ |
273 | if (in_interrupt()) { | 273 | if (in_interrupt()) { |
274 | pr_warning(ERST_PFX | 274 | pr_warn("MOVE_DATA can not be used in interrupt context.\n"); |
275 | "MOVE_DATA can not be used in interrupt context"); | ||
276 | return -EBUSY; | 275 | return -EBUSY; |
277 | } | 276 | } |
278 | 277 | ||
@@ -524,8 +523,7 @@ retry: | |||
524 | ERST_RECORD_ID_CACHE_SIZE_MAX); | 523 | ERST_RECORD_ID_CACHE_SIZE_MAX); |
525 | if (new_size <= erst_record_id_cache.size) { | 524 | if (new_size <= erst_record_id_cache.size) { |
526 | if (printk_ratelimit()) | 525 | if (printk_ratelimit()) |
527 | pr_warning(FW_WARN ERST_PFX | 526 | pr_warn(FW_WARN "too many record IDs!\n"); |
528 | "too many record ID!\n"); | ||
529 | return 0; | 527 | return 0; |
530 | } | 528 | } |
531 | alloc_size = new_size * sizeof(entries[0]); | 529 | alloc_size = new_size * sizeof(entries[0]); |
@@ -761,8 +759,7 @@ static int __erst_clear_from_storage(u64 record_id) | |||
761 | static void pr_unimpl_nvram(void) | 759 | static void pr_unimpl_nvram(void) |
762 | { | 760 | { |
763 | if (printk_ratelimit()) | 761 | if (printk_ratelimit()) |
764 | pr_warning(ERST_PFX | 762 | pr_warn("NVRAM ERST Log Address Range not implemented yet.\n"); |
765 | "NVRAM ERST Log Address Range is not implemented yet\n"); | ||
766 | } | 763 | } |
767 | 764 | ||
768 | static int __erst_write_to_nvram(const struct cper_record_header *record) | 765 | static int __erst_write_to_nvram(const struct cper_record_header *record) |
@@ -1133,7 +1130,7 @@ static int __init erst_init(void) | |||
1133 | goto err; | 1130 | goto err; |
1134 | 1131 | ||
1135 | if (erst_disable) { | 1132 | if (erst_disable) { |
1136 | pr_info(ERST_PFX | 1133 | pr_info( |
1137 | "Error Record Serialization Table (ERST) support is disabled.\n"); | 1134 | "Error Record Serialization Table (ERST) support is disabled.\n"); |
1138 | goto err; | 1135 | goto err; |
1139 | } | 1136 | } |
@@ -1144,14 +1141,14 @@ static int __init erst_init(void) | |||
1144 | goto err; | 1141 | goto err; |
1145 | else if (ACPI_FAILURE(status)) { | 1142 | else if (ACPI_FAILURE(status)) { |
1146 | const char *msg = acpi_format_exception(status); | 1143 | const char *msg = acpi_format_exception(status); |
1147 | pr_err(ERST_PFX "Failed to get table, %s\n", msg); | 1144 | pr_err("Failed to get table, %s\n", msg); |
1148 | rc = -EINVAL; | 1145 | rc = -EINVAL; |
1149 | goto err; | 1146 | goto err; |
1150 | } | 1147 | } |
1151 | 1148 | ||
1152 | rc = erst_check_table(erst_tab); | 1149 | rc = erst_check_table(erst_tab); |
1153 | if (rc) { | 1150 | if (rc) { |
1154 | pr_err(FW_BUG ERST_PFX "ERST table is invalid\n"); | 1151 | pr_err(FW_BUG "ERST table is invalid.\n"); |
1155 | goto err; | 1152 | goto err; |
1156 | } | 1153 | } |
1157 | 1154 | ||
@@ -1169,21 +1166,19 @@ static int __init erst_init(void) | |||
1169 | rc = erst_get_erange(&erst_erange); | 1166 | rc = erst_get_erange(&erst_erange); |
1170 | if (rc) { | 1167 | if (rc) { |
1171 | if (rc == -ENODEV) | 1168 | if (rc == -ENODEV) |
1172 | pr_info(ERST_PFX | 1169 | pr_info( |
1173 | "The corresponding hardware device or firmware implementation " | 1170 | "The corresponding hardware device or firmware implementation " |
1174 | "is not available.\n"); | 1171 | "is not available.\n"); |
1175 | else | 1172 | else |
1176 | pr_err(ERST_PFX | 1173 | pr_err("Failed to get Error Log Address Range.\n"); |
1177 | "Failed to get Error Log Address Range.\n"); | ||
1178 | goto err_unmap_reg; | 1174 | goto err_unmap_reg; |
1179 | } | 1175 | } |
1180 | 1176 | ||
1181 | r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST"); | 1177 | r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST"); |
1182 | if (!r) { | 1178 | if (!r) { |
1183 | pr_err(ERST_PFX | 1179 | pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n", |
1184 | "Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n", | 1180 | (unsigned long long)erst_erange.base, |
1185 | (unsigned long long)erst_erange.base, | 1181 | (unsigned long long)erst_erange.base + erst_erange.size - 1); |
1186 | (unsigned long long)erst_erange.base + erst_erange.size); | ||
1187 | rc = -EIO; | 1182 | rc = -EIO; |
1188 | goto err_unmap_reg; | 1183 | goto err_unmap_reg; |
1189 | } | 1184 | } |
@@ -1193,7 +1188,7 @@ static int __init erst_init(void) | |||
1193 | if (!erst_erange.vaddr) | 1188 | if (!erst_erange.vaddr) |
1194 | goto err_release_erange; | 1189 | goto err_release_erange; |
1195 | 1190 | ||
1196 | pr_info(ERST_PFX | 1191 | pr_info( |
1197 | "Error Record Serialization Table (ERST) support is initialized.\n"); | 1192 | "Error Record Serialization Table (ERST) support is initialized.\n"); |
1198 | 1193 | ||
1199 | buf = kmalloc(erst_erange.size, GFP_KERNEL); | 1194 | buf = kmalloc(erst_erange.size, GFP_KERNEL); |
@@ -1205,15 +1200,15 @@ static int __init erst_init(void) | |||
1205 | rc = pstore_register(&erst_info); | 1200 | rc = pstore_register(&erst_info); |
1206 | if (rc) { | 1201 | if (rc) { |
1207 | if (rc != -EPERM) | 1202 | if (rc != -EPERM) |
1208 | pr_info(ERST_PFX | 1203 | pr_info( |
1209 | "Could not register with persistent store\n"); | 1204 | "Could not register with persistent store.\n"); |
1210 | erst_info.buf = NULL; | 1205 | erst_info.buf = NULL; |
1211 | erst_info.bufsize = 0; | 1206 | erst_info.bufsize = 0; |
1212 | kfree(buf); | 1207 | kfree(buf); |
1213 | } | 1208 | } |
1214 | } else | 1209 | } else |
1215 | pr_err(ERST_PFX | 1210 | pr_err( |
1216 | "Failed to allocate %lld bytes for persistent store error log\n", | 1211 | "Failed to allocate %lld bytes for persistent store error log.\n", |
1217 | erst_erange.size); | 1212 | erst_erange.size); |
1218 | 1213 | ||
1219 | return 0; | 1214 | return 0; |
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c | |||
index ec9b57d428a1..8ec37bbdd699 100644 | |||
--- a/drivers/acpi/apei/ghes.c | |||
+++ b/drivers/acpi/apei/ghes.c | |||
@@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes) | |||
409 | ghes->flags &= ~GHES_TO_CLEAR; | 409 | ghes->flags &= ~GHES_TO_CLEAR; |
410 | } | 410 | } |
411 | 411 | ||
412 | static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) | ||
413 | { | ||
414 | #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE | ||
415 | unsigned long pfn; | ||
416 | int sec_sev = ghes_severity(gdata->error_severity); | ||
417 | struct cper_sec_mem_err *mem_err; | ||
418 | mem_err = (struct cper_sec_mem_err *)(gdata + 1); | ||
419 | |||
420 | if (sec_sev == GHES_SEV_CORRECTED && | ||
421 | (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) && | ||
422 | (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) { | ||
423 | pfn = mem_err->physical_addr >> PAGE_SHIFT; | ||
424 | if (pfn_valid(pfn)) | ||
425 | memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); | ||
426 | else if (printk_ratelimit()) | ||
427 | pr_warn(FW_WARN GHES_PFX | ||
428 | "Invalid address in generic error data: %#llx\n", | ||
429 | mem_err->physical_addr); | ||
430 | } | ||
431 | if (sev == GHES_SEV_RECOVERABLE && | ||
432 | sec_sev == GHES_SEV_RECOVERABLE && | ||
433 | mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { | ||
434 | pfn = mem_err->physical_addr >> PAGE_SHIFT; | ||
435 | memory_failure_queue(pfn, 0, 0); | ||
436 | } | ||
437 | #endif | ||
438 | } | ||
439 | |||
412 | static void ghes_do_proc(struct ghes *ghes, | 440 | static void ghes_do_proc(struct ghes *ghes, |
413 | const struct acpi_hest_generic_status *estatus) | 441 | const struct acpi_hest_generic_status *estatus) |
414 | { | 442 | { |
@@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes, | |||
428 | apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, | 456 | apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, |
429 | mem_err); | 457 | mem_err); |
430 | #endif | 458 | #endif |
431 | #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE | 459 | ghes_handle_memory_failure(gdata, sev); |
432 | if (sev == GHES_SEV_RECOVERABLE && | ||
433 | sec_sev == GHES_SEV_RECOVERABLE && | ||
434 | mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { | ||
435 | unsigned long pfn; | ||
436 | pfn = mem_err->physical_addr >> PAGE_SHIFT; | ||
437 | memory_failure_queue(pfn, 0, 0); | ||
438 | } | ||
439 | #endif | ||
440 | } | 460 | } |
441 | #ifdef CONFIG_ACPI_APEI_PCIEAER | 461 | #ifdef CONFIG_ACPI_APEI_PCIEAER |
442 | else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, | 462 | else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, |
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c | |||
index f5ef5d54e4ac..f5e37f32c71f 100644 | |||
--- a/drivers/acpi/apei/hest.c | |||
+++ b/drivers/acpi/apei/hest.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/io.h> | 36 | #include <linux/io.h> |
37 | #include <linux/platform_device.h> | 37 | #include <linux/platform_device.h> |
38 | #include <acpi/apei.h> | 38 | #include <acpi/apei.h> |
39 | #include <asm/mce.h> | ||
39 | 40 | ||
40 | #include "apei-internal.h" | 41 | #include "apei-internal.h" |
41 | 42 | ||
@@ -121,6 +122,41 @@ int apei_hest_parse(apei_hest_func_t func, void *data) | |||
121 | } | 122 | } |
122 | EXPORT_SYMBOL_GPL(apei_hest_parse); | 123 | EXPORT_SYMBOL_GPL(apei_hest_parse); |
123 | 124 | ||
125 | /* | ||
126 | * Check if firmware advertises firmware first mode. We need FF bit to be set | ||
127 | * along with a set of MC banks which work in FF mode. | ||
128 | */ | ||
129 | static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data) | ||
130 | { | ||
131 | #ifdef CONFIG_X86_MCE | ||
132 | int i; | ||
133 | struct acpi_hest_ia_corrected *cmc; | ||
134 | struct acpi_hest_ia_error_bank *mc_bank; | ||
135 | |||
136 | if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) | ||
137 | return 0; | ||
138 | |||
139 | cmc = (struct acpi_hest_ia_corrected *)hest_hdr; | ||
140 | if (!cmc->enabled) | ||
141 | return 0; | ||
142 | |||
143 | /* | ||
144 | * We expect HEST to provide a list of MC banks that report errors | ||
145 | * in firmware first mode. Otherwise, return non-zero value to | ||
146 | * indicate that we are done parsing HEST. | ||
147 | */ | ||
148 | if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks) | ||
149 | return 1; | ||
150 | |||
151 | pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n"); | ||
152 | |||
153 | mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1); | ||
154 | for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++) | ||
155 | mce_disable_bank(mc_bank->bank_number); | ||
156 | #endif | ||
157 | return 1; | ||
158 | } | ||
159 | |||
124 | struct ghes_arr { | 160 | struct ghes_arr { |
125 | struct platform_device **ghes_devs; | 161 | struct platform_device **ghes_devs; |
126 | unsigned int count; | 162 | unsigned int count; |
@@ -227,6 +263,9 @@ void __init acpi_hest_init(void) | |||
227 | goto err; | 263 | goto err; |
228 | } | 264 | } |
229 | 265 | ||
266 | if (!acpi_disable_cmcff) | ||
267 | apei_hest_parse(hest_parse_cmc, NULL); | ||
268 | |||
230 | if (!ghes_disable) { | 269 | if (!ghes_disable) { |
231 | rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); | 270 | rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); |
232 | if (rc) | 271 | if (rc) |