diff options
author | Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | 2013-07-10 05:27:01 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2013-07-10 14:35:02 -0400 |
commit | cf870c70a194443f8fc654ddc9d6cfd02c58003b (patch) | |
tree | 73553a1960478b454dbcb99c0db0c8acf381e58b /drivers/acpi | |
parent | 9ad95879cd1b22ed016c804f8d686ff83a41a9d4 (diff) |
mce: acpi/apei: Soft-offline a page on firmware GHES notification
If the firmware indicates in a GHES error data entry that the error threshold
has been exceeded for a corrected error event, then we try to soft-offline the
page. This could be called in interrupt context, so we queue the request, similar
to how we handle memory failure scenarios.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'drivers/acpi')
-rw-r--r-- | drivers/acpi/apei/ghes.c | 38 |
1 files changed, 29 insertions, 9 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fcd7d91cec34..a8f362acc8ec 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c | |||
@@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes) | |||
409 | ghes->flags &= ~GHES_TO_CLEAR; | 409 | ghes->flags &= ~GHES_TO_CLEAR; |
410 | } | 410 | } |
411 | 411 | ||
412 | static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) | ||
413 | { | ||
414 | #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE | ||
415 | unsigned long pfn; | ||
416 | int sec_sev = ghes_severity(gdata->error_severity); | ||
417 | struct cper_sec_mem_err *mem_err; | ||
418 | mem_err = (struct cper_sec_mem_err *)(gdata + 1); | ||
419 | |||
420 | if (sec_sev == GHES_SEV_CORRECTED && | ||
421 | (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) && | ||
422 | (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) { | ||
423 | pfn = mem_err->physical_addr >> PAGE_SHIFT; | ||
424 | if (pfn_valid(pfn)) | ||
425 | memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); | ||
426 | else if (printk_ratelimit()) | ||
427 | pr_warn(FW_WARN GHES_PFX | ||
428 | "Invalid address in generic error data: %#llx\n", | ||
429 | mem_err->physical_addr); | ||
430 | } | ||
431 | if (sev == GHES_SEV_RECOVERABLE && | ||
432 | sec_sev == GHES_SEV_RECOVERABLE && | ||
433 | mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { | ||
434 | pfn = mem_err->physical_addr >> PAGE_SHIFT; | ||
435 | memory_failure_queue(pfn, 0, 0); | ||
436 | } | ||
437 | #endif | ||
438 | } | ||
439 | |||
412 | static void ghes_do_proc(struct ghes *ghes, | 440 | static void ghes_do_proc(struct ghes *ghes, |
413 | const struct acpi_hest_generic_status *estatus) | 441 | const struct acpi_hest_generic_status *estatus) |
414 | { | 442 | { |
@@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes, | |||
428 | apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, | 456 | apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, |
429 | mem_err); | 457 | mem_err); |
430 | #endif | 458 | #endif |
431 | #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE | 459 | ghes_handle_memory_failure(gdata, sev); |
432 | if (sev == GHES_SEV_RECOVERABLE && | ||
433 | sec_sev == GHES_SEV_RECOVERABLE && | ||
434 | mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { | ||
435 | unsigned long pfn; | ||
436 | pfn = mem_err->physical_addr >> PAGE_SHIFT; | ||
437 | memory_failure_queue(pfn, 0, 0); | ||
438 | } | ||
439 | #endif | ||
440 | } | 460 | } |
441 | #ifdef CONFIG_ACPI_APEI_PCIEAER | 461 | #ifdef CONFIG_ACPI_APEI_PCIEAER |
442 | else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, | 462 | else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, |