 35 files changed, 1172 insertions(+), 135 deletions(-)
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index dfab71848dc8..5cc699ba5453 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -48,12 +48,19 @@ directory apei/einj. The following files are provided.
 - param1
   This file is used to set the first error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address.
+  physical memory address. Only available if param_extension module
+  parameter is specified.
 
 - param2
   This file is used to set the second error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address mask.
+  physical memory address mask. Only available if param_extension
+  module parameter is specified.
+
+Injecting parameter support is a BIOS version specific extension, that
+is, it only works on some BIOS version. If you want to use it, please
+make sure your BIOS version has the proper support and specify
+"param_extension=y" in module parameter.
 
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5.
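The einj.txt interface above is plain debugfs. A minimal userspace sketch of one injection sequence follows; it is illustrative only: the /sys/kernel/debug mount point and the 0x8 (memory correctable) error type value come from convention and the ACPI spec, not from this patch, and the addresses are made up.

/* Sketch: inject a correctable memory error through the EINJ debugfs
 * files. Assumes debugfs is mounted at /sys/kernel/debug and the einj
 * module was loaded with param_extension=y. */
#include <stdio.h>
#include <stdlib.h>

static void write_str(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                exit(1);
        }
        fprintf(f, "%s\n", val);
        fclose(f);
}

int main(void)
{
        const char *dir = "/sys/kernel/debug/apei/einj";
        char path[128];

        snprintf(path, sizeof(path), "%s/error_type", dir);
        write_str(path, "0x8");                /* memory correctable */
        snprintf(path, sizeof(path), "%s/param1", dir);
        write_str(path, "0x12345000");         /* physical address, example */
        snprintf(path, sizeof(path), "%s/param2", dir);
        write_str(path, "0xfffffffffffff000"); /* address mask, example */
        snprintf(path, sizeof(path), "%s/error_inject", dir);
        write_str(path, "1");                  /* trigger the injection */
        return 0;
}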
diff --git a/arch/Kconfig b/arch/Kconfig
index 26b0e2397a57..4b0669cbb3b0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX
 config HAVE_RCU_TABLE_FREE
 	bool
 
+config ARCH_HAVE_NMI_SAFE_CMPXCHG
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index ca2da8da6e9c..60cde53d266c 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,6 +14,7 @@ config ALPHA
 	select AUTO_IRQ_AFFINITY if SMP
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e9d689b7c833..197e96f70405 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -10,6 +10,7 @@ config AVR32
 	select GENERIC_IRQ_PROBE
 	select HARDIRQS_SW_RESEND
 	select GENERIC_IRQ_SHOW
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index cb884e489425..bad27a6ff407 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
 	select HAVE_PERF_EVENTS
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ZONE_DMA
 	bool
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 64c7ab7e7a81..124854714958 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
 	select IRQ_PER_CPU
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 284cd3771eaa..9e8ee9d2b8ca 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -6,6 +6,7 @@ config M68K
 	select GENERIC_ATOMIC64 if MMU
 	select HAVE_GENERIC_HARDIRQS if !MMU
 	select GENERIC_IRQ_SHOW if !MMU
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 
 config RWSEM_GENERIC_SPINLOCK
 	bool
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 65adc86a230e..e077b0bf56ca 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -15,6 +15,7 @@ config PARISC
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select IRQ_PER_CPU
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 374c475e56a3..6926b61acfea 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -136,6 +136,7 @@ config PPC
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_BPF_JIT if (PPC64 && NET)
 	select HAVE_ARCH_JUMP_LABEL
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c03fef7a9c22..0f98bbddade5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -81,6 +81,7 @@ config S390
 	select INIT_ALL_POSSIBLE
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 748ff1920068..ff9177c8f643 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -11,6 +11,7 @@ config SUPERH
 	select HAVE_DMA_ATTRS
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select PERF_USE_VMALLOC
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 1074dddcb104..42c67beadcae 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -54,6 +54,7 @@ config SPARC64
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select IRQ_PREFLOW_FASTEOI
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 0249b8b4db54..b30f71ac0d06 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -12,6 +12,7 @@ config TILE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
 	select SYS_HYPERVISOR
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7cf916fc1ce7..6a47bb22657f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if (X86_64 && NET)
 	select CLKEVT_I8253
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
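Each architecture above advertises the same property: its cmpxchg() may be called from NMI context. A consumer simply makes its NMI-side code conditional on the symbol. A minimal sketch of that pattern (the function and pool names are hypothetical; the real consumer is the GHES NMI handler further down in this series):

#include <linux/genalloc.h>

/* Only allocate from NMI context when the architecture has an
 * NMI-safe cmpxchg; otherwise drop the record. Names are
 * illustrative, not from this patch. */
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
static unsigned long nmi_safe_grab(struct gen_pool *pool, size_t len)
{
        return gen_pool_alloc(pool, len);  /* lockless, cmpxchg-based */
}
#else
static unsigned long nmi_safe_grab(struct gen_pool *pool, size_t len)
{
        return 0;                          /* no NMI-safe path available */
}
#endif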
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index f739a70b1c70..c34aa51af4ee 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -10,9 +10,11 @@ config ACPI_APEI
 	  error injection.
 
 config ACPI_APEI_GHES
-	tristate "APEI Generic Hardware Error Source"
+	bool "APEI Generic Hardware Error Source"
 	depends on ACPI_APEI && X86
 	select ACPI_HED
+	select LLIST
+	select GENERIC_ALLOCATOR
 	help
 	  Generic Hardware Error Source provides a way to report
 	  platform hardware errors (such as that from chipset). It
@@ -30,6 +32,13 @@ config ACPI_APEI_PCIEAER
 	  PCIe AER errors may be reported via APEI firmware first mode.
 	  Turn on this option to enable the corresponding support.
 
+config ACPI_APEI_MEMORY_FAILURE
+	bool "APEI memory error recovering support"
+	depends on ACPI_APEI && MEMORY_FAILURE
+	help
+	  Memory errors may be reported via APEI firmware first mode.
+	  Turn on this option to enable the memory recovering support.
+
 config ACPI_APEI_EINJ
 	tristate "APEI Error INJection (EINJ)"
 	depends on ACPI_APEI && DEBUG_FS
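For reference, one possible .config fragment that exercises the options this series adds or touches; this is an assumed example configuration, not part of the patch (LLIST and GENERIC_ALLOCATOR are pulled in automatically by the new selects on ACPI_APEI_GHES):

CONFIG_ACPI_APEI=y
CONFIG_ACPI_APEI_GHES=y
CONFIG_ACPI_APEI_MEMORY_FAILURE=y
CONFIG_ACPI_APEI_EINJ=m
CONFIG_MEMORY_FAILURE=y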
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 4a904a4bf05f..8041248fce9b 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
  * Interpret the specified action. Go through whole action table,
  * execute all instructions belong to the action.
  */
-int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+		    bool optional)
 {
-	int rc;
+	int rc = -ENOENT;
 	u32 i, ip;
 	struct acpi_whea_header *entry;
 	apei_exec_ins_func_t run;
@@ -198,9 +199,9 @@ rewind:
 		goto rewind;
 	}
 
-	return 0;
+	return !optional && rc < 0 ? rc : 0;
 }
-EXPORT_SYMBOL_GPL(apei_exec_run);
+EXPORT_SYMBOL_GPL(__apei_exec_run);
 
 typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
 				      struct acpi_whea_header *entry,
@@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void)
 	return dapei;
 }
 EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int apei_osc_setup(void)
+{
+	static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+	acpi_handle handle;
+	u32 capbuf[3];
+	struct acpi_osc_context context = {
+		.uuid_str	= whea_uuid_str,
+		.rev		= 1,
+		.cap.length	= sizeof(capbuf),
+		.cap.pointer	= capbuf,
+	};
+
+	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_TYPE] = 0;
+	capbuf[OSC_CONTROL_TYPE] = 0;
+
+	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+	    || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+		return -EIO;
+	else {
+		kfree(context.ret.pointer);
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index ef0581f2094d..f57050e7a5e7 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
 	return ctx->value;
 }
 
-int apei_exec_run(struct apei_exec_context *ctx, u8 action);
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 1);
+}
 
 /* Common instruction implementation */
 
@@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx,
 			const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
+
+int apei_osc_setup(void);
 #endif
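The split above gives callers two error policies: __apei_exec_run() now starts with rc = -ENOENT, so if the firmware's action table contains no instruction for the requested action, apei_exec_run() fails while apei_exec_run_optional() treats the absence as success. A sketch of the resulting call pattern (the action constants are real ACPICA names used by einj.c below; the surrounding code is illustrative):

	struct apei_exec_context ctx;
	int rc;

	einj_exec_ctx_init(&ctx);

	/* BEGIN_INJECTION_OPERATION is absent from some firmware's
	 * action tables; a missing entry must not abort injection. */
	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
	if (rc)
		return rc;	/* only genuine execution failures */

	/* GET_TRIGGER_ERROR_ACTION_TABLE is mandatory; if the firmware
	 * does not provide it, -ENOENT propagates to the caller. */
	rc = apei_exec_run(&ctx, ACPI_EINJ_GET_TRIGGER_TABLE);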
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index f74b2ea11f21..589b96c38704 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -46,7 +46,8 @@
  * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
  * EINJ table through an unpublished extension. Use with caution as
  * most will ignore the parameter and make their own choice of address
- * for error injection.
+ * for error injection.  This extension is used only if
+ * param_extension module parameter is specified.
  */
 struct einj_parameter {
 	u64 type;
@@ -65,6 +66,9 @@ struct einj_parameter {
 	((struct acpi_whea_header *)((char *)(tab) +			\
 				    sizeof(struct acpi_table_einj)))
 
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
 static struct acpi_table_einj *einj_tab;
 
 static struct apei_resources einj_resources;
@@ -285,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 
 	einj_exec_ctx_init(&ctx);
 
-	rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, type);
@@ -323,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 	rc = __einj_error_trigger(trigger_paddr);
 	if (rc)
 		return rc;
-	rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
 
 	return rc;
 }
@@ -489,14 +493,6 @@ static int __init einj_init(void)
 				   einj_debug_dir, NULL, &error_type_fops);
 	if (!fentry)
 		goto err_cleanup;
-	fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
-				    einj_debug_dir, &error_param1);
-	if (!fentry)
-		goto err_cleanup;
-	fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
-				    einj_debug_dir, &error_param2);
-	if (!fentry)
-		goto err_cleanup;
 	fentry = debugfs_create_file("error_inject", S_IWUSR,
 				     einj_debug_dir, NULL, &error_inject_fops);
 	if (!fentry)
@@ -513,12 +509,23 @@ static int __init einj_init(void)
 	rc = apei_exec_pre_map_gars(&ctx);
 	if (rc)
 		goto err_release;
-	param_paddr = einj_get_parameter_address();
-	if (param_paddr) {
-		einj_param = ioremap(param_paddr, sizeof(*einj_param));
-		rc = -ENOMEM;
-		if (!einj_param)
-			goto err_unmap;
+	if (param_extension) {
+		param_paddr = einj_get_parameter_address();
+		if (param_paddr) {
+			einj_param = ioremap(param_paddr, sizeof(*einj_param));
+			rc = -ENOMEM;
+			if (!einj_param)
+				goto err_unmap;
+			fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
+						    einj_debug_dir, &error_param1);
+			if (!fentry)
+				goto err_unmap;
+			fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
+						    einj_debug_dir, &error_param2);
+			if (!fentry)
+				goto err_unmap;
+		} else
+			pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
 	}
 
 	pr_info(EINJ_PFX "Error INJection is initialized.\n");
@@ -526,6 +533,8 @@ static int __init einj_init(void)
 	return 0;
 
 err_unmap:
+	if (einj_param)
+		iounmap(einj_param);
 	apei_exec_post_unmap_gars(&ctx);
 err_release:
 	apei_resources_release(&einj_resources);
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index a4cfb64c86a1..903549df809b 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -33,7 +33,7 @@
 
 #define ERST_DBG_PFX			"ERST DBG: "
 
-#define ERST_DBG_RECORD_LEN_MAX		4096
+#define ERST_DBG_RECORD_LEN_MAX		0x4000
 
 static void *erst_dbg_buf;
 static unsigned int erst_dbg_buf_len;
@@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = {
 
 static __init int erst_dbg_init(void)
 {
+	if (erst_disable) {
+		pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+		return -ENODEV;
+	}
 	return misc_register(&erst_dbg_dev);
 }
 
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6053f4780df9..2ca59dc69f7f 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, offset);
@@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
@@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, offset);
@@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
@@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, record_id);
@@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index f703b2881153..0784f99a4665 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,7 +12,7 @@
  * For more information about Generic Hardware Error Source, please
  * refer to ACPI Specification version 4.0, section 17.3.2.6
  *
- * Copyright 2010 Intel Corp.
+ * Copyright 2010,2011 Intel Corp.
  *   Author: Huang Ying <ying.huang@intel.com>
  *
  * This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@
 #include <linux/mutex.h>
 #include <linux/ratelimit.h>
 #include <linux/vmalloc.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
 #include <acpi/apei.h>
 #include <acpi/atomicio.h>
 #include <acpi/hed.h>
@@ -53,6 +56,30 @@
 #define GHES_PFX	"GHES: "
 
 #define GHES_ESTATUS_MAX_SIZE		65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536
+
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE	512
+
+#define GHES_ESTATUS_CACHES_SIZE	4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
+	(sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
+	((struct acpi_hest_generic_status *)			\
+	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
+	(sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
+	((struct acpi_hest_generic_status *)			\
+	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
 /*
  * One struct ghes is created for each generic hardware error source.
@@ -77,6 +104,22 @@ struct ghes {
 	};
 };
 
+struct ghes_estatus_node {
+	struct llist_node llnode;
+	struct acpi_hest_generic *generic;
+};
+
+struct ghes_estatus_cache {
+	u32 estatus_len;
+	atomic_t count;
+	struct acpi_hest_generic *generic;
+	unsigned long long time_in;
+	struct rcu_head rcu;
+};
+
+int ghes_disable;
+module_param_named(disable, ghes_disable, bool, 0);
+
 static int ghes_panic_timeout	__read_mostly = 30;
 
 /*
@@ -121,6 +164,22 @@ static struct vm_struct *ghes_ioremap_area;
 static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
 static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
 
+/*
+ * printk is not safe in NMI context.  So in NMI handler, we allocate
+ * required memory from lock-less memory allocator
+ * (ghes_estatus_pool), save estatus into it, put them into lock-less
+ * list (ghes_estatus_llist), then delay printk into IRQ context via
+ * irq_work (ghes_proc_irq_work).  ghes_estatus_size_request record
+ * required pool size by all NMI error source.
+ */
+static struct gen_pool *ghes_estatus_pool;
+static unsigned long ghes_estatus_pool_size_request;
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
 static int ghes_ioremap_init(void)
 {
 	ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -180,6 +239,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
 	__flush_tlb_one(vaddr);
 }
 
+static int ghes_estatus_pool_init(void)
+{
+	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+	if (!ghes_estatus_pool)
+		return -ENOMEM;
+	return 0;
+}
+
+static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
+					      struct gen_pool_chunk *chunk,
+					      void *data)
+{
+	free_page(chunk->start_addr);
+}
+
+static void ghes_estatus_pool_exit(void)
+{
+	gen_pool_for_each_chunk(ghes_estatus_pool,
+				ghes_estatus_pool_free_chunk_page, NULL);
+	gen_pool_destroy(ghes_estatus_pool);
+}
+
+static int ghes_estatus_pool_expand(unsigned long len)
+{
+	unsigned long i, pages, size, addr;
+	int ret;
+
+	ghes_estatus_pool_size_request += PAGE_ALIGN(len);
+	size = gen_pool_size(ghes_estatus_pool);
+	if (size >= ghes_estatus_pool_size_request)
+		return 0;
+	pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
+	for (i = 0; i < pages; i++) {
+		addr = __get_free_page(GFP_KERNEL);
+		if (!addr)
+			return -ENOMEM;
+		ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void ghes_estatus_pool_shrink(unsigned long len)
+{
+	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
+}
+
 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
 {
 	struct ghes *ghes;
@@ -341,43 +449,196 @@ static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_do_proc(struct ghes *ghes)
+static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
 {
-	int sev, processed = 0;
+	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
 
-	sev = ghes_severity(ghes->estatus->error_severity);
-	apei_estatus_for_each_section(ghes->estatus, gdata) {
-#ifdef CONFIG_X86_MCE
+	sev = ghes_severity(estatus->error_severity);
+	apei_estatus_for_each_section(estatus, gdata) {
+		sec_sev = ghes_severity(gdata->error_severity);
 		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
 				 CPER_SEC_PLATFORM_MEM)) {
-			apei_mce_report_mem_error(
-				sev == GHES_SEV_CORRECTED,
-				(struct cper_sec_mem_err *)(gdata+1));
-			processed = 1;
-		}
+			struct cper_sec_mem_err *mem_err;
+			mem_err = (struct cper_sec_mem_err *)(gdata+1);
+#ifdef CONFIG_X86_MCE
+			apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
+						  mem_err);
 #endif
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+			if (sev == GHES_SEV_RECOVERABLE &&
+			    sec_sev == GHES_SEV_RECOVERABLE &&
+			    mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+				unsigned long pfn;
+				pfn = mem_err->physical_addr >> PAGE_SHIFT;
+				memory_failure_queue(pfn, 0, 0);
+			}
+#endif
+		}
 	}
 }
 
-static void ghes_print_estatus(const char *pfx, struct ghes *ghes)
+static void __ghes_print_estatus(const char *pfx,
+				 const struct acpi_hest_generic *generic,
+				 const struct acpi_hest_generic_status *estatus)
 {
-	/* Not more than 2 messages every 5 seconds */
-	static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
-
 	if (pfx == NULL) {
-		if (ghes_severity(ghes->estatus->error_severity) <=
+		if (ghes_severity(estatus->error_severity) <=
 		    GHES_SEV_CORRECTED)
 			pfx = KERN_WARNING HW_ERR;
 		else
 			pfx = KERN_ERR HW_ERR;
 	}
-	if (__ratelimit(&ratelimit)) {
-		printk(
-	"%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
-	pfx, ghes->generic->header.source_id);
-		apei_estatus_print(pfx, ghes->estatus);
+	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+	       pfx, generic->header.source_id);
+	apei_estatus_print(pfx, estatus);
+}
+
+static int ghes_print_estatus(const char *pfx,
+			      const struct acpi_hest_generic *generic,
+			      const struct acpi_hest_generic_status *estatus)
+{
+	/* Not more than 2 messages every 5 seconds */
+	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+	struct ratelimit_state *ratelimit;
+
+	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+		ratelimit = &ratelimit_corrected;
+	else
+		ratelimit = &ratelimit_uncorrected;
+	if (__ratelimit(ratelimit)) {
+		__ghes_print_estatus(pfx, generic, estatus);
+		return 1;
 	}
+	return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+	u32 len;
+	int i, cached = 0;
+	unsigned long long now;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	len = apei_estatus_len(estatus);
+	rcu_read_lock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL)
+			continue;
+		if (len != cache->estatus_len)
+			continue;
+		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+		if (memcmp(estatus, cache_estatus, len))
+			continue;
+		atomic_inc(&cache->count);
+		now = sched_clock();
+		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+			cached = 1;
+		break;
+	}
+	rcu_read_unlock();
+	return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int alloced;
+	u32 len, cache_len;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	len = apei_estatus_len(estatus);
+	cache_len = GHES_ESTATUS_CACHE_LEN(len);
+	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+	if (!cache) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+	memcpy(cache_estatus, estatus, len);
+	cache->estatus_len = len;
+	atomic_set(&cache->count, 0);
+	cache->generic = generic;
+	cache->time_in = sched_clock();
+	return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+	u32 len;
+
+	len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+	len = GHES_ESTATUS_CACHE_LEN(len);
+	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+	atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+	struct ghes_estatus_cache *cache;
+
+	cache = container_of(head, struct ghes_estatus_cache, rcu);
+	ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int i, slot = -1, count;
+	unsigned long long now, duration, period, max_period = 0;
+	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+	new_cache = ghes_estatus_cache_alloc(generic, estatus);
+	if (new_cache == NULL)
+		return;
+	rcu_read_lock();
+	now = sched_clock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL) {
+			slot = i;
+			slot_cache = NULL;
+			break;
+		}
+		duration = now - cache->time_in;
+		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+			slot = i;
+			slot_cache = cache;
+			break;
+		}
+		count = atomic_read(&cache->count);
+		period = duration;
+		do_div(period, (count + 1));
+		if (period > max_period) {
+			max_period = period;
+			slot = i;
+			slot_cache = cache;
+		}
+	}
+	/* new_cache must be put into array after its contents are written */
+	smp_wmb();
+	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+				  slot_cache, new_cache) == slot_cache) {
+		if (slot_cache)
+			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+	} else
+		ghes_estatus_cache_free(new_cache);
+	rcu_read_unlock();
 }
 
 static int ghes_proc(struct ghes *ghes)
@@ -387,9 +648,11 @@ static int ghes_proc(struct ghes *ghes)
 	rc = ghes_read_estatus(ghes, 0);
 	if (rc)
 		goto out;
-	ghes_print_estatus(NULL, ghes);
-	ghes_do_proc(ghes);
-
+	if (!ghes_estatus_cached(ghes->estatus)) {
+		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
+			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
+	}
+	ghes_do_proc(ghes->estatus);
 out:
 	ghes_clear_estatus(ghes);
 	return 0;
@@ -447,6 +710,45 @@ static int ghes_notify_sci(struct notifier_block *this,
 	return ret;
 }
 
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+	struct llist_node *llnode, *next, *tail = NULL;
+	struct ghes_estatus_node *estatus_node;
+	struct acpi_hest_generic *generic;
+	struct acpi_hest_generic_status *estatus;
+	u32 len, node_len;
+
+	/*
+	 * Because the time order of estatus in list is reversed,
+	 * revert it back to proper order.
+	 */
+	llnode = llist_del_all(&ghes_estatus_llist);
+	while (llnode) {
+		next = llnode->next;
+		llnode->next = tail;
+		tail = llnode;
+		llnode = next;
+	}
+	llnode = tail;
+	while (llnode) {
+		next = llnode->next;
+		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+					   llnode);
+		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+		len = apei_estatus_len(estatus);
+		node_len = GHES_ESTATUS_NODE_LEN(len);
+		ghes_do_proc(estatus);
+		if (!ghes_estatus_cached(estatus)) {
+			generic = estatus_node->generic;
+			if (ghes_print_estatus(NULL, generic, estatus))
+				ghes_estatus_cache_add(generic, estatus);
+		}
+		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+			      node_len);
+		llnode = next;
+	}
+}
+
 static int ghes_notify_nmi(struct notifier_block *this,
 			   unsigned long cmd, void *data)
 {
@@ -476,7 +778,8 @@ static int ghes_notify_nmi(struct notifier_block *this,
 
 	if (sev_global >= GHES_SEV_PANIC) {
 		oops_begin();
-		ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
+		__ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
+				     ghes_global->estatus);
 		/* reboot to log the error! */
 		if (panic_timeout == 0)
 			panic_timeout = ghes_panic_timeout;
@@ -484,12 +787,34 @@ static int ghes_notify_nmi(struct notifier_block *this,
 	}
 
 	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+		u32 len, node_len;
+		struct ghes_estatus_node *estatus_node;
+		struct acpi_hest_generic_status *estatus;
+#endif
 		if (!(ghes->flags & GHES_TO_CLEAR))
 			continue;
-		/* Do not print estatus because printk is not NMI safe */
-		ghes_do_proc(ghes);
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+		if (ghes_estatus_cached(ghes->estatus))
+			goto next;
+		/* Save estatus for further processing in IRQ context */
+		len = apei_estatus_len(ghes->estatus);
+		node_len = GHES_ESTATUS_NODE_LEN(len);
+		estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
+						      node_len);
+		if (estatus_node) {
+			estatus_node->generic = ghes->generic;
+			estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+			memcpy(estatus, ghes->estatus, len);
+			llist_add(&estatus_node->llnode, &ghes_estatus_llist);
+		}
+next:
+#endif
 		ghes_clear_estatus(ghes);
 	}
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	irq_work_queue(&ghes_proc_irq_work);
+#endif
 
 out:
 	raw_spin_unlock(&ghes_nmi_lock);
@@ -504,10 +829,26 @@ static struct notifier_block ghes_notifier_nmi = {
 	.notifier_call = ghes_notify_nmi,
 };
 
+static unsigned long ghes_esource_prealloc_size(
+	const struct acpi_hest_generic *generic)
+{
+	unsigned long block_length, prealloc_records, prealloc_size;
+
+	block_length = min_t(unsigned long, generic->error_block_length,
+			     GHES_ESTATUS_MAX_SIZE);
+	prealloc_records = max_t(unsigned long,
+				 generic->records_to_preallocate, 1);
+	prealloc_size = min_t(unsigned long, block_length * prealloc_records,
+			      GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+	return prealloc_size;
+}
+
 static int __devinit ghes_probe(struct platform_device *ghes_dev)
 {
 	struct acpi_hest_generic *generic;
 	struct ghes *ghes = NULL;
+	unsigned long len;
 	int rc = -EINVAL;
 
 	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -573,6 +914,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
 		mutex_unlock(&ghes_list_mutex);
 		break;
 	case ACPI_HEST_NOTIFY_NMI:
+		len = ghes_esource_prealloc_size(generic);
+		ghes_estatus_pool_expand(len);
 		mutex_lock(&ghes_list_mutex);
 		if (list_empty(&ghes_nmi))
 			register_die_notifier(&ghes_notifier_nmi);
@@ -597,6 +940,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
 {
 	struct ghes *ghes;
 	struct acpi_hest_generic *generic;
+	unsigned long len;
 
 	ghes = platform_get_drvdata(ghes_dev);
 	generic = ghes->generic;
@@ -627,6 +971,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
 		 * freed after NMI handler finishes.
 		 */
		synchronize_rcu();
+		len = ghes_esource_prealloc_size(generic);
+		ghes_estatus_pool_shrink(len);
 		break;
 	default:
 		BUG();
@@ -662,15 +1008,43 @@ static int __init ghes_init(void)
 		return -EINVAL;
 	}
 
+	if (ghes_disable) {
+		pr_info(GHES_PFX "GHES is not enabled!\n");
+		return -EINVAL;
+	}
+
+	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+
 	rc = ghes_ioremap_init();
 	if (rc)
 		goto err;
 
-	rc = platform_driver_register(&ghes_platform_driver);
+	rc = ghes_estatus_pool_init();
 	if (rc)
 		goto err_ioremap_exit;
 
+	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
+				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
+	if (rc)
+		goto err_pool_exit;
+
+	rc = platform_driver_register(&ghes_platform_driver);
+	if (rc)
+		goto err_pool_exit;
+
+	rc = apei_osc_setup();
+	if (rc == 0 && osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
+	else if (rc == 0 && !osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
+	else if (rc && osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+	else
+		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+
 	return 0;
+err_pool_exit:
+	ghes_estatus_pool_exit();
 err_ioremap_exit:
 	ghes_ioremap_exit();
 err:
@@ -680,6 +1054,7 @@ err:
 static void __exit ghes_exit(void)
 {
 	platform_driver_unregister(&ghes_platform_driver);
+	ghes_estatus_pool_exit();
 	ghes_ioremap_exit();
 }
 
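The NMI path above combines three lock-free primitives: a gen_pool allocation, an llist push, and an irq_work kick that defers printk-unsafe work to IRQ context. Stripped of the GHES specifics, the hand-off pattern looks roughly like the sketch below; every name here is invented for illustration, and the pool is assumed to have been created and expanded at init time (with init_irq_work(&my_work, my_worker) also done there).

#include <linux/genalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/string.h>

/* Sketch of the NMI -> IRQ hand-off used by ghes.c above. */
struct my_node {
	struct llist_node llnode;
	size_t len;
	char payload[];
};

static struct gen_pool *my_pool;	/* pre-expanded at init time */
static struct llist_head my_list;
static struct irq_work my_work;

/* NMI context: no locks, no printk; just stash the record. */
static void my_nmi_handler(const void *data, size_t len)
{
	struct my_node *node;

	node = (void *)gen_pool_alloc(my_pool, sizeof(*node) + len);
	if (!node)
		return;			/* pool exhausted: drop the record */
	node->len = len;
	memcpy(node->payload, data, len);
	llist_add(&node->llnode, &my_list);
	irq_work_queue(&my_work);	/* finish in IRQ context */
}

/* IRQ context: printk and the rest of the kernel are usable again. */
static void my_worker(struct irq_work *work)
{
	struct llist_node *n = llist_del_all(&my_list);

	while (n) {
		struct llist_node *next = n->next;
		struct my_node *node = llist_entry(n, struct my_node, llnode);

		/* ... report node->payload here ... */
		gen_pool_free(my_pool, (unsigned long)node,
			      sizeof(*node) + node->len);
		n = next;
	}
}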
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 181bc2f7bb74..05fee06f4d6e 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -231,16 +231,17 @@ void __init acpi_hest_init(void)
 		goto err;
 	}
 
-	rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
-	if (rc)
-		goto err;
-
-	rc = hest_ghes_dev_register(ghes_count);
-	if (!rc) {
-		pr_info(HEST_PFX "Table parsing has been initialized.\n");
-		return;
+	if (!ghes_disable) {
+		rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
+		if (rc)
+			goto err;
+		rc = hest_ghes_dev_register(ghes_count);
+		if (rc)
+			goto err;
 	}
 
+	pr_info(HEST_PFX "Table parsing has been initialized.\n");
+	return;
 err:
 	hest_disable = 1;
 }
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d1e06c182cdb..437ddbf0c49a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -39,6 +39,7 @@
 #include <linux/pci.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
+#include <acpi/apei.h>
 #include <linux/dmi.h>
 #include <linux/suspend.h>
 
@@ -519,6 +520,7 @@ out_kfree:
 }
 EXPORT_SYMBOL(acpi_run_osc);
 
+bool osc_sb_apei_support_acked;
 static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
 static void acpi_bus_osc_support(void)
 {
@@ -541,11 +543,19 @@ static void acpi_bus_osc_support(void)
 #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
 	capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT;
 #endif
+
+	if (!ghes_disable)
+		capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT;
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
 		return;
-	if (ACPI_SUCCESS(acpi_run_osc(handle, &context)))
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) {
+		u32 *capbuf_ret = context.ret.pointer;
+		if (context.ret.length > OSC_SUPPORT_TYPE)
+			osc_sb_apei_support_acked =
+				capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT;
 		kfree(context.ret.pointer);
-	/* do we need to check the returned cap? Sounds no */
+	}
+	/* do we need to check other returned cap? Sounds no */
 }
 
 /* --------------------------------------------------------------------------
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index e67b523a50e1..51a527d24a8a 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -18,6 +18,11 @@
 
 extern int hest_disable;
 extern int erst_disable;
+#ifdef CONFIG_ACPI_APEI_GHES
+extern int ghes_disable;
+#else
+#define ghes_disable 1
+#endif
 
 #ifdef CONFIG_ACPI_APEI
 void __init acpi_hest_init(void);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1deb2a73c2da..e19527de6a93 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -280,6 +280,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_CPUHP_OST_SUPPORT	8
 #define OSC_SB_APEI_SUPPORT		16
 
+extern bool osc_sb_apei_support_acked;
+
 /* PCI defined _OSC bits */
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT	1
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3bac44cce142..7ad634501e48 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h | |||
| @@ -146,6 +146,7 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); | |||
| 146 | extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); | 146 | extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); |
| 147 | extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); | 147 | extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); |
| 148 | 148 | ||
| 149 | #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) | ||
| 149 | #define BITMAP_LAST_WORD_MASK(nbits) \ | 150 | #define BITMAP_LAST_WORD_MASK(nbits) \ |
| 150 | ( \ | 151 | ( \ |
| 151 | ((nbits) % BITS_PER_LONG) ? \ | 152 | ((nbits) % BITS_PER_LONG) ? \ |
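To make the mask semantics concrete, assuming BITS_PER_LONG == 64 and the standard definition of BITMAP_LAST_WORD_MASK (whose remaining lines fall outside the context shown above):

    /* BITMAP_FIRST_WORD_MASK(start) keeps bit (start % BITS_PER_LONG)
     * and above within the first word touched:
     *   BITMAP_FIRST_WORD_MASK(5)  == ~0UL << 5 == 0xffffffffffffffe0
     *   BITMAP_FIRST_WORD_MASK(64) == ~0UL << 0 == ~0UL
     *
     * BITMAP_LAST_WORD_MASK(nbits) keeps the low (nbits % BITS_PER_LONG)
     * bits of the last word, or the whole word on an exact boundary:
     *   BITMAP_LAST_WORD_MASK(69)  == (1UL << 5) - 1 == 0x1f
     */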
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 5bbebda78b02..5e98eeb2af3b 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h | |||
| @@ -1,8 +1,26 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Basic general purpose allocator for managing special purpose memory | 2 | * Basic general purpose allocator for managing special purpose |
| 3 | * not managed by the regular kmalloc/kfree interface. | 3 | * memory, for example, memory that is not managed by the regular |
| 4 | * Uses for this includes on-device special memory, uncached memory | 4 | * kmalloc/kfree interface. Uses for this include on-device special |
| 5 | * etc. | 5 | * memory, uncached memory etc. |
| 6 | * | ||
| 7 | * It is safe to use the allocator in NMI handlers and other special | ||
| 8 | * unblockable contexts that could otherwise deadlock on locks. This | ||
| 9 | * is implemented by using atomic operations and retries on any | ||
| 10 | * conflicts. The disadvantage is that there may be livelocks in | ||
| 11 | * extreme cases. For better scalability, one allocator can be used | ||
| 12 | * for each CPU. | ||
| 13 | * | ||
| 14 | * The lockless operation only works if there is enough memory | ||
| 15 | * available. If new memory is added to the pool, a lock still has | ||
| 16 | * to be taken. So any user relying on locklessness has to ensure | ||
| 17 | * that sufficient memory is preallocated. | ||
| 18 | * | ||
| 19 | * The basic atomic operation of this allocator is cmpxchg on long. | ||
| 20 | * On architectures that don't have an NMI-safe cmpxchg | ||
| 21 | * implementation, the allocator can NOT be used in an NMI handler. | ||
| 22 | * So code that uses the allocator in an NMI handler should depend | ||
| 23 | * on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
| 6 | * | 24 | * |
| 7 | * This source code is licensed under the GNU General Public License, | 25 | * This source code is licensed under the GNU General Public License, |
| 8 | * Version 2. See the file COPYING for more details. | 26 | * Version 2. See the file COPYING for more details. |
| @@ -15,7 +33,7 @@ | |||
| 15 | * General purpose special memory pool descriptor. | 33 | * General purpose special memory pool descriptor. |
| 16 | */ | 34 | */ |
| 17 | struct gen_pool { | 35 | struct gen_pool { |
| 18 | rwlock_t lock; | 36 | spinlock_t lock; |
| 19 | struct list_head chunks; /* list of chunks in this pool */ | 37 | struct list_head chunks; /* list of chunks in this pool */ |
| 20 | int min_alloc_order; /* minimum allocation order */ | 38 | int min_alloc_order; /* minimum allocation order */ |
| 21 | }; | 39 | }; |
| @@ -24,8 +42,8 @@ struct gen_pool { | |||
| 24 | * General purpose special memory pool chunk descriptor. | 42 | * General purpose special memory pool chunk descriptor. |
| 25 | */ | 43 | */ |
| 26 | struct gen_pool_chunk { | 44 | struct gen_pool_chunk { |
| 27 | spinlock_t lock; | ||
| 28 | struct list_head next_chunk; /* next chunk in pool */ | 45 | struct list_head next_chunk; /* next chunk in pool */ |
| 46 | atomic_t avail; | ||
| 29 | phys_addr_t phys_addr; /* physical starting address of memory chunk */ | 47 | phys_addr_t phys_addr; /* physical starting address of memory chunk */ |
| 30 | unsigned long start_addr; /* starting address of memory chunk */ | 48 | unsigned long start_addr; /* starting address of memory chunk */ |
| 31 | unsigned long end_addr; /* ending address of memory chunk */ | 49 | unsigned long end_addr; /* ending address of memory chunk */ |
| @@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, | |||
| 56 | extern void gen_pool_destroy(struct gen_pool *); | 74 | extern void gen_pool_destroy(struct gen_pool *); |
| 57 | extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); | 75 | extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); |
| 58 | extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); | 76 | extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); |
| 77 | extern void gen_pool_for_each_chunk(struct gen_pool *, | ||
| 78 | void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); | ||
| 79 | extern size_t gen_pool_avail(struct gen_pool *); | ||
| 80 | extern size_t gen_pool_size(struct gen_pool *); | ||
| 59 | #endif /* __GENALLOC_H__ */ | 81 | #endif /* __GENALLOC_H__ */ |
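A minimal usage sketch of the resulting API (example_pool_setup and the caller-provided region are assumptions for illustration): memory is added up front, under the pool lock, so the later alloc/free fast path can stay lockless.

    #include <linux/errno.h>
    #include <linux/genalloc.h>

    static int example_pool_setup(unsigned long region, size_t region_size)
    {
            struct gen_pool *pool;
            unsigned long obj;

            pool = gen_pool_create(3, -1);  /* 8-byte min alloc, any NUMA node */
            if (!pool)
                    return -ENOMEM;
            /* adding memory takes pool->lock; do it before lockless use */
            if (gen_pool_add(pool, region, region_size, -1)) {
                    gen_pool_destroy(pool);
                    return -ENOMEM;
            }
            obj = gen_pool_alloc(pool, 64); /* lockless fast path */
            if (obj)
                    gen_pool_free(pool, obj, 64);
            return 0;
    }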
diff --git a/include/linux/llist.h b/include/linux/llist.h new file mode 100644 index 000000000000..aa0c8b5b3cd0 --- /dev/null +++ b/include/linux/llist.h | |||
| @@ -0,0 +1,126 @@ | |||
| 1 | #ifndef LLIST_H | ||
| 2 | #define LLIST_H | ||
| 3 | /* | ||
| 4 | * Lock-less NULL terminated single linked list | ||
| 5 | * | ||
| 6 | * If there are multiple producers and multiple consumers, llist_add | ||
| 7 | * can be used in producers and llist_del_all can be used in | ||
| 8 | * consumers. They can work simultaneously without a lock. But | ||
| 9 | * llist_del_first cannot be used here, because llist_del_first | ||
| 10 | * depends on list->first->next not changing while list->first is | ||
| 11 | * unchanged during its operation, and a llist_del_first, llist_add, | ||
| 12 | * llist_add (or llist_del_all, llist_add, llist_add) sequence in | ||
| 13 | * another consumer may violate that. | ||
| 14 | * | ||
| 15 | * If there are multiple producers and one consumer, llist_add can be | ||
| 16 | * used in producers and llist_del_all or llist_del_first can be used | ||
| 17 | * in the consumer. | ||
| 18 | * | ||
| 19 | * This can be summarized as follows: | ||
| 20 | * | ||
| 21 | * | add | del_first | del_all | ||
| 22 | * add | - | - | - | ||
| 23 | * del_first | | L | L | ||
| 24 | * del_all | | | - | ||
| 25 | * | ||
| 26 | * where "-" means that no lock is needed, while "L" means that a | ||
| 27 | * lock is needed. | ||
| 28 | * | ||
| 29 | * The list entries deleted via llist_del_all can be traversed with | ||
| 30 | * traversal functions such as llist_for_each. But the list entries | ||
| 31 | * cannot be traversed safely before being deleted from the list. | ||
| 32 | * The order of deleted entries is from the newest to the oldest | ||
| 33 | * added one. If you want to traverse from the oldest to the newest, | ||
| 34 | * you must reverse the order yourself before traversing. | ||
| 35 | * | ||
| 36 | * The basic atomic operation of this list is cmpxchg on long. On | ||
| 37 | * architectures that don't have an NMI-safe cmpxchg implementation, | ||
| 38 | * the list can NOT be used in an NMI handler. So code that uses | ||
| 39 | * the list in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
| 40 | */ | ||
| 41 | |||
| 42 | struct llist_head { | ||
| 43 | struct llist_node *first; | ||
| 44 | }; | ||
| 45 | |||
| 46 | struct llist_node { | ||
| 47 | struct llist_node *next; | ||
| 48 | }; | ||
| 49 | |||
| 50 | #define LLIST_HEAD_INIT(name) { NULL } | ||
| 51 | #define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) | ||
| 52 | |||
| 53 | /** | ||
| 54 | * init_llist_head - initialize lock-less list head | ||
| 55 | * @list: the head for your lock-less list | ||
| 56 | */ | ||
| 57 | static inline void init_llist_head(struct llist_head *list) | ||
| 58 | { | ||
| 59 | list->first = NULL; | ||
| 60 | } | ||
| 61 | |||
| 62 | /** | ||
| 63 | * llist_entry - get the struct of this entry | ||
| 64 | * @ptr: the &struct llist_node pointer. | ||
| 65 | * @type: the type of the struct this is embedded in. | ||
| 66 | * @member: the name of the llist_node within the struct. | ||
| 67 | */ | ||
| 68 | #define llist_entry(ptr, type, member) \ | ||
| 69 | container_of(ptr, type, member) | ||
| 70 | |||
| 71 | /** | ||
| 72 | * llist_for_each - iterate over some deleted entries of a lock-less list | ||
| 73 | * @pos: the &struct llist_node to use as a loop cursor | ||
| 74 | * @node: the first entry of deleted list entries | ||
| 75 | * | ||
| 76 | * In general, some entries of the lock-less list can be traversed | ||
| 77 | * safely only after being deleted from the list, so start with an | ||
| 78 | * entry instead of the list head. | ||
| 79 | * | ||
| 80 | * If used directly on entries deleted from a lock-less list, the | ||
| 81 | * traversal order is from the newest to the oldest added entry. If | ||
| 82 | * you want to traverse from the oldest to the newest, you must | ||
| 83 | * reverse the order yourself before traversing. | ||
| 84 | */ | ||
| 85 | #define llist_for_each(pos, node) \ | ||
| 86 | for ((pos) = (node); pos; (pos) = (pos)->next) | ||
| 87 | |||
| 88 | /** | ||
| 89 | * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type | ||
| 90 | * @pos: the type * to use as a loop cursor. | ||
| 91 | * @node: the first entry of deleted list entries. | ||
| 92 | * @member: the name of the llist_node within the struct. | ||
| 93 | * | ||
| 94 | * In general, some entries of the lock-less list can be traversed | ||
| 95 | * safely only after being removed from the list, so start with an | ||
| 96 | * entry instead of the list head. | ||
| 97 | * | ||
| 98 | * If used directly on entries deleted from a lock-less list, the | ||
| 99 | * traversal order is from the newest to the oldest added entry. If | ||
| 100 | * you want to traverse from the oldest to the newest, you must | ||
| 101 | * reverse the order yourself before traversing. | ||
| 102 | */ | ||
| 103 | #define llist_for_each_entry(pos, node, member) \ | ||
| 104 | for ((pos) = llist_entry((node), typeof(*(pos)), member); \ | ||
| 105 | &(pos)->member != NULL; \ | ||
| 106 | (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) | ||
| 107 | |||
| 108 | /** | ||
| 109 | * llist_empty - tests whether a lock-less list is empty | ||
| 110 | * @head: the list to test | ||
| 111 | * | ||
| 112 | * Not guaranteed to be accurate or up to date. Just a quick way to | ||
| 113 | * test whether the list is empty without deleting something from the | ||
| 114 | * list. | ||
| 115 | */ | ||
| 116 | static inline int llist_empty(const struct llist_head *head) | ||
| 117 | { | ||
| 118 | return ACCESS_ONCE(head->first) == NULL; | ||
| 119 | } | ||
| 120 | |||
| 121 | void llist_add(struct llist_node *new, struct llist_head *head); | ||
| 122 | void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, | ||
| 123 | struct llist_head *head); | ||
| 124 | struct llist_node *llist_del_first(struct llist_head *head); | ||
| 125 | struct llist_node *llist_del_all(struct llist_head *head); | ||
| 126 | #endif /* LLIST_H */ | ||
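A short usage sketch for the multiple-producer/single-consumer case described in the header comment (struct my_event and the example_* names are illustrative):

    #include <linux/llist.h>

    struct my_event {
            int code;
            struct llist_node llnode;
    };

    static LLIST_HEAD(event_list);

    /* producers: any context; on architectures without NMI-safe
     * cmpxchg, not from NMI handlers */
    static void example_produce(struct my_event *ev)
    {
            llist_add(&ev->llnode, &event_list);
    }

    /* single consumer: detach the whole chain, then walk it;
     * traversal order is newest-first, per the comment above */
    static void example_consume(void (*handle)(struct my_event *))
    {
            struct llist_node *node = llist_del_all(&event_list);
            struct my_event *ev;

            llist_for_each_entry(ev, node, llnode)
                    handle(ev);
    }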
diff --git a/include/linux/mm.h b/include/linux/mm.h index 3172a1c0f08e..f2690cf49827 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -1600,6 +1600,7 @@ enum mf_flags { | |||
| 1600 | }; | 1600 | }; |
| 1601 | extern void memory_failure(unsigned long pfn, int trapno); | 1601 | extern void memory_failure(unsigned long pfn, int trapno); |
| 1602 | extern int __memory_failure(unsigned long pfn, int trapno, int flags); | 1602 | extern int __memory_failure(unsigned long pfn, int trapno, int flags); |
| 1603 | extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); | ||
| 1603 | extern int unpoison_memory(unsigned long pfn); | 1604 | extern int unpoison_memory(unsigned long pfn); |
| 1604 | extern int sysctl_memory_failure_early_kill; | 1605 | extern int sysctl_memory_failure_early_kill; |
| 1605 | extern int sysctl_memory_failure_recovery; | 1606 | extern int sysctl_memory_failure_recovery; |
diff --git a/lib/Kconfig b/lib/Kconfig index 32f3e5ae2be5..6c695ff9caba 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
| @@ -276,4 +276,7 @@ config CORDIC | |||
| 276 | so its calculations are in fixed point. Modules can select this | 276 | so its calculations are in fixed point. Modules can select this |
| 277 | when they require this function. Module will be called cordic. | 277 | when they require this function. Module will be called cordic. |
| 278 | 278 | ||
| 279 | config LLIST | ||
| 280 | bool | ||
| 281 | |||
| 279 | endmenu | 282 | endmenu |
diff --git a/lib/Makefile b/lib/Makefile index 892f4e282ea1..6457af4a7caf 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
| @@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o | |||
| 115 | 115 | ||
| 116 | obj-$(CONFIG_CORDIC) += cordic.o | 116 | obj-$(CONFIG_CORDIC) += cordic.o |
| 117 | 117 | ||
| 118 | obj-$(CONFIG_LLIST) += llist.o | ||
| 119 | |||
| 118 | hostprogs-y := gen_crc32table | 120 | hostprogs-y := gen_crc32table |
| 119 | clean-files := crc32table.h | 121 | clean-files := crc32table.h |
| 120 | 122 | ||
diff --git a/lib/bitmap.c b/lib/bitmap.c index 37ef4b048795..2f4412e4d071 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c | |||
| @@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) | |||
| 271 | } | 271 | } |
| 272 | EXPORT_SYMBOL(__bitmap_weight); | 272 | EXPORT_SYMBOL(__bitmap_weight); |
| 273 | 273 | ||
| 274 | #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) | ||
| 275 | |||
| 276 | void bitmap_set(unsigned long *map, int start, int nr) | 274 | void bitmap_set(unsigned long *map, int start, int nr) |
| 277 | { | 275 | { |
| 278 | unsigned long *p = map + BIT_WORD(start); | 276 | unsigned long *p = map + BIT_WORD(start); |
diff --git a/lib/genalloc.c b/lib/genalloc.c index 577ddf805975..f352cc42f4f8 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c | |||
| @@ -1,8 +1,26 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Basic general purpose allocator for managing special purpose memory | 2 | * Basic general purpose allocator for managing special purpose |
| 3 | * not managed by the regular kmalloc/kfree interface. | 3 | * memory, for example, memory that is not managed by the regular |
| 4 | * Uses for this includes on-device special memory, uncached memory | 4 | * kmalloc/kfree interface. Uses for this include on-device special |
| 5 | * etc. | 5 | * memory, uncached memory etc. |
| 6 | * | ||
| 7 | * It is safe to use the allocator in NMI handlers and other special | ||
| 8 | * unblockable contexts that could otherwise deadlock on locks. This | ||
| 9 | * is implemented by using atomic operations and retries on any | ||
| 10 | * conflicts. The disadvantage is that there may be livelocks in | ||
| 11 | * extreme cases. For better scalability, one allocator can be used | ||
| 12 | * for each CPU. | ||
| 13 | * | ||
| 14 | * The lockless operation only works if there is enough memory | ||
| 15 | * available. If new memory is added to the pool, a lock still has | ||
| 16 | * to be taken. So any user relying on locklessness has to ensure | ||
| 17 | * that sufficient memory is preallocated. | ||
| 18 | * | ||
| 19 | * The basic atomic operation of this allocator is cmpxchg on long. | ||
| 20 | * On architectures that don't have an NMI-safe cmpxchg | ||
| 21 | * implementation, the allocator can NOT be used in an NMI handler. | ||
| 22 | * So code that uses the allocator in an NMI handler should depend | ||
| 23 | * on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
| 6 | * | 24 | * |
| 7 | * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> | 25 | * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> |
| 8 | * | 26 | * |
| @@ -13,8 +31,109 @@ | |||
| 13 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
| 14 | #include <linux/module.h> | 32 | #include <linux/module.h> |
| 15 | #include <linux/bitmap.h> | 33 | #include <linux/bitmap.h> |
| 34 | #include <linux/rculist.h> | ||
| 35 | #include <linux/interrupt.h> | ||
| 16 | #include <linux/genalloc.h> | 36 | #include <linux/genalloc.h> |
| 17 | 37 | ||
| 38 | static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) | ||
| 39 | { | ||
| 40 | unsigned long val, nval; | ||
| 41 | |||
| 42 | nval = *addr; | ||
| 43 | do { | ||
| 44 | val = nval; | ||
| 45 | if (val & mask_to_set) | ||
| 46 | return -EBUSY; | ||
| 47 | cpu_relax(); | ||
| 48 | } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); | ||
| 49 | |||
| 50 | return 0; | ||
| 51 | } | ||
| 52 | |||
| 53 | static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) | ||
| 54 | { | ||
| 55 | unsigned long val, nval; | ||
| 56 | |||
| 57 | nval = *addr; | ||
| 58 | do { | ||
| 59 | val = nval; | ||
| 60 | if ((val & mask_to_clear) != mask_to_clear) | ||
| 61 | return -EBUSY; | ||
| 62 | cpu_relax(); | ||
| 63 | } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); | ||
| 64 | |||
| 65 | return 0; | ||
| 66 | } | ||
| 67 | |||
| 68 | /* | ||
| 69 | * bitmap_set_ll - set the specified number of bits at the specified position | ||
| 70 | * @map: pointer to a bitmap | ||
| 71 | * @start: a bit position in @map | ||
| 72 | * @nr: number of bits to set | ||
| 73 | * | ||
| 74 | * Set @nr bits starting from @start in @map locklessly. Several | ||
| 75 | * users can set/clear the same bitmap simultaneously without a lock. | ||
| 76 | * If two users set the same bit, one of them returns the number of | ||
| 77 | * bits that remain to be set; otherwise 0 is returned. | ||
| 78 | */ | ||
| 79 | static int bitmap_set_ll(unsigned long *map, int start, int nr) | ||
| 80 | { | ||
| 81 | unsigned long *p = map + BIT_WORD(start); | ||
| 82 | const int size = start + nr; | ||
| 83 | int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); | ||
| 84 | unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); | ||
| 85 | |||
| 86 | while (nr - bits_to_set >= 0) { | ||
| 87 | if (set_bits_ll(p, mask_to_set)) | ||
| 88 | return nr; | ||
| 89 | nr -= bits_to_set; | ||
| 90 | bits_to_set = BITS_PER_LONG; | ||
| 91 | mask_to_set = ~0UL; | ||
| 92 | p++; | ||
| 93 | } | ||
| 94 | if (nr) { | ||
| 95 | mask_to_set &= BITMAP_LAST_WORD_MASK(size); | ||
| 96 | if (set_bits_ll(p, mask_to_set)) | ||
| 97 | return nr; | ||
| 98 | } | ||
| 99 | |||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* | ||
| 104 | * bitmap_clear_ll - clear the specified number of bits at the specified position | ||
| 105 | * @map: pointer to a bitmap | ||
| 106 | * @start: a bit position in @map | ||
| 107 | * @nr: number of bits to clear | ||
| 108 | * | ||
| 109 | * Clear @nr bits starting from @start in @map locklessly. Several | ||
| 110 | * users can set/clear the same bitmap simultaneously without a lock. | ||
| 111 | * If two users clear the same bit, one of them returns the number of | ||
| 112 | * bits that remain to be cleared; otherwise 0 is returned. | ||
| 113 | */ | ||
| 114 | static int bitmap_clear_ll(unsigned long *map, int start, int nr) | ||
| 115 | { | ||
| 116 | unsigned long *p = map + BIT_WORD(start); | ||
| 117 | const int size = start + nr; | ||
| 118 | int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); | ||
| 119 | unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); | ||
| 120 | |||
| 121 | while (nr - bits_to_clear >= 0) { | ||
| 122 | if (clear_bits_ll(p, mask_to_clear)) | ||
| 123 | return nr; | ||
| 124 | nr -= bits_to_clear; | ||
| 125 | bits_to_clear = BITS_PER_LONG; | ||
| 126 | mask_to_clear = ~0UL; | ||
| 127 | p++; | ||
| 128 | } | ||
| 129 | if (nr) { | ||
| 130 | mask_to_clear &= BITMAP_LAST_WORD_MASK(size); | ||
| 131 | if (clear_bits_ll(p, mask_to_clear)) | ||
| 132 | return nr; | ||
| 133 | } | ||
| 134 | |||
| 135 | return 0; | ||
| 136 | } | ||
| 18 | 137 | ||
| 19 | /** | 138 | /** |
| 20 | * gen_pool_create - create a new special memory pool | 139 | * gen_pool_create - create a new special memory pool |
| @@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) | |||
| 30 | 149 | ||
| 31 | pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); | 150 | pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); |
| 32 | if (pool != NULL) { | 151 | if (pool != NULL) { |
| 33 | rwlock_init(&pool->lock); | 152 | spin_lock_init(&pool->lock); |
| 34 | INIT_LIST_HEAD(&pool->chunks); | 153 | INIT_LIST_HEAD(&pool->chunks); |
| 35 | pool->min_alloc_order = min_alloc_order; | 154 | pool->min_alloc_order = min_alloc_order; |
| 36 | } | 155 | } |
| @@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy | |||
| 63 | if (unlikely(chunk == NULL)) | 182 | if (unlikely(chunk == NULL)) |
| 64 | return -ENOMEM; | 183 | return -ENOMEM; |
| 65 | 184 | ||
| 66 | spin_lock_init(&chunk->lock); | ||
| 67 | chunk->phys_addr = phys; | 185 | chunk->phys_addr = phys; |
| 68 | chunk->start_addr = virt; | 186 | chunk->start_addr = virt; |
| 69 | chunk->end_addr = virt + size; | 187 | chunk->end_addr = virt + size; |
| 188 | atomic_set(&chunk->avail, size); | ||
| 70 | 189 | ||
| 71 | write_lock(&pool->lock); | 190 | spin_lock(&pool->lock); |
| 72 | list_add(&chunk->next_chunk, &pool->chunks); | 191 | list_add_rcu(&chunk->next_chunk, &pool->chunks); |
| 73 | write_unlock(&pool->lock); | 192 | spin_unlock(&pool->lock); |
| 74 | 193 | ||
| 75 | return 0; | 194 | return 0; |
| 76 | } | 195 | } |
| @@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt); | |||
| 85 | */ | 204 | */ |
| 86 | phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) | 205 | phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) |
| 87 | { | 206 | { |
| 88 | struct list_head *_chunk; | ||
| 89 | struct gen_pool_chunk *chunk; | 207 | struct gen_pool_chunk *chunk; |
| 208 | phys_addr_t paddr = -1; | ||
| 90 | 209 | ||
| 91 | read_lock(&pool->lock); | 210 | rcu_read_lock(); |
| 92 | list_for_each(_chunk, &pool->chunks) { | 211 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { |
| 93 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | 212 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { |
| 94 | 213 | paddr = chunk->phys_addr + (addr - chunk->start_addr); | |
| 95 | if (addr >= chunk->start_addr && addr < chunk->end_addr) | 214 | break; |
| 96 | return chunk->phys_addr + addr - chunk->start_addr; | 215 | } |
| 97 | } | 216 | } |
| 98 | read_unlock(&pool->lock); | 217 | rcu_read_unlock(); |
| 99 | 218 | ||
| 100 | return -1; | 219 | return paddr; |
| 101 | } | 220 | } |
| 102 | EXPORT_SYMBOL(gen_pool_virt_to_phys); | 221 | EXPORT_SYMBOL(gen_pool_virt_to_phys); |
| 103 | 222 | ||
| @@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool) | |||
| 115 | int order = pool->min_alloc_order; | 234 | int order = pool->min_alloc_order; |
| 116 | int bit, end_bit; | 235 | int bit, end_bit; |
| 117 | 236 | ||
| 118 | |||
| 119 | list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { | 237 | list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { |
| 120 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | 238 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); |
| 121 | list_del(&chunk->next_chunk); | 239 | list_del(&chunk->next_chunk); |
| @@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy); | |||
| 137 | * @size: number of bytes to allocate from the pool | 255 | * @size: number of bytes to allocate from the pool |
| 138 | * | 256 | * |
| 139 | * Allocate the requested number of bytes from the specified pool. | 257 | * Allocate the requested number of bytes from the specified pool. |
| 140 | * Uses a first-fit algorithm. | 258 | * Uses a first-fit algorithm. Cannot be used in an NMI handler on |
| 259 | * architectures without an NMI-safe cmpxchg implementation. | ||
| 141 | */ | 260 | */ |
| 142 | unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) | 261 | unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) |
| 143 | { | 262 | { |
| 144 | struct list_head *_chunk; | ||
| 145 | struct gen_pool_chunk *chunk; | 263 | struct gen_pool_chunk *chunk; |
| 146 | unsigned long addr, flags; | 264 | unsigned long addr = 0; |
| 147 | int order = pool->min_alloc_order; | 265 | int order = pool->min_alloc_order; |
| 148 | int nbits, start_bit, end_bit; | 266 | int nbits, start_bit = 0, end_bit, remain; |
| 267 | |||
| 268 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
| 269 | BUG_ON(in_nmi()); | ||
| 270 | #endif | ||
| 149 | 271 | ||
| 150 | if (size == 0) | 272 | if (size == 0) |
| 151 | return 0; | 273 | return 0; |
| 152 | 274 | ||
| 153 | nbits = (size + (1UL << order) - 1) >> order; | 275 | nbits = (size + (1UL << order) - 1) >> order; |
| 154 | 276 | rcu_read_lock(); | |
| 155 | read_lock(&pool->lock); | 277 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { |
| 156 | list_for_each(_chunk, &pool->chunks) { | 278 | if (size > atomic_read(&chunk->avail)) |
| 157 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | 279 | continue; |
| 158 | 280 | ||
| 159 | end_bit = (chunk->end_addr - chunk->start_addr) >> order; | 281 | end_bit = (chunk->end_addr - chunk->start_addr) >> order; |
| 160 | 282 | retry: | |
| 161 | spin_lock_irqsave(&chunk->lock, flags); | 283 | start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, |
| 162 | start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, | 284 | start_bit, nbits, 0); |
| 163 | nbits, 0); | 285 | if (start_bit >= end_bit) |
| 164 | if (start_bit >= end_bit) { | ||
| 165 | spin_unlock_irqrestore(&chunk->lock, flags); | ||
| 166 | continue; | 286 | continue; |
| 287 | remain = bitmap_set_ll(chunk->bits, start_bit, nbits); | ||
| 288 | if (remain) { | ||
| 289 | remain = bitmap_clear_ll(chunk->bits, start_bit, | ||
| 290 | nbits - remain); | ||
| 291 | BUG_ON(remain); | ||
| 292 | goto retry; | ||
| 167 | } | 293 | } |
| 168 | 294 | ||
| 169 | addr = chunk->start_addr + ((unsigned long)start_bit << order); | 295 | addr = chunk->start_addr + ((unsigned long)start_bit << order); |
| 170 | 296 | size = nbits << order; | |
| 171 | bitmap_set(chunk->bits, start_bit, nbits); | 297 | atomic_sub(size, &chunk->avail); |
| 172 | spin_unlock_irqrestore(&chunk->lock, flags); | 298 | break; |
| 173 | read_unlock(&pool->lock); | ||
| 174 | return addr; | ||
| 175 | } | 299 | } |
| 176 | read_unlock(&pool->lock); | 300 | rcu_read_unlock(); |
| 177 | return 0; | 301 | return addr; |
| 178 | } | 302 | } |
| 179 | EXPORT_SYMBOL(gen_pool_alloc); | 303 | EXPORT_SYMBOL(gen_pool_alloc); |
| 180 | 304 | ||
| @@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc); | |||
| 184 | * @addr: starting address of memory to free back to pool | 308 | * @addr: starting address of memory to free back to pool |
| 185 | * @size: size in bytes of memory to free | 309 | * @size: size in bytes of memory to free |
| 186 | * | 310 | * |
| 187 | * Free previously allocated special memory back to the specified pool. | 311 | * Free previously allocated special memory back to the specified |
| 312 | * pool. Cannot be used in an NMI handler on architectures without | ||
| 313 | * an NMI-safe cmpxchg implementation. | ||
| 188 | */ | 314 | */ |
| 189 | void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) | 315 | void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) |
| 190 | { | 316 | { |
| 191 | struct list_head *_chunk; | ||
| 192 | struct gen_pool_chunk *chunk; | 317 | struct gen_pool_chunk *chunk; |
| 193 | unsigned long flags; | ||
| 194 | int order = pool->min_alloc_order; | 318 | int order = pool->min_alloc_order; |
| 195 | int bit, nbits; | 319 | int start_bit, nbits, remain; |
| 196 | 320 | ||
| 197 | nbits = (size + (1UL << order) - 1) >> order; | 321 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG |
| 198 | 322 | BUG_ON(in_nmi()); | |
| 199 | read_lock(&pool->lock); | 323 | #endif |
| 200 | list_for_each(_chunk, &pool->chunks) { | ||
| 201 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | ||
| 202 | 324 | ||
| 325 | nbits = (size + (1UL << order) - 1) >> order; | ||
| 326 | rcu_read_lock(); | ||
| 327 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { | ||
| 203 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { | 328 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { |
| 204 | BUG_ON(addr + size > chunk->end_addr); | 329 | BUG_ON(addr + size > chunk->end_addr); |
| 205 | spin_lock_irqsave(&chunk->lock, flags); | 330 | start_bit = (addr - chunk->start_addr) >> order; |
| 206 | bit = (addr - chunk->start_addr) >> order; | 331 | remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); |
| 207 | while (nbits--) | 332 | BUG_ON(remain); |
| 208 | __clear_bit(bit++, chunk->bits); | 333 | size = nbits << order; |
| 209 | spin_unlock_irqrestore(&chunk->lock, flags); | 334 | atomic_add(size, &chunk->avail); |
| 210 | break; | 335 | rcu_read_unlock(); |
| 336 | return; | ||
| 211 | } | 337 | } |
| 212 | } | 338 | } |
| 213 | BUG_ON(nbits > 0); | 339 | rcu_read_unlock(); |
| 214 | read_unlock(&pool->lock); | 340 | BUG(); |
| 215 | } | 341 | } |
| 216 | EXPORT_SYMBOL(gen_pool_free); | 342 | EXPORT_SYMBOL(gen_pool_free); |
| 343 | |||
| 344 | /** | ||
| 345 | * gen_pool_for_each_chunk - call func for every chunk of generic memory pool | ||
| 346 | * @pool: the generic memory pool | ||
| 347 | * @func: func to call | ||
| 348 | * @data: additional data used by @func | ||
| 349 | * | ||
| 350 | * Call @func for every chunk of the generic memory pool. @func is | ||
| 351 | * called with rcu_read_lock held. | ||
| 352 | */ | ||
| 353 | void gen_pool_for_each_chunk(struct gen_pool *pool, | ||
| 354 | void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data), | ||
| 355 | void *data) | ||
| 356 | { | ||
| 357 | struct gen_pool_chunk *chunk; | ||
| 358 | |||
| 359 | rcu_read_lock(); | ||
| 360 | list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) | ||
| 361 | func(pool, chunk, data); | ||
| 362 | rcu_read_unlock(); | ||
| 363 | } | ||
| 364 | EXPORT_SYMBOL(gen_pool_for_each_chunk); | ||
| 365 | |||
| 366 | /** | ||
| 367 | * gen_pool_avail - get available free space of the pool | ||
| 368 | * @pool: pool to get available free space | ||
| 369 | * | ||
| 370 | * Return available free space of the specified pool. | ||
| 371 | */ | ||
| 372 | size_t gen_pool_avail(struct gen_pool *pool) | ||
| 373 | { | ||
| 374 | struct gen_pool_chunk *chunk; | ||
| 375 | size_t avail = 0; | ||
| 376 | |||
| 377 | rcu_read_lock(); | ||
| 378 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) | ||
| 379 | avail += atomic_read(&chunk->avail); | ||
| 380 | rcu_read_unlock(); | ||
| 381 | return avail; | ||
| 382 | } | ||
| 383 | EXPORT_SYMBOL_GPL(gen_pool_avail); | ||
| 384 | |||
| 385 | /** | ||
| 386 | * gen_pool_size - get size in bytes of memory managed by the pool | ||
| 387 | * @pool: pool to get size | ||
| 388 | * | ||
| 389 | * Return size in bytes of memory managed by the pool. | ||
| 390 | */ | ||
| 391 | size_t gen_pool_size(struct gen_pool *pool) | ||
| 392 | { | ||
| 393 | struct gen_pool_chunk *chunk; | ||
| 394 | size_t size = 0; | ||
| 395 | |||
| 396 | rcu_read_lock(); | ||
| 397 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) | ||
| 398 | size += chunk->end_addr - chunk->start_addr; | ||
| 399 | rcu_read_unlock(); | ||
| 400 | return size; | ||
| 401 | } | ||
| 402 | EXPORT_SYMBOL_GPL(gen_pool_size); | ||
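set_bits_ll() and clear_bits_ll() above are instances of the usual lockless read-modify-write idiom; stripped of the early-exit checks, the shape is (a sketch only, not part of the patch):

    /* reread, recompute, and cmpxchg until no other CPU raced with us */
    static inline void example_or_ll(unsigned long *addr, unsigned long bits)
    {
            unsigned long old;

            do {
                    old = *addr;    /* snapshot the current word */
            } while (cmpxchg(addr, old, old | bits) != old);
    }

gen_pool_alloc() builds on the same idea: if bitmap_set_ll() loses a race partway through, the bits set so far are rolled back with bitmap_clear_ll() and the search retries.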
diff --git a/lib/llist.c b/lib/llist.c new file mode 100644 index 000000000000..da445724fa1f --- /dev/null +++ b/lib/llist.c | |||
| @@ -0,0 +1,129 @@ | |||
| 1 | /* | ||
| 2 | * Lock-less NULL terminated single linked list | ||
| 3 | * | ||
| 4 | * The basic atomic operation of this list is cmpxchg on long. On | ||
| 5 | * architectures that don't have an NMI-safe cmpxchg implementation, | ||
| 6 | * the list can NOT be used in an NMI handler. So code that uses | ||
| 7 | * the list in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
| 8 | * | ||
| 9 | * Copyright 2010,2011 Intel Corp. | ||
| 10 | * Author: Huang Ying <ying.huang@intel.com> | ||
| 11 | * | ||
| 12 | * This program is free software; you can redistribute it and/or | ||
| 13 | * modify it under the terms of the GNU General Public License version | ||
| 14 | * 2 as published by the Free Software Foundation; | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | * GNU General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 24 | */ | ||
| 25 | #include <linux/kernel.h> | ||
| 26 | #include <linux/module.h> | ||
| 27 | #include <linux/interrupt.h> | ||
| 28 | #include <linux/llist.h> | ||
| 29 | |||
| 30 | #include <asm/system.h> | ||
| 31 | |||
| 32 | /** | ||
| 33 | * llist_add - add a new entry | ||
| 34 | * @new: new entry to be added | ||
| 35 | * @head: the head for your lock-less list | ||
| 36 | */ | ||
| 37 | void llist_add(struct llist_node *new, struct llist_head *head) | ||
| 38 | { | ||
| 39 | struct llist_node *entry, *old_entry; | ||
| 40 | |||
| 41 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
| 42 | BUG_ON(in_nmi()); | ||
| 43 | #endif | ||
| 44 | |||
| 45 | entry = head->first; | ||
| 46 | do { | ||
| 47 | old_entry = entry; | ||
| 48 | new->next = entry; | ||
| 49 | cpu_relax(); | ||
| 50 | } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); | ||
| 51 | } | ||
| 52 | EXPORT_SYMBOL_GPL(llist_add); | ||
| 53 | |||
| 54 | /** | ||
| 55 | * llist_add_batch - add several linked entries in batch | ||
| 56 | * @new_first: first entry in batch to be added | ||
| 57 | * @new_last: last entry in batch to be added | ||
| 58 | * @head: the head for your lock-less list | ||
| 59 | */ | ||
| 60 | void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, | ||
| 61 | struct llist_head *head) | ||
| 62 | { | ||
| 63 | struct llist_node *entry, *old_entry; | ||
| 64 | |||
| 65 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
| 66 | BUG_ON(in_nmi()); | ||
| 67 | #endif | ||
| 68 | |||
| 69 | entry = head->first; | ||
| 70 | do { | ||
| 71 | old_entry = entry; | ||
| 72 | new_last->next = entry; | ||
| 73 | cpu_relax(); | ||
| 74 | } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); | ||
| 75 | } | ||
| 76 | EXPORT_SYMBOL_GPL(llist_add_batch); | ||
| 77 | |||
| 78 | /** | ||
| 79 | * llist_del_first - delete the first entry of lock-less list | ||
| 80 | * @head: the head for your lock-less list | ||
| 81 | * | ||
| 82 | * If the list is empty, return NULL; otherwise, return the first | ||
| 83 | * entry deleted, which is the most recently added one. | ||
| 84 | * | ||
| 85 | * Only one llist_del_first consumer can run simultaneously with | ||
| 86 | * multiple llist_add users without a lock, because otherwise a | ||
| 87 | * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, | ||
| 88 | * llist_add) sequence in another user may change @head->first->next | ||
| 89 | * but keep @head->first. If multiple consumers are needed, please | ||
| 90 | * use llist_del_all or use a lock between consumers. | ||
| 91 | */ | ||
| 92 | struct llist_node *llist_del_first(struct llist_head *head) | ||
| 93 | { | ||
| 94 | struct llist_node *entry, *old_entry, *next; | ||
| 95 | |||
| 96 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
| 97 | BUG_ON(in_nmi()); | ||
| 98 | #endif | ||
| 99 | |||
| 100 | entry = head->first; | ||
| 101 | do { | ||
| 102 | if (entry == NULL) | ||
| 103 | return NULL; | ||
| 104 | old_entry = entry; | ||
| 105 | next = entry->next; | ||
| 106 | cpu_relax(); | ||
| 107 | } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); | ||
| 108 | |||
| 109 | return entry; | ||
| 110 | } | ||
| 111 | EXPORT_SYMBOL_GPL(llist_del_first); | ||
| 112 | |||
| 113 | /** | ||
| 114 | * llist_del_all - delete all entries from lock-less list | ||
| 115 | * @head: the head of lock-less list to delete all entries | ||
| 116 | * | ||
| 117 | * If the list is empty, return NULL; otherwise, delete all entries | ||
| 118 | * and return a pointer to the first entry. The order of entries | ||
| 119 | * deleted is from the newest to the oldest added one. | ||
| 120 | */ | ||
| 121 | struct llist_node *llist_del_all(struct llist_head *head) | ||
| 122 | { | ||
| 123 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
| 124 | BUG_ON(in_nmi()); | ||
| 125 | #endif | ||
| 126 | |||
| 127 | return xchg(&head->first, NULL); | ||
| 128 | } | ||
| 129 | EXPORT_SYMBOL_GPL(llist_del_all); | ||
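Since llist_del_all() hands the chain back newest-first, a consumer that needs FIFO order has to reverse it before walking; one way to do that in place (a sketch, not part of this patch):

    #include <linux/llist.h>

    static struct llist_node *example_llist_reverse(struct llist_node *node)
    {
            struct llist_node *rev = NULL;

            while (node) {
                    struct llist_node *next = node->next;

                    node->next = rev;       /* relink onto reversed chain */
                    rev = node;
                    node = next;
            }
            return rev;                     /* oldest-added entry is first */
    }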
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 740c4f52059c..2b43ba051ac9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -53,6 +53,7 @@ | |||
| 53 | #include <linux/hugetlb.h> | 53 | #include <linux/hugetlb.h> |
| 54 | #include <linux/memory_hotplug.h> | 54 | #include <linux/memory_hotplug.h> |
| 55 | #include <linux/mm_inline.h> | 55 | #include <linux/mm_inline.h> |
| 56 | #include <linux/kfifo.h> | ||
| 56 | #include "internal.h" | 57 | #include "internal.h" |
| 57 | 58 | ||
| 58 | int sysctl_memory_failure_early_kill __read_mostly = 0; | 59 | int sysctl_memory_failure_early_kill __read_mostly = 0; |
| @@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno) | |||
| 1178 | __memory_failure(pfn, trapno, 0); | 1179 | __memory_failure(pfn, trapno, 0); |
| 1179 | } | 1180 | } |
| 1180 | 1181 | ||
| 1182 | #define MEMORY_FAILURE_FIFO_ORDER 4 | ||
| 1183 | #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) | ||
| 1184 | |||
| 1185 | struct memory_failure_entry { | ||
| 1186 | unsigned long pfn; | ||
| 1187 | int trapno; | ||
| 1188 | int flags; | ||
| 1189 | }; | ||
| 1190 | |||
| 1191 | struct memory_failure_cpu { | ||
| 1192 | DECLARE_KFIFO(fifo, struct memory_failure_entry, | ||
| 1193 | MEMORY_FAILURE_FIFO_SIZE); | ||
| 1194 | spinlock_t lock; | ||
| 1195 | struct work_struct work; | ||
| 1196 | }; | ||
| 1197 | |||
| 1198 | static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); | ||
| 1199 | |||
| 1200 | /** | ||
| 1201 | * memory_failure_queue - Schedule handling memory failure of a page. | ||
| 1202 | * @pfn: Page Number of the corrupted page | ||
| 1203 | * @trapno: Trap number reported in the signal to user space. | ||
| 1204 | * @flags: Flags for memory failure handling | ||
| 1205 | * | ||
| 1206 | * This function is called by the low level hardware error handler | ||
| 1207 | * when it detects hardware memory corruption of a page. It schedules | ||
| 1208 | * the recovering of error page, including dropping pages, killing | ||
| 1209 | * processes etc. | ||
| 1210 | * | ||
| 1211 | * The function is primarily of use for corruptions that | ||
| 1212 | * happen outside the current execution context (e.g. when | ||
| 1213 | * detected by a background scrubber) | ||
| 1214 | * | ||
| 1215 | * Can run in IRQ context. | ||
| 1216 | */ | ||
| 1217 | void memory_failure_queue(unsigned long pfn, int trapno, int flags) | ||
| 1218 | { | ||
| 1219 | struct memory_failure_cpu *mf_cpu; | ||
| 1220 | unsigned long proc_flags; | ||
| 1221 | struct memory_failure_entry entry = { | ||
| 1222 | .pfn = pfn, | ||
| 1223 | .trapno = trapno, | ||
| 1224 | .flags = flags, | ||
| 1225 | }; | ||
| 1226 | |||
| 1227 | mf_cpu = &get_cpu_var(memory_failure_cpu); | ||
| 1228 | spin_lock_irqsave(&mf_cpu->lock, proc_flags); | ||
| 1229 | if (kfifo_put(&mf_cpu->fifo, &entry)) | ||
| 1230 | schedule_work_on(smp_processor_id(), &mf_cpu->work); | ||
| 1231 | else | ||
| 1232 | pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n", | ||
| 1233 | pfn); | ||
| 1234 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); | ||
| 1235 | put_cpu_var(memory_failure_cpu); | ||
| 1236 | } | ||
| 1237 | EXPORT_SYMBOL_GPL(memory_failure_queue); | ||
| 1238 | |||
| 1239 | static void memory_failure_work_func(struct work_struct *work) | ||
| 1240 | { | ||
| 1241 | struct memory_failure_cpu *mf_cpu; | ||
| 1242 | struct memory_failure_entry entry = { 0, }; | ||
| 1243 | unsigned long proc_flags; | ||
| 1244 | int gotten; | ||
| 1245 | |||
| 1246 | mf_cpu = &__get_cpu_var(memory_failure_cpu); | ||
| 1247 | for (;;) { | ||
| 1248 | spin_lock_irqsave(&mf_cpu->lock, proc_flags); | ||
| 1249 | gotten = kfifo_get(&mf_cpu->fifo, &entry); | ||
| 1250 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); | ||
| 1251 | if (!gotten) | ||
| 1252 | break; | ||
| 1253 | __memory_failure(entry.pfn, entry.trapno, entry.flags); | ||
| 1254 | } | ||
| 1255 | } | ||
| 1256 | |||
| 1257 | static int __init memory_failure_init(void) | ||
| 1258 | { | ||
| 1259 | struct memory_failure_cpu *mf_cpu; | ||
| 1260 | int cpu; | ||
| 1261 | |||
| 1262 | for_each_possible_cpu(cpu) { | ||
| 1263 | mf_cpu = &per_cpu(memory_failure_cpu, cpu); | ||
| 1264 | spin_lock_init(&mf_cpu->lock); | ||
| 1265 | INIT_KFIFO(mf_cpu->fifo); | ||
| 1266 | INIT_WORK(&mf_cpu->work, memory_failure_work_func); | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | return 0; | ||
| 1270 | } | ||
| 1271 | core_initcall(memory_failure_init); | ||
| 1272 | |||
| 1181 | /** | 1273 | /** |
| 1182 | * unpoison_memory - Unpoison a previously poisoned page | 1274 | * unpoison_memory - Unpoison a previously poisoned page |
| 1183 | * @pfn: Page number of the to be unpoisoned page | 1275 | * @pfn: Page number of the to be unpoisoned page |

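The per-CPU kfifo plus work item above lets a low-level error handler hand off page recovery without sleeping. A caller would look roughly like this (the function name is illustrative; the flags semantics follow __memory_failure()):

    #include <linux/mm.h>

    /* e.g. from a machine-check or GHES handler in IRQ-like context */
    static void example_report_corrupted_page(unsigned long pfn, int trapno)
    {
            /* only queues the event; __memory_failure() runs later
             * from the per-CPU work item, in process context */
            memory_failure_queue(pfn, trapno, 0);
    }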