35 files changed, 1172 insertions(+), 135 deletions(-)
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index dfab71848dc8..5cc699ba5453 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -48,12 +48,19 @@ directory apei/einj. The following files are provided.
 - param1
   This file is used to set the first error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address.
+  physical memory address. Only available if param_extension module
+  parameter is specified.
 
 - param2
   This file is used to set the second error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address mask.
+  physical memory address mask. Only available if param_extension
+  module parameter is specified.
+
+Injecting parameter support is a BIOS version specific extension, that
+is, it only works on some BIOS version. If you want to use it, please
+make sure your BIOS version has the proper support and specify
+"param_extension=y" in module parameter.
 
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5.
diff --git a/arch/Kconfig b/arch/Kconfig
index 26b0e2397a57..4b0669cbb3b0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX
 config HAVE_RCU_TABLE_FREE
 	bool
 
+config ARCH_HAVE_NMI_SAFE_CMPXCHG
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index ca2da8da6e9c..60cde53d266c 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,6 +14,7 @@ config ALPHA
 	select AUTO_IRQ_AFFINITY if SMP
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e9d689b7c833..197e96f70405 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -10,6 +10,7 @@ config AVR32
 	select GENERIC_IRQ_PROBE
 	select HARDIRQS_SW_RESEND
 	select GENERIC_IRQ_SHOW
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index cb884e489425..bad27a6ff407 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
 	select HAVE_PERF_EVENTS
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ZONE_DMA
 	bool
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 64c7ab7e7a81..124854714958 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
 	select IRQ_PER_CPU
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 284cd3771eaa..9e8ee9d2b8ca 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -6,6 +6,7 @@ config M68K
 	select GENERIC_ATOMIC64 if MMU
 	select HAVE_GENERIC_HARDIRQS if !MMU
 	select GENERIC_IRQ_SHOW if !MMU
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 
 config RWSEM_GENERIC_SPINLOCK
 	bool
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 65adc86a230e..e077b0bf56ca 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -15,6 +15,7 @@ config PARISC
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select IRQ_PER_CPU
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 374c475e56a3..6926b61acfea 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -136,6 +136,7 @@ config PPC
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_BPF_JIT if (PPC64 && NET)
 	select HAVE_ARCH_JUMP_LABEL
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c03fef7a9c22..0f98bbddade5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -81,6 +81,7 @@ config S390
 	select INIT_ALL_POSSIBLE
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 748ff1920068..ff9177c8f643 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -11,6 +11,7 @@ config SUPERH
 	select HAVE_DMA_ATTRS
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select PERF_USE_VMALLOC
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 1074dddcb104..42c67beadcae 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -54,6 +54,7 @@ config SPARC64
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select IRQ_PREFLOW_FASTEOI
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 0249b8b4db54..b30f71ac0d06 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -12,6 +12,7 @@ config TILE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
 	select SYS_HYPERVISOR
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7cf916fc1ce7..6a47bb22657f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if (X86_64 && NET)
 	select CLKEVT_I8253
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index f739a70b1c70..c34aa51af4ee 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -10,9 +10,11 @@ config ACPI_APEI
 	  error injection.
 
 config ACPI_APEI_GHES
-	tristate "APEI Generic Hardware Error Source"
+	bool "APEI Generic Hardware Error Source"
 	depends on ACPI_APEI && X86
 	select ACPI_HED
+	select LLIST
+	select GENERIC_ALLOCATOR
 	help
 	  Generic Hardware Error Source provides a way to report
 	  platform hardware errors (such as that from chipset). It
@@ -30,6 +32,13 @@ config ACPI_APEI_PCIEAER
 	  PCIe AER errors may be reported via APEI firmware first mode.
 	  Turn on this option to enable the corresponding support.
 
+config ACPI_APEI_MEMORY_FAILURE
+	bool "APEI memory error recovering support"
+	depends on ACPI_APEI && MEMORY_FAILURE
+	help
+	  Memory errors may be reported via APEI firmware first mode.
+	  Turn on this option to enable the memory recovering support.
+
 config ACPI_APEI_EINJ
 	tristate "APEI Error INJection (EINJ)"
 	depends on ACPI_APEI && DEBUG_FS
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 4a904a4bf05f..8041248fce9b 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
  * Interpret the specified action. Go through whole action table,
  * execute all instructions belong to the action.
  */
-int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
+		    bool optional)
 {
-	int rc;
+	int rc = -ENOENT;
 	u32 i, ip;
 	struct acpi_whea_header *entry;
 	apei_exec_ins_func_t run;
@@ -198,9 +199,9 @@ rewind:
 		goto rewind;
 	}
 
-	return 0;
+	return !optional && rc < 0 ? rc : 0;
 }
-EXPORT_SYMBOL_GPL(apei_exec_run);
+EXPORT_SYMBOL_GPL(__apei_exec_run);
 
 typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
 				      struct acpi_whea_header *entry,
@@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void)
 	return dapei;
 }
 EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
+
+int apei_osc_setup(void)
+{
+	static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
+	acpi_handle handle;
+	u32 capbuf[3];
+	struct acpi_osc_context context = {
+		.uuid_str	= whea_uuid_str,
+		.rev		= 1,
+		.cap.length	= sizeof(capbuf),
+		.cap.pointer	= capbuf,
+	};
+
+	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_TYPE] = 0;
+	capbuf[OSC_CONTROL_TYPE] = 0;
+
+	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
+	    || ACPI_FAILURE(acpi_run_osc(handle, &context)))
+		return -EIO;
+	else {
+		kfree(context.ret.pointer);
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index ef0581f2094d..f57050e7a5e7 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
 	return ctx->value;
 }
 
-int apei_exec_run(struct apei_exec_context *ctx, u8 action);
+int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
+
+static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 0);
+}
+
+/* It is optional whether the firmware provides the action */
+static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
+{
+	return __apei_exec_run(ctx, action, 1);
+}
 
 /* Common instruction implementation */
 
@@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx,
 			const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
 int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
+
+int apei_osc_setup(void);
 #endif
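
A minimal caller-side sketch (illustrative only, not part of the patch) of what the optional/mandatory split means: because __apei_exec_run() now starts rc at -ENOENT, an action that has no entry in the firmware's action table fails when run as mandatory but succeeds silently when run as optional.

	/* Sketch: EINJ's BEGIN_OPERATION may be absent on some BIOSes, so it
	 * is run as optional; an action the driver cannot do without is run
	 * as mandatory and propagates -ENOENT if the table lacks it. */
	static int example_injection_flow(struct apei_exec_context *ctx)
	{
		int rc;

		rc = apei_exec_run_optional(ctx, ACPI_EINJ_BEGIN_OPERATION);
		if (rc)
			return rc;	/* only real execution errors get here */

		return apei_exec_run(ctx, ACPI_EINJ_EXECUTE_OPERATION);
	}
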
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index f74b2ea11f21..589b96c38704 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -46,7 +46,8 @@
  * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
  * EINJ table through an unpublished extension. Use with caution as
  * most will ignore the parameter and make their own choice of address
- * for error injection.
+ * for error injection.  This extension is used only if
+ * param_extension module parameter is specified.
  */
struct einj_parameter {
	u64 type;
@@ -65,6 +66,9 @@ struct einj_parameter {
 	((struct acpi_whea_header *)((char *)(tab) +			\
 				    sizeof(struct acpi_table_einj)))
 
+static bool param_extension;
+module_param(param_extension, bool, 0);
+
 static struct acpi_table_einj *einj_tab;
 
 static struct apei_resources einj_resources;
@@ -285,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 
 	einj_exec_ctx_init(&ctx);
 
-	rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, type);
@@ -323,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
 	rc = __einj_error_trigger(trigger_paddr);
 	if (rc)
 		return rc;
-	rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
+	rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
 
 	return rc;
 }
@@ -489,14 +493,6 @@ static int __init einj_init(void)
 				   einj_debug_dir, NULL, &error_type_fops);
 	if (!fentry)
 		goto err_cleanup;
-	fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
-				    einj_debug_dir, &error_param1);
-	if (!fentry)
-		goto err_cleanup;
-	fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
-				    einj_debug_dir, &error_param2);
-	if (!fentry)
-		goto err_cleanup;
 	fentry = debugfs_create_file("error_inject", S_IWUSR,
 				     einj_debug_dir, NULL, &error_inject_fops);
 	if (!fentry)
@@ -513,12 +509,23 @@ static int __init einj_init(void)
 	rc = apei_exec_pre_map_gars(&ctx);
 	if (rc)
 		goto err_release;
-	param_paddr = einj_get_parameter_address();
-	if (param_paddr) {
-		einj_param = ioremap(param_paddr, sizeof(*einj_param));
-		rc = -ENOMEM;
-		if (!einj_param)
-			goto err_unmap;
+	if (param_extension) {
+		param_paddr = einj_get_parameter_address();
+		if (param_paddr) {
+			einj_param = ioremap(param_paddr, sizeof(*einj_param));
+			rc = -ENOMEM;
+			if (!einj_param)
+				goto err_unmap;
+			fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
+						    einj_debug_dir, &error_param1);
+			if (!fentry)
+				goto err_unmap;
+			fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
+						    einj_debug_dir, &error_param2);
+			if (!fentry)
+				goto err_unmap;
+		} else
+			pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
 	}
 
 	pr_info(EINJ_PFX "Error INJection is initialized.\n");
@@ -526,6 +533,8 @@ static int __init einj_init(void)
 	return 0;
 
 err_unmap:
+	if (einj_param)
+		iounmap(einj_param);
 	apei_exec_post_unmap_gars(&ctx);
 err_release:
 	apei_resources_release(&einj_resources);
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index a4cfb64c86a1..903549df809b 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -33,7 +33,7 @@
 
 #define ERST_DBG_PFX			"ERST DBG: "
 
-#define ERST_DBG_RECORD_LEN_MAX		4096
+#define ERST_DBG_RECORD_LEN_MAX		0x4000
 
 static void *erst_dbg_buf;
 static unsigned int erst_dbg_buf_len;
@@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = {
 
 static __init int erst_dbg_init(void)
 {
+	if (erst_disable) {
+		pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
+		return -ENODEV;
+	}
 	return misc_register(&erst_dbg_dev);
 }
 
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6053f4780df9..2ca59dc69f7f 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, offset);
@@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
@@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, offset);
@@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
@@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
 	int rc;
 
 	erst_exec_ctx_init(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
 	if (rc)
 		return rc;
 	apei_exec_ctx_set_input(&ctx, record_id);
@@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
 	if (rc)
 		return rc;
 	val = apei_exec_ctx_get_output(&ctx);
-	rc = apei_exec_run(&ctx, ACPI_ERST_END);
+	rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
 	if (rc)
 		return rc;
 
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index f703b2881153..0784f99a4665 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,7 +12,7 @@
  * For more information about Generic Hardware Error Source, please
  * refer to ACPI Specification version 4.0, section 17.3.2.6
  *
- * Copyright 2010 Intel Corp.
+ * Copyright 2010,2011 Intel Corp.
  *   Author: Huang Ying <ying.huang@intel.com>
  *
  * This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@
 #include <linux/mutex.h>
 #include <linux/ratelimit.h>
 #include <linux/vmalloc.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+#include <linux/genalloc.h>
 #include <acpi/apei.h>
 #include <acpi/atomicio.h>
 #include <acpi/hed.h>
@@ -53,6 +56,30 @@
 #define GHES_PFX	"GHES: "
 
 #define GHES_ESTATUS_MAX_SIZE		65536
+#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536
+
+#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
+
+/* This is just an estimation for memory pool allocation */
+#define GHES_ESTATUS_CACHE_AVG_SIZE	512
+
+#define GHES_ESTATUS_CACHES_SIZE	4
+
+#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
+/* Prevent too many caches are allocated because of RCU */
+#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)
+
+#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
+	(sizeof(struct ghes_estatus_cache) + (estatus_len))
+#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
+	((struct acpi_hest_generic_status *)			\
+	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
+
+#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
+	(sizeof(struct ghes_estatus_node) + (estatus_len))
+#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
+	((struct acpi_hest_generic_status *)			\
+	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
 /*
  * One struct ghes is created for each generic hardware error source.
@@ -77,6 +104,22 @@ struct ghes {
 	};
 };
 
+struct ghes_estatus_node {
+	struct llist_node llnode;
+	struct acpi_hest_generic *generic;
+};
+
+struct ghes_estatus_cache {
+	u32 estatus_len;
+	atomic_t count;
+	struct acpi_hest_generic *generic;
+	unsigned long long time_in;
+	struct rcu_head rcu;
+};
+
+int ghes_disable;
+module_param_named(disable, ghes_disable, bool, 0);
+
 static int ghes_panic_timeout	__read_mostly = 30;
 
 /*
@@ -121,6 +164,22 @@ static struct vm_struct *ghes_ioremap_area;
 static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
 static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
 
+/*
+ * printk is not safe in NMI context.  So in NMI handler, we allocate
+ * required memory from lock-less memory allocator
+ * (ghes_estatus_pool), save estatus into it, put them into lock-less
+ * list (ghes_estatus_llist), then delay printk into IRQ context via
+ * irq_work (ghes_proc_irq_work).  ghes_estatus_size_request record
+ * required pool size by all NMI error source.
+ */
+static struct gen_pool *ghes_estatus_pool;
+static unsigned long ghes_estatus_pool_size_request;
+static struct llist_head ghes_estatus_llist;
+static struct irq_work ghes_proc_irq_work;
+
+struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static atomic_t ghes_estatus_cache_alloced;
+
 static int ghes_ioremap_init(void)
 {
 	ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -180,6 +239,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
 	__flush_tlb_one(vaddr);
 }
 
+static int ghes_estatus_pool_init(void)
+{
+	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
+	if (!ghes_estatus_pool)
+		return -ENOMEM;
+	return 0;
+}
+
+static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
+					      struct gen_pool_chunk *chunk,
+					      void *data)
+{
+	free_page(chunk->start_addr);
+}
+
+static void ghes_estatus_pool_exit(void)
+{
+	gen_pool_for_each_chunk(ghes_estatus_pool,
+				ghes_estatus_pool_free_chunk_page, NULL);
+	gen_pool_destroy(ghes_estatus_pool);
+}
+
+static int ghes_estatus_pool_expand(unsigned long len)
+{
+	unsigned long i, pages, size, addr;
+	int ret;
+
+	ghes_estatus_pool_size_request += PAGE_ALIGN(len);
+	size = gen_pool_size(ghes_estatus_pool);
+	if (size >= ghes_estatus_pool_size_request)
+		return 0;
+	pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
+	for (i = 0; i < pages; i++) {
+		addr = __get_free_page(GFP_KERNEL);
+		if (!addr)
+			return -ENOMEM;
+		ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void ghes_estatus_pool_shrink(unsigned long len)
+{
+	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
+}
+
 static struct ghes *ghes_new(struct acpi_hest_generic *generic)
 {
 	struct ghes *ghes;
@@ -341,43 +449,196 @@ static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_do_proc(struct ghes *ghes)
+static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
 {
-	int sev, processed = 0;
+	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
 
-	sev = ghes_severity(ghes->estatus->error_severity);
-	apei_estatus_for_each_section(ghes->estatus, gdata) {
-#ifdef CONFIG_X86_MCE
+	sev = ghes_severity(estatus->error_severity);
+	apei_estatus_for_each_section(estatus, gdata) {
+		sec_sev = ghes_severity(gdata->error_severity);
 		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
 				 CPER_SEC_PLATFORM_MEM)) {
-			apei_mce_report_mem_error(
-				sev == GHES_SEV_CORRECTED,
-				(struct cper_sec_mem_err *)(gdata+1));
-			processed = 1;
-		}
+			struct cper_sec_mem_err *mem_err;
+			mem_err = (struct cper_sec_mem_err *)(gdata+1);
+#ifdef CONFIG_X86_MCE
+			apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
+						  mem_err);
 #endif
+#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
+			if (sev == GHES_SEV_RECOVERABLE &&
+			    sec_sev == GHES_SEV_RECOVERABLE &&
+			    mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+				unsigned long pfn;
+				pfn = mem_err->physical_addr >> PAGE_SHIFT;
+				memory_failure_queue(pfn, 0, 0);
+			}
+#endif
+		}
 	}
 }
 
-static void ghes_print_estatus(const char *pfx, struct ghes *ghes)
+static void __ghes_print_estatus(const char *pfx,
+				 const struct acpi_hest_generic *generic,
+				 const struct acpi_hest_generic_status *estatus)
 {
-	/* Not more than 2 messages every 5 seconds */
-	static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
-
 	if (pfx == NULL) {
-		if (ghes_severity(ghes->estatus->error_severity) <=
+		if (ghes_severity(estatus->error_severity) <=
 		    GHES_SEV_CORRECTED)
 			pfx = KERN_WARNING HW_ERR;
 		else
 			pfx = KERN_ERR HW_ERR;
 	}
-	if (__ratelimit(&ratelimit)) {
-		printk(
-	"%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
-	pfx, ghes->generic->header.source_id);
-		apei_estatus_print(pfx, ghes->estatus);
+	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
+	       pfx, generic->header.source_id);
+	apei_estatus_print(pfx, estatus);
+}
+
+static int ghes_print_estatus(const char *pfx,
+			      const struct acpi_hest_generic *generic,
+			      const struct acpi_hest_generic_status *estatus)
+{
+	/* Not more than 2 messages every 5 seconds */
+	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
+	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
+	struct ratelimit_state *ratelimit;
+
+	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
+		ratelimit = &ratelimit_corrected;
+	else
+		ratelimit = &ratelimit_uncorrected;
+	if (__ratelimit(ratelimit)) {
+		__ghes_print_estatus(pfx, generic, estatus);
+		return 1;
 	}
+	return 0;
+}
+
+/*
+ * GHES error status reporting throttle, to report more kinds of
+ * errors, instead of just most frequently occurred errors.
+ */
+static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
+{
+	u32 len;
+	int i, cached = 0;
+	unsigned long long now;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	len = apei_estatus_len(estatus);
+	rcu_read_lock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL)
+			continue;
+		if (len != cache->estatus_len)
+			continue;
+		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+		if (memcmp(estatus, cache_estatus, len))
+			continue;
+		atomic_inc(&cache->count);
+		now = sched_clock();
+		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
+			cached = 1;
+		break;
+	}
+	rcu_read_unlock();
+	return cached;
+}
+
+static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int alloced;
+	u32 len, cache_len;
+	struct ghes_estatus_cache *cache;
+	struct acpi_hest_generic_status *cache_estatus;
+
+	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
+	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	len = apei_estatus_len(estatus);
+	cache_len = GHES_ESTATUS_CACHE_LEN(len);
+	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
+	if (!cache) {
+		atomic_dec(&ghes_estatus_cache_alloced);
+		return NULL;
+	}
+	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
+	memcpy(cache_estatus, estatus, len);
+	cache->estatus_len = len;
+	atomic_set(&cache->count, 0);
+	cache->generic = generic;
+	cache->time_in = sched_clock();
+	return cache;
+}
+
+static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
+{
+	u32 len;
+
+	len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
+	len = GHES_ESTATUS_CACHE_LEN(len);
+	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
+	atomic_dec(&ghes_estatus_cache_alloced);
+}
+
+static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
+{
+	struct ghes_estatus_cache *cache;
+
+	cache = container_of(head, struct ghes_estatus_cache, rcu);
+	ghes_estatus_cache_free(cache);
+}
+
+static void ghes_estatus_cache_add(
+	struct acpi_hest_generic *generic,
+	struct acpi_hest_generic_status *estatus)
+{
+	int i, slot = -1, count;
+	unsigned long long now, duration, period, max_period = 0;
+	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
+
+	new_cache = ghes_estatus_cache_alloc(generic, estatus);
+	if (new_cache == NULL)
+		return;
+	rcu_read_lock();
+	now = sched_clock();
+	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
+		cache = rcu_dereference(ghes_estatus_caches[i]);
+		if (cache == NULL) {
+			slot = i;
+			slot_cache = NULL;
+			break;
+		}
+		duration = now - cache->time_in;
+		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
+			slot = i;
+			slot_cache = cache;
+			break;
+		}
+		count = atomic_read(&cache->count);
+		period = duration;
+		do_div(period, (count + 1));
+		if (period > max_period) {
+			max_period = period;
+			slot = i;
+			slot_cache = cache;
+		}
+	}
+	/* new_cache must be put into array after its contents are written */
+	smp_wmb();
+	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
+				  slot_cache, new_cache) == slot_cache) {
+		if (slot_cache)
+			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
+	} else
+		ghes_estatus_cache_free(new_cache);
+	rcu_read_unlock();
 }
 
 static int ghes_proc(struct ghes *ghes)
@@ -387,9 +648,11 @@ static int ghes_proc(struct ghes *ghes)
 	rc = ghes_read_estatus(ghes, 0);
 	if (rc)
 		goto out;
-	ghes_print_estatus(NULL, ghes);
-	ghes_do_proc(ghes);
-
+	if (!ghes_estatus_cached(ghes->estatus)) {
+		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
+			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
+	}
+	ghes_do_proc(ghes->estatus);
 out:
 	ghes_clear_estatus(ghes);
 	return 0;
@@ -447,6 +710,45 @@ static int ghes_notify_sci(struct notifier_block *this,
 	return ret;
 }
 
+static void ghes_proc_in_irq(struct irq_work *irq_work)
+{
+	struct llist_node *llnode, *next, *tail = NULL;
+	struct ghes_estatus_node *estatus_node;
+	struct acpi_hest_generic *generic;
+	struct acpi_hest_generic_status *estatus;
+	u32 len, node_len;
+
+	/*
+	 * Because the time order of estatus in list is reversed,
+	 * revert it back to proper order.
+	 */
+	llnode = llist_del_all(&ghes_estatus_llist);
+	while (llnode) {
+		next = llnode->next;
+		llnode->next = tail;
+		tail = llnode;
+		llnode = next;
+	}
+	llnode = tail;
+	while (llnode) {
+		next = llnode->next;
+		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
+					   llnode);
+		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+		len = apei_estatus_len(estatus);
+		node_len = GHES_ESTATUS_NODE_LEN(len);
+		ghes_do_proc(estatus);
+		if (!ghes_estatus_cached(estatus)) {
+			generic = estatus_node->generic;
+			if (ghes_print_estatus(NULL, generic, estatus))
+				ghes_estatus_cache_add(generic, estatus);
+		}
+		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
+			      node_len);
+		llnode = next;
+	}
+}
+
 static int ghes_notify_nmi(struct notifier_block *this,
 			   unsigned long cmd, void *data)
 {
@@ -476,7 +778,8 @@ static int ghes_notify_nmi(struct notifier_block *this,
 
 	if (sev_global >= GHES_SEV_PANIC) {
 		oops_begin();
-		ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
+		__ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
+				     ghes_global->estatus);
 		/* reboot to log the error! */
 		if (panic_timeout == 0)
 			panic_timeout = ghes_panic_timeout;
@@ -484,12 +787,34 @@ static int ghes_notify_nmi(struct notifier_block *this,
 	}
 
 	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+		u32 len, node_len;
+		struct ghes_estatus_node *estatus_node;
+		struct acpi_hest_generic_status *estatus;
+#endif
 		if (!(ghes->flags & GHES_TO_CLEAR))
 			continue;
-		/* Do not print estatus because printk is not NMI safe */
-		ghes_do_proc(ghes);
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+		if (ghes_estatus_cached(ghes->estatus))
+			goto next;
+		/* Save estatus for further processing in IRQ context */
+		len = apei_estatus_len(ghes->estatus);
+		node_len = GHES_ESTATUS_NODE_LEN(len);
+		estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
+						      node_len);
+		if (estatus_node) {
+			estatus_node->generic = ghes->generic;
+			estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
+			memcpy(estatus, ghes->estatus, len);
+			llist_add(&estatus_node->llnode, &ghes_estatus_llist);
+		}
+next:
+#endif
 		ghes_clear_estatus(ghes);
 	}
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+	irq_work_queue(&ghes_proc_irq_work);
+#endif
 
 out:
 	raw_spin_unlock(&ghes_nmi_lock);
@@ -504,10 +829,26 @@ static struct notifier_block ghes_notifier_nmi = {
 	.notifier_call = ghes_notify_nmi,
 };
 
+static unsigned long ghes_esource_prealloc_size(
+	const struct acpi_hest_generic *generic)
+{
+	unsigned long block_length, prealloc_records, prealloc_size;
+
+	block_length = min_t(unsigned long, generic->error_block_length,
+			     GHES_ESTATUS_MAX_SIZE);
+	prealloc_records = max_t(unsigned long,
+				 generic->records_to_preallocate, 1);
+	prealloc_size = min_t(unsigned long, block_length * prealloc_records,
+			      GHES_ESOURCE_PREALLOC_MAX_SIZE);
+
+	return prealloc_size;
+}
+
 static int __devinit ghes_probe(struct platform_device *ghes_dev)
 {
 	struct acpi_hest_generic *generic;
 	struct ghes *ghes = NULL;
+	unsigned long len;
 	int rc = -EINVAL;
 
 	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -573,6 +914,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
 		mutex_unlock(&ghes_list_mutex);
 		break;
 	case ACPI_HEST_NOTIFY_NMI:
+		len = ghes_esource_prealloc_size(generic);
+		ghes_estatus_pool_expand(len);
 		mutex_lock(&ghes_list_mutex);
 		if (list_empty(&ghes_nmi))
 			register_die_notifier(&ghes_notifier_nmi);
@@ -597,6 +940,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
 {
 	struct ghes *ghes;
 	struct acpi_hest_generic *generic;
+	unsigned long len;
 
 	ghes = platform_get_drvdata(ghes_dev);
 	generic = ghes->generic;
@@ -627,6 +971,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
 		 * freed after NMI handler finishes.
 		 */
 		synchronize_rcu();
+		len = ghes_esource_prealloc_size(generic);
+		ghes_estatus_pool_shrink(len);
 		break;
 	default:
 		BUG();
@@ -662,15 +1008,43 @@ static int __init ghes_init(void)
 		return -EINVAL;
 	}
 
+	if (ghes_disable) {
+		pr_info(GHES_PFX "GHES is not enabled!\n");
+		return -EINVAL;
+	}
+
+	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
+
 	rc = ghes_ioremap_init();
 	if (rc)
 		goto err;
 
-	rc = platform_driver_register(&ghes_platform_driver);
+	rc = ghes_estatus_pool_init();
 	if (rc)
 		goto err_ioremap_exit;
 
+	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
+				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
+	if (rc)
+		goto err_pool_exit;
+
+	rc = platform_driver_register(&ghes_platform_driver);
+	if (rc)
+		goto err_pool_exit;
+
+	rc = apei_osc_setup();
+	if (rc == 0 && osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
+	else if (rc == 0 && !osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
+	else if (rc && osc_sb_apei_support_acked)
+		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
+	else
+		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
+
 	return 0;
+err_pool_exit:
+	ghes_estatus_pool_exit();
 err_ioremap_exit:
 	ghes_ioremap_exit();
 err:
@@ -680,6 +1054,7 @@ err:
 static void __exit ghes_exit(void)
 {
 	platform_driver_unregister(&ghes_platform_driver);
+	ghes_estatus_pool_exit();
 	ghes_ioremap_exit();
 }
 
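Taken together, the ghes.c changes implement one pattern: NMI code may not call printk() or take locks, so each record is copied into a lock-less pool, pushed onto a lock-less list, and handed to IRQ context through irq_work. Below is a condensed, self-contained sketch of that pattern with illustrative names (report, pending, on_nmi_event are not from the patch); only the gen_pool_*, llist_* and irq_work_* calls are the real kernel APIs the patch uses. Pool creation, pre-expansion and init_irq_work() are assumed to have happened at init time, as ghes_init() does above.

	#include <linux/genalloc.h>
	#include <linux/irq_work.h>
	#include <linux/llist.h>
	#include <linux/printk.h>
	#include <linux/string.h>

	struct report {
		struct llist_node llnode;
		char msg[64];
	};

	static struct gen_pool *pool;	/* created + expanded at init time */
	static struct llist_head pending;
	static struct irq_work work;	/* init_irq_work(&work, process_pending) */

	/* Runs in IRQ context, where printk() is allowed. */
	static void process_pending(struct irq_work *w)
	{
		struct llist_node *n = llist_del_all(&pending);

		while (n) {
			struct report *r = llist_entry(n, struct report, llnode);

			n = n->next;
			printk(KERN_ERR "%s\n", r->msg);
			gen_pool_free(pool, (unsigned long)r, sizeof(*r));
		}
	}

	/* Runs in NMI context: no locks, no printk, only lock-less operations. */
	static void on_nmi_event(const char *msg)
	{
		struct report *r = (void *)gen_pool_alloc(pool, sizeof(*r));

		if (!r)
			return;		/* pool exhausted: drop the report */
		strlcpy(r->msg, msg, sizeof(r->msg));
		llist_add(&r->llnode, &pending);
		irq_work_queue(&work);	/* NMI-safe kick of IRQ context */
	}
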
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 181bc2f7bb74..05fee06f4d6e 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -231,16 +231,17 @@ void __init acpi_hest_init(void)
 		goto err;
 	}
 
-	rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
-	if (rc)
-		goto err;
-
-	rc = hest_ghes_dev_register(ghes_count);
-	if (!rc) {
-		pr_info(HEST_PFX "Table parsing has been initialized.\n");
-		return;
+	if (!ghes_disable) {
+		rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
+		if (rc)
+			goto err;
+		rc = hest_ghes_dev_register(ghes_count);
+		if (rc)
+			goto err;
 	}
 
+	pr_info(HEST_PFX "Table parsing has been initialized.\n");
+	return;
 err:
 	hest_disable = 1;
 }
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d1e06c182cdb..437ddbf0c49a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -39,6 +39,7 @@
 #include <linux/pci.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
+#include <acpi/apei.h>
 #include <linux/dmi.h>
 #include <linux/suspend.h>
 
@@ -519,6 +520,7 @@ out_kfree:
 }
 EXPORT_SYMBOL(acpi_run_osc);
 
+bool osc_sb_apei_support_acked;
 static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
 static void acpi_bus_osc_support(void)
 {
@@ -541,11 +543,19 @@ static void acpi_bus_osc_support(void)
 #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
 	capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT;
 #endif
+
+	if (!ghes_disable)
+		capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT;
 	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
 		return;
-	if (ACPI_SUCCESS(acpi_run_osc(handle, &context)))
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) {
+		u32 *capbuf_ret = context.ret.pointer;
+		if (context.ret.length > OSC_SUPPORT_TYPE)
+			osc_sb_apei_support_acked =
+				capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT;
 		kfree(context.ret.pointer);
-	/* do we need to check the returned cap? Sounds no */
+	}
+	/* do we need to check other returned cap? Sounds no */
 }
 
 /* --------------------------------------------------------------------------
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index e67b523a50e1..51a527d24a8a 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -18,6 +18,11 @@
 
 extern int hest_disable;
 extern int erst_disable;
+#ifdef CONFIG_ACPI_APEI_GHES
+extern int ghes_disable;
+#else
+#define ghes_disable 1
+#endif
 
 #ifdef CONFIG_ACPI_APEI
 void __init acpi_hest_init(void);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 2312e850aab8..6001b4da39dd 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -279,6 +279,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_CPUHP_OST_SUPPORT	8
 #define OSC_SB_APEI_SUPPORT		16
 
+extern bool osc_sb_apei_support_acked;
+
 /* PCI defined _OSC bits */
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT	1
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3bac44cce142..7ad634501e48 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -146,6 +146,7 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
 extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
 extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
 
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
 #define BITMAP_LAST_WORD_MASK(nbits)					\
 (									\
 	((nbits) % BITS_PER_LONG) ?					\
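A quick worked example (not from the patch) of what the newly exported macro computes, alongside the existing BITMAP_LAST_WORD_MASK():

	/* On a 64-bit machine (BITS_PER_LONG == 64):
	 *
	 *	BITMAP_FIRST_WORD_MASK(0) == ~0UL << 0 == 0xffffffffffffffffUL
	 *	BITMAP_FIRST_WORD_MASK(3) == ~0UL << 3 == 0xfffffffffffffff8UL
	 *	BITMAP_LAST_WORD_MASK(3)  ==              0x0000000000000007UL
	 *
	 * FIRST keeps the bits of a word from `start` upward, LAST keeps the
	 * bits below `nbits`; a range operation ANDs them in so that only the
	 * partial words at each end of the range are touched, which is how
	 * bitmap range helpers such as the lock-less set/clear loops in
	 * lib/genalloc.c in this series use them.
	 */
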
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 5bbebda78b02..5e98eeb2af3b 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h | |||
@@ -1,8 +1,26 @@ | |||
1 | /* | 1 | /* |
2 | * Basic general purpose allocator for managing special purpose memory | 2 | * Basic general purpose allocator for managing special purpose |
3 | * not managed by the regular kmalloc/kfree interface. | 3 | * memory, for example, memory that is not managed by the regular |
4 | * Uses for this includes on-device special memory, uncached memory | 4 | * kmalloc/kfree interface. Uses for this includes on-device special |
5 | * etc. | 5 | * memory, uncached memory etc. |
6 | * | ||
7 | * It is safe to use the allocator in NMI handlers and other special | ||
8 | * unblockable contexts that could otherwise deadlock on locks. This | ||
9 | * is implemented by using atomic operations and retries on any | ||
10 | * conflicts. The disadvantage is that there may be livelocks in | ||
11 | * extreme cases. For better scalability, one allocator can be used | ||
12 | * for each CPU. | ||
13 | * | ||
14 | * The lockless operation only works if there is enough memory | ||
15 | * available. If new memory is added to the pool, a lock still has | ||
16 | * to be taken. So any user relying on locklessness has to ensure | ||
17 | * that sufficient memory is preallocated. | ||
18 | * | ||
19 | * The basic atomic operation of this allocator is cmpxchg on long. | ||
20 | * On architectures that don't have an NMI-safe cmpxchg | ||
21 | * implementation, the allocator can NOT be used in NMI handlers. | ||
22 | * So code that uses the allocator in an NMI handler should depend | ||
23 | * on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
6 | * | 24 | * |
7 | * This source code is licensed under the GNU General Public License, | 25 | * This source code is licensed under the GNU General Public License, |
8 | * Version 2. See the file COPYING for more details. | 26 | * Version 2. See the file COPYING for more details. |
@@ -15,7 +33,7 @@ | |||
15 | * General purpose special memory pool descriptor. | 33 | * General purpose special memory pool descriptor. |
16 | */ | 34 | */ |
17 | struct gen_pool { | 35 | struct gen_pool { |
18 | rwlock_t lock; | 36 | spinlock_t lock; |
19 | struct list_head chunks; /* list of chunks in this pool */ | 37 | struct list_head chunks; /* list of chunks in this pool */ |
20 | int min_alloc_order; /* minimum allocation order */ | 38 | int min_alloc_order; /* minimum allocation order */ |
21 | }; | 39 | }; |
@@ -24,8 +42,8 @@ struct gen_pool { | |||
24 | * General purpose special memory pool chunk descriptor. | 42 | * General purpose special memory pool chunk descriptor. |
25 | */ | 43 | */ |
26 | struct gen_pool_chunk { | 44 | struct gen_pool_chunk { |
27 | spinlock_t lock; | ||
28 | struct list_head next_chunk; /* next chunk in pool */ | 45 | struct list_head next_chunk; /* next chunk in pool */ |
46 | atomic_t avail; | ||
29 | phys_addr_t phys_addr; /* physical starting address of memory chunk */ | 47 | phys_addr_t phys_addr; /* physical starting address of memory chunk */ |
30 | unsigned long start_addr; /* starting address of memory chunk */ | 48 | unsigned long start_addr; /* starting address of memory chunk */ |
31 | unsigned long end_addr; /* ending address of memory chunk */ | 49 | unsigned long end_addr; /* ending address of memory chunk */ |
@@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, | |||
56 | extern void gen_pool_destroy(struct gen_pool *); | 74 | extern void gen_pool_destroy(struct gen_pool *); |
57 | extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); | 75 | extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); |
58 | extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); | 76 | extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); |
77 | extern void gen_pool_for_each_chunk(struct gen_pool *, | ||
78 | void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); | ||
79 | extern size_t gen_pool_avail(struct gen_pool *); | ||
80 | extern size_t gen_pool_size(struct gen_pool *); | ||
59 | #endif /* __GENALLOC_H__ */ | 81 | #endif /* __GENALLOC_H__ */ |
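
An illustrative usage sketch of the gen_pool API above (not part of the patch; the SRAM address, size, and 32-byte granularity are hypothetical):

	#include <linux/genalloc.h>

	#define SRAM_VADDR	0xfe000000UL	/* hypothetical mapping */
	#define SRAM_SIZE	0x10000		/* hypothetical 64 KiB */

	static struct gen_pool *sram_pool;

	static int __init sram_pool_init(void)
	{
		unsigned long vaddr;

		/* min_alloc_order 5: allocations rounded up to 32 bytes */
		sram_pool = gen_pool_create(5, -1);
		if (!sram_pool)
			return -ENOMEM;

		/* Adding a chunk still takes pool->lock, so all backing
		 * memory is preallocated here; only gen_pool_alloc() and
		 * gen_pool_free() are lockless. */
		if (gen_pool_add(sram_pool, SRAM_VADDR, SRAM_SIZE, -1)) {
			gen_pool_destroy(sram_pool);
			return -ENOMEM;
		}

		vaddr = gen_pool_alloc(sram_pool, 128);	/* 0 on failure */
		if (vaddr)
			gen_pool_free(sram_pool, vaddr, 128);
		return 0;
	}
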
diff --git a/include/linux/llist.h b/include/linux/llist.h new file mode 100644 index 000000000000..aa0c8b5b3cd0 --- /dev/null +++ b/include/linux/llist.h | |||
@@ -0,0 +1,126 @@ | |||
1 | #ifndef LLIST_H | ||
2 | #define LLIST_H | ||
3 | /* | ||
4 | * Lock-less NULL-terminated singly linked list | ||
5 | * | ||
6 | * If there are multiple producers and multiple consumers, llist_add | ||
7 | * can be used in producers and llist_del_all can be used in | ||
8 | * consumers. They can work simultaneously without a lock. But | ||
9 | * llist_del_first can not be used here, because llist_del_first | ||
10 | * depends on list->first->next not changing if list->first is not | ||
11 | * changed during its operation, and a llist_del_first, llist_add, | ||
12 | * llist_add (or llist_del_all, llist_add, llist_add) sequence in | ||
13 | * another consumer may violate that. | ||
14 | * | ||
15 | * If there are multiple producers and one consumer, llist_add can be | ||
16 | * used in producers and llist_del_all or llist_del_first can be used | ||
17 | * in the consumer. | ||
18 | * | ||
19 | * This can be summarized as follows: | ||
20 | * | ||
21 | * | add | del_first | del_all | ||
22 | * add | - | - | - | ||
23 | * del_first | | L | L | ||
24 | * del_all | | | - | ||
25 | * | ||
26 | * Where "-" stands for no lock is needed, while "L" stands for lock | ||
27 | * is needed. | ||
28 | * | ||
29 | * The list entries deleted via llist_del_all can be traversed with | ||
30 | * traversing functions such as llist_for_each etc. But the entries | ||
31 | * can not be traversed safely before being deleted from the list. | ||
32 | * The order of deleted entries is from the newest to the oldest added | ||
33 | * one. If you want to traverse from the oldest to the newest, you | ||
34 | * must reverse the order by yourself before traversing. | ||
35 | * | ||
36 | * The basic atomic operation of this list is cmpxchg on long. On | ||
37 | * architectures that don't have an NMI-safe cmpxchg implementation, | ||
38 | * the list can NOT be used in NMI handlers. So code that uses the | ||
39 | * list in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
40 | */ | ||
41 | |||
42 | struct llist_head { | ||
43 | struct llist_node *first; | ||
44 | }; | ||
45 | |||
46 | struct llist_node { | ||
47 | struct llist_node *next; | ||
48 | }; | ||
49 | |||
50 | #define LLIST_HEAD_INIT(name) { NULL } | ||
51 | #define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) | ||
52 | |||
53 | /** | ||
54 | * init_llist_head - initialize lock-less list head | ||
55 | * @list: the head for your lock-less list | ||
56 | */ | ||
57 | static inline void init_llist_head(struct llist_head *list) | ||
58 | { | ||
59 | list->first = NULL; | ||
60 | } | ||
61 | |||
62 | /** | ||
63 | * llist_entry - get the struct of this entry | ||
64 | * @ptr: the &struct llist_node pointer. | ||
65 | * @type: the type of the struct this is embedded in. | ||
66 | * @member: the name of the llist_node within the struct. | ||
67 | */ | ||
68 | #define llist_entry(ptr, type, member) \ | ||
69 | container_of(ptr, type, member) | ||
70 | |||
71 | /** | ||
72 | * llist_for_each - iterate over some deleted entries of a lock-less list | ||
73 | * @pos: the &struct llist_node to use as a loop cursor | ||
74 | * @node: the first entry of deleted list entries | ||
75 | * | ||
76 | * In general, some entries of the lock-less list can be traversed | ||
77 | * safely only after being deleted from the list, so start with an | ||
78 | * entry instead of the list head. | ||
79 | * | ||
80 | * If being used on entries deleted from lock-less list directly, the | ||
81 | * traverse order is from the newest to the oldest added entry. If | ||
82 | * you want to traverse from the oldest to the newest, you must | ||
83 | * reverse the order by yourself before traversing. | ||
84 | */ | ||
85 | #define llist_for_each(pos, node) \ | ||
86 | for ((pos) = (node); pos; (pos) = (pos)->next) | ||
87 | |||
88 | /** | ||
89 | * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type | ||
90 | * @pos: the type * to use as a loop cursor. | ||
91 | * @node: the first entry of deleted list entries. | ||
92 | * @member: the name of the llist_node within the struct. | ||
93 | * | ||
94 | * In general, some entries of the lock-less list can be traversed | ||
95 | * safely only after being removed from the list, so start with an | ||
96 | * entry instead of the list head. | ||
97 | * | ||
98 | * If being used on entries deleted from lock-less list directly, the | ||
99 | * traverse order is from the newest to the oldest added entry. If | ||
100 | * you want to traverse from the oldest to the newest, you must | ||
101 | * reverse the order by yourself before traversing. | ||
102 | */ | ||
103 | #define llist_for_each_entry(pos, node, member) \ | ||
104 | for ((pos) = llist_entry((node), typeof(*(pos)), member); \ | ||
105 | &(pos)->member != NULL; \ | ||
106 | (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) | ||
107 | |||
108 | /** | ||
109 | * llist_empty - tests whether a lock-less list is empty | ||
110 | * @head: the list to test | ||
111 | * | ||
112 | * Not guaranteed to be accurate or up to date. Just a quick way to | ||
113 | * test whether the list is empty without deleting something from the | ||
114 | * list. | ||
115 | */ | ||
116 | static inline int llist_empty(const struct llist_head *head) | ||
117 | { | ||
118 | return ACCESS_ONCE(head->first) == NULL; | ||
119 | } | ||
120 | |||
121 | void llist_add(struct llist_node *new, struct llist_head *head); | ||
122 | void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, | ||
123 | struct llist_head *head); | ||
124 | struct llist_node *llist_del_first(struct llist_head *head); | ||
125 | struct llist_node *llist_del_all(struct llist_head *head); | ||
126 | #endif /* LLIST_H */ | ||
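
An illustrative sketch of the multiple-producer/single-consumer pattern the table above allows (not part of the patch; struct my_event and the event handling are hypothetical):

	#include <linux/kernel.h>
	#include <linux/llist.h>

	struct my_event {
		int code;
		struct llist_node llnode;
	};

	static LLIST_HEAD(event_list);

	/* Producer: may run concurrently with other producers and the
	 * consumer; in NMI context only if the architecture selects
	 * ARCH_HAVE_NMI_SAFE_CMPXCHG. */
	static void report_event(struct my_event *ev)
	{
		llist_add(&ev->llnode, &event_list);
	}

	/* Single consumer: detach the whole list at once; entries come
	 * back ordered newest-first, per the comment above. */
	static void drain_events(void)
	{
		struct llist_node *node = llist_del_all(&event_list);
		struct my_event *ev;

		llist_for_each_entry(ev, node, llnode)
			pr_info("event %d\n", ev->code);
	}
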
diff --git a/include/linux/mm.h b/include/linux/mm.h index 3172a1c0f08e..f2690cf49827 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1600,6 +1600,7 @@ enum mf_flags { | |||
1600 | }; | 1600 | }; |
1601 | extern void memory_failure(unsigned long pfn, int trapno); | 1601 | extern void memory_failure(unsigned long pfn, int trapno); |
1602 | extern int __memory_failure(unsigned long pfn, int trapno, int flags); | 1602 | extern int __memory_failure(unsigned long pfn, int trapno, int flags); |
1603 | extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); | ||
1603 | extern int unpoison_memory(unsigned long pfn); | 1604 | extern int unpoison_memory(unsigned long pfn); |
1604 | extern int sysctl_memory_failure_early_kill; | 1605 | extern int sysctl_memory_failure_early_kill; |
1605 | extern int sysctl_memory_failure_recovery; | 1606 | extern int sysctl_memory_failure_recovery; |
diff --git a/lib/Kconfig b/lib/Kconfig index 32f3e5ae2be5..6c695ff9caba 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
@@ -276,4 +276,7 @@ config CORDIC | |||
276 | so its calculations are in fixed point. Modules can select this | 276 | so its calculations are in fixed point. Modules can select this |
277 | when they require this function. Module will be called cordic. | 277 | when they require this function. Module will be called cordic. |
278 | 278 | ||
279 | config LLIST | ||
280 | bool | ||
281 | |||
279 | endmenu | 282 | endmenu |
diff --git a/lib/Makefile b/lib/Makefile index 892f4e282ea1..6457af4a7caf 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o | |||
115 | 115 | ||
116 | obj-$(CONFIG_CORDIC) += cordic.o | 116 | obj-$(CONFIG_CORDIC) += cordic.o |
117 | 117 | ||
118 | obj-$(CONFIG_LLIST) += llist.o | ||
119 | |||
118 | hostprogs-y := gen_crc32table | 120 | hostprogs-y := gen_crc32table |
119 | clean-files := crc32table.h | 121 | clean-files := crc32table.h |
120 | 122 | ||
diff --git a/lib/bitmap.c b/lib/bitmap.c index 37ef4b048795..2f4412e4d071 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c | |||
@@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) | |||
271 | } | 271 | } |
272 | EXPORT_SYMBOL(__bitmap_weight); | 272 | EXPORT_SYMBOL(__bitmap_weight); |
273 | 273 | ||
274 | #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) | ||
275 | |||
276 | void bitmap_set(unsigned long *map, int start, int nr) | 274 | void bitmap_set(unsigned long *map, int start, int nr) |
277 | { | 275 | { |
278 | unsigned long *p = map + BIT_WORD(start); | 276 | unsigned long *p = map + BIT_WORD(start); |
diff --git a/lib/genalloc.c b/lib/genalloc.c index 577ddf805975..f352cc42f4f8 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c | |||
@@ -1,8 +1,26 @@ | |||
1 | /* | 1 | /* |
2 | * Basic general purpose allocator for managing special purpose memory | 2 | * Basic general purpose allocator for managing special purpose |
3 | * not managed by the regular kmalloc/kfree interface. | 3 | * memory, for example, memory that is not managed by the regular |
4 | * Uses for this includes on-device special memory, uncached memory | 4 | * kmalloc/kfree interface. Uses for this include on-device special |
5 | * etc. | 5 | * memory, uncached memory etc. |
6 | * | ||
7 | * It is safe to use the allocator in NMI handlers and other special | ||
8 | * unblockable contexts that could otherwise deadlock on locks. This | ||
9 | * is implemented by using atomic operations and retries on any | ||
10 | * conflicts. The disadvantage is that there may be livelocks in | ||
11 | * extreme cases. For better scalability, one allocator can be used | ||
12 | * for each CPU. | ||
13 | * | ||
14 | * The lockless operation only works if there is enough memory | ||
15 | * available. If new memory is added to the pool, a lock still has | ||
16 | * to be taken. So any user relying on locklessness has to ensure | ||
17 | * that sufficient memory is preallocated. | ||
18 | * | ||
19 | * The basic atomic operation of this allocator is cmpxchg on long. | ||
20 | * On architectures that don't have an NMI-safe cmpxchg | ||
21 | * implementation, the allocator can NOT be used in NMI handlers. | ||
22 | * So code that uses the allocator in an NMI handler should depend | ||
23 | * on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
6 | * | 24 | * |
7 | * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> | 25 | * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> |
8 | * | 26 | * |
@@ -13,8 +31,109 @@ | |||
13 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
14 | #include <linux/module.h> | 32 | #include <linux/module.h> |
15 | #include <linux/bitmap.h> | 33 | #include <linux/bitmap.h> |
34 | #include <linux/rculist.h> | ||
35 | #include <linux/interrupt.h> | ||
16 | #include <linux/genalloc.h> | 36 | #include <linux/genalloc.h> |
17 | 37 | ||
38 | static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) | ||
39 | { | ||
40 | unsigned long val, nval; | ||
41 | |||
42 | nval = *addr; | ||
43 | do { | ||
44 | val = nval; | ||
45 | if (val & mask_to_set) | ||
46 | return -EBUSY; | ||
47 | cpu_relax(); | ||
48 | } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) | ||
54 | { | ||
55 | unsigned long val, nval; | ||
56 | |||
57 | nval = *addr; | ||
58 | do { | ||
59 | val = nval; | ||
60 | if ((val & mask_to_clear) != mask_to_clear) | ||
61 | return -EBUSY; | ||
62 | cpu_relax(); | ||
63 | } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); | ||
64 | |||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * bitmap_set_ll - set the specified number of bits at the specified position | ||
70 | * @map: pointer to a bitmap | ||
71 | * @start: a bit position in @map | ||
72 | * @nr: number of bits to set | ||
73 | * | ||
74 | * Set @nr bits starting from @start in @map lock-lessly. Several | ||
75 | * users can set/clear the same bitmap simultaneously without a lock. | ||
76 | * If two users set the same bit, one fails and the number of bits | ||
77 | * still to be set is returned; otherwise 0 is returned. | ||
78 | */ | ||
79 | static int bitmap_set_ll(unsigned long *map, int start, int nr) | ||
80 | { | ||
81 | unsigned long *p = map + BIT_WORD(start); | ||
82 | const int size = start + nr; | ||
83 | int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); | ||
84 | unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); | ||
85 | |||
86 | while (nr - bits_to_set >= 0) { | ||
87 | if (set_bits_ll(p, mask_to_set)) | ||
88 | return nr; | ||
89 | nr -= bits_to_set; | ||
90 | bits_to_set = BITS_PER_LONG; | ||
91 | mask_to_set = ~0UL; | ||
92 | p++; | ||
93 | } | ||
94 | if (nr) { | ||
95 | mask_to_set &= BITMAP_LAST_WORD_MASK(size); | ||
96 | if (set_bits_ll(p, mask_to_set)) | ||
97 | return nr; | ||
98 | } | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * bitmap_clear_ll - clear the specified number of bits at the specified position | ||
105 | * @map: pointer to a bitmap | ||
106 | * @start: a bit position in @map | ||
107 | * @nr: number of bits to clear | ||
108 | * | ||
109 | * Clear @nr bits starting from @start in @map lock-lessly. Several | ||
110 | * users can set/clear the same bitmap simultaneously without a lock. | ||
111 | * If two users clear the same bit, one fails and the number of bits | ||
112 | * still to be cleared is returned; otherwise 0 is returned. | ||
113 | */ | ||
114 | static int bitmap_clear_ll(unsigned long *map, int start, int nr) | ||
115 | { | ||
116 | unsigned long *p = map + BIT_WORD(start); | ||
117 | const int size = start + nr; | ||
118 | int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); | ||
119 | unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); | ||
120 | |||
121 | while (nr - bits_to_clear >= 0) { | ||
122 | if (clear_bits_ll(p, mask_to_clear)) | ||
123 | return nr; | ||
124 | nr -= bits_to_clear; | ||
125 | bits_to_clear = BITS_PER_LONG; | ||
126 | mask_to_clear = ~0UL; | ||
127 | p++; | ||
128 | } | ||
129 | if (nr) { | ||
130 | mask_to_clear &= BITMAP_LAST_WORD_MASK(size); | ||
131 | if (clear_bits_ll(p, mask_to_clear)) | ||
132 | return nr; | ||
133 | } | ||
134 | |||
135 | return 0; | ||
136 | } | ||
18 | 137 | ||
19 | /** | 138 | /** |
20 | * gen_pool_create - create a new special memory pool | 139 | * gen_pool_create - create a new special memory pool |
@@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) | |||
30 | 149 | ||
31 | pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); | 150 | pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); |
32 | if (pool != NULL) { | 151 | if (pool != NULL) { |
33 | rwlock_init(&pool->lock); | 152 | spin_lock_init(&pool->lock); |
34 | INIT_LIST_HEAD(&pool->chunks); | 153 | INIT_LIST_HEAD(&pool->chunks); |
35 | pool->min_alloc_order = min_alloc_order; | 154 | pool->min_alloc_order = min_alloc_order; |
36 | } | 155 | } |
@@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy | |||
63 | if (unlikely(chunk == NULL)) | 182 | if (unlikely(chunk == NULL)) |
64 | return -ENOMEM; | 183 | return -ENOMEM; |
65 | 184 | ||
66 | spin_lock_init(&chunk->lock); | ||
67 | chunk->phys_addr = phys; | 185 | chunk->phys_addr = phys; |
68 | chunk->start_addr = virt; | 186 | chunk->start_addr = virt; |
69 | chunk->end_addr = virt + size; | 187 | chunk->end_addr = virt + size; |
188 | atomic_set(&chunk->avail, size); | ||
70 | 189 | ||
71 | write_lock(&pool->lock); | 190 | spin_lock(&pool->lock); |
72 | list_add(&chunk->next_chunk, &pool->chunks); | 191 | list_add_rcu(&chunk->next_chunk, &pool->chunks); |
73 | write_unlock(&pool->lock); | 192 | spin_unlock(&pool->lock); |
74 | 193 | ||
75 | return 0; | 194 | return 0; |
76 | } | 195 | } |
@@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt); | |||
85 | */ | 204 | */ |
86 | phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) | 205 | phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) |
87 | { | 206 | { |
88 | struct list_head *_chunk; | ||
89 | struct gen_pool_chunk *chunk; | 207 | struct gen_pool_chunk *chunk; |
208 | phys_addr_t paddr = -1; | ||
90 | 209 | ||
91 | read_lock(&pool->lock); | 210 | rcu_read_lock(); |
92 | list_for_each(_chunk, &pool->chunks) { | 211 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { |
93 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | 212 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { |
94 | 213 | paddr = chunk->phys_addr + (addr - chunk->start_addr); | |
95 | if (addr >= chunk->start_addr && addr < chunk->end_addr) | 214 | break; |
96 | return chunk->phys_addr + addr - chunk->start_addr; | 215 | } |
97 | } | 216 | } |
98 | read_unlock(&pool->lock); | 217 | rcu_read_unlock(); |
99 | 218 | ||
100 | return -1; | 219 | return paddr; |
101 | } | 220 | } |
102 | EXPORT_SYMBOL(gen_pool_virt_to_phys); | 221 | EXPORT_SYMBOL(gen_pool_virt_to_phys); |
103 | 222 | ||
@@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool) | |||
115 | int order = pool->min_alloc_order; | 234 | int order = pool->min_alloc_order; |
116 | int bit, end_bit; | 235 | int bit, end_bit; |
117 | 236 | ||
118 | |||
119 | list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { | 237 | list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { |
120 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | 238 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); |
121 | list_del(&chunk->next_chunk); | 239 | list_del(&chunk->next_chunk); |
@@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy); | |||
137 | * @size: number of bytes to allocate from the pool | 255 | * @size: number of bytes to allocate from the pool |
138 | * | 256 | * |
139 | * Allocate the requested number of bytes from the specified pool. | 257 | * Allocate the requested number of bytes from the specified pool. |
140 | * Uses a first-fit algorithm. | 258 | * Uses a first-fit algorithm. Can not be used in an NMI handler on |
259 | * architectures without an NMI-safe cmpxchg implementation. | ||
141 | */ | 260 | */ |
142 | unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) | 261 | unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) |
143 | { | 262 | { |
144 | struct list_head *_chunk; | ||
145 | struct gen_pool_chunk *chunk; | 263 | struct gen_pool_chunk *chunk; |
146 | unsigned long addr, flags; | 264 | unsigned long addr = 0; |
147 | int order = pool->min_alloc_order; | 265 | int order = pool->min_alloc_order; |
148 | int nbits, start_bit, end_bit; | 266 | int nbits, start_bit = 0, end_bit, remain; |
267 | |||
268 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
269 | BUG_ON(in_nmi()); | ||
270 | #endif | ||
149 | 271 | ||
150 | if (size == 0) | 272 | if (size == 0) |
151 | return 0; | 273 | return 0; |
152 | 274 | ||
153 | nbits = (size + (1UL << order) - 1) >> order; | 275 | nbits = (size + (1UL << order) - 1) >> order; |
154 | 276 | rcu_read_lock(); | |
155 | read_lock(&pool->lock); | 277 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { |
156 | list_for_each(_chunk, &pool->chunks) { | 278 | if (size > atomic_read(&chunk->avail)) |
157 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | 279 | continue; |
158 | 280 | ||
159 | end_bit = (chunk->end_addr - chunk->start_addr) >> order; | 281 | end_bit = (chunk->end_addr - chunk->start_addr) >> order; |
160 | 282 | retry: | |
161 | spin_lock_irqsave(&chunk->lock, flags); | 283 | start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, |
162 | start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, | 284 | start_bit, nbits, 0); |
163 | nbits, 0); | 285 | if (start_bit >= end_bit) |
164 | if (start_bit >= end_bit) { | ||
165 | spin_unlock_irqrestore(&chunk->lock, flags); | ||
166 | continue; | 286 | continue; |
287 | remain = bitmap_set_ll(chunk->bits, start_bit, nbits); | ||
288 | if (remain) { | ||
289 | remain = bitmap_clear_ll(chunk->bits, start_bit, | ||
290 | nbits - remain); | ||
291 | BUG_ON(remain); | ||
292 | goto retry; | ||
167 | } | 293 | } |
168 | 294 | ||
169 | addr = chunk->start_addr + ((unsigned long)start_bit << order); | 295 | addr = chunk->start_addr + ((unsigned long)start_bit << order); |
170 | 296 | size = nbits << order; | |
171 | bitmap_set(chunk->bits, start_bit, nbits); | 297 | atomic_sub(size, &chunk->avail); |
172 | spin_unlock_irqrestore(&chunk->lock, flags); | 298 | break; |
173 | read_unlock(&pool->lock); | ||
174 | return addr; | ||
175 | } | 299 | } |
176 | read_unlock(&pool->lock); | 300 | rcu_read_unlock(); |
177 | return 0; | 301 | return addr; |
178 | } | 302 | } |
179 | EXPORT_SYMBOL(gen_pool_alloc); | 303 | EXPORT_SYMBOL(gen_pool_alloc); |
180 | 304 | ||
@@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc); | |||
184 | * @addr: starting address of memory to free back to pool | 308 | * @addr: starting address of memory to free back to pool |
185 | * @size: size in bytes of memory to free | 309 | * @size: size in bytes of memory to free |
186 | * | 310 | * |
187 | * Free previously allocated special memory back to the specified pool. | 311 | * Free previously allocated special memory back to the specified |
312 | * pool. Can not be used in an NMI handler on architectures | ||
313 | * without an NMI-safe cmpxchg implementation. | ||
188 | */ | 314 | */ |
189 | void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) | 315 | void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) |
190 | { | 316 | { |
191 | struct list_head *_chunk; | ||
192 | struct gen_pool_chunk *chunk; | 317 | struct gen_pool_chunk *chunk; |
193 | unsigned long flags; | ||
194 | int order = pool->min_alloc_order; | 318 | int order = pool->min_alloc_order; |
195 | int bit, nbits; | 319 | int start_bit, nbits, remain; |
196 | 320 | ||
197 | nbits = (size + (1UL << order) - 1) >> order; | 321 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG |
198 | 322 | BUG_ON(in_nmi()); | |
199 | read_lock(&pool->lock); | 323 | #endif |
200 | list_for_each(_chunk, &pool->chunks) { | ||
201 | chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); | ||
202 | 324 | ||
325 | nbits = (size + (1UL << order) - 1) >> order; | ||
326 | rcu_read_lock(); | ||
327 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { | ||
203 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { | 328 | if (addr >= chunk->start_addr && addr < chunk->end_addr) { |
204 | BUG_ON(addr + size > chunk->end_addr); | 329 | BUG_ON(addr + size > chunk->end_addr); |
205 | spin_lock_irqsave(&chunk->lock, flags); | 330 | start_bit = (addr - chunk->start_addr) >> order; |
206 | bit = (addr - chunk->start_addr) >> order; | 331 | remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); |
207 | while (nbits--) | 332 | BUG_ON(remain); |
208 | __clear_bit(bit++, chunk->bits); | 333 | size = nbits << order; |
209 | spin_unlock_irqrestore(&chunk->lock, flags); | 334 | atomic_add(size, &chunk->avail); |
210 | break; | 335 | rcu_read_unlock(); |
336 | return; | ||
211 | } | 337 | } |
212 | } | 338 | } |
213 | BUG_ON(nbits > 0); | 339 | rcu_read_unlock(); |
214 | read_unlock(&pool->lock); | 340 | BUG(); |
215 | } | 341 | } |
216 | EXPORT_SYMBOL(gen_pool_free); | 342 | EXPORT_SYMBOL(gen_pool_free); |
343 | |||
344 | /** | ||
345 | * gen_pool_for_each_chunk - call func for every chunk of generic memory pool | ||
346 | * @pool: the generic memory pool | ||
347 | * @func: func to call | ||
348 | * @data: additional data used by @func | ||
349 | * | ||
350 | * Call @func for every chunk of the generic memory pool. @func is | ||
351 | * called with rcu_read_lock held. | ||
352 | */ | ||
353 | void gen_pool_for_each_chunk(struct gen_pool *pool, | ||
354 | void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data), | ||
355 | void *data) | ||
356 | { | ||
357 | struct gen_pool_chunk *chunk; | ||
358 | |||
359 | rcu_read_lock(); | ||
360 | list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) | ||
361 | func(pool, chunk, data); | ||
362 | rcu_read_unlock(); | ||
363 | } | ||
364 | EXPORT_SYMBOL(gen_pool_for_each_chunk); | ||
365 | |||
366 | /** | ||
367 | * gen_pool_avail - get available free space of the pool | ||
368 | * @pool: pool to get available free space | ||
369 | * | ||
370 | * Return available free space of the specified pool. | ||
371 | */ | ||
372 | size_t gen_pool_avail(struct gen_pool *pool) | ||
373 | { | ||
374 | struct gen_pool_chunk *chunk; | ||
375 | size_t avail = 0; | ||
376 | |||
377 | rcu_read_lock(); | ||
378 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) | ||
379 | avail += atomic_read(&chunk->avail); | ||
380 | rcu_read_unlock(); | ||
381 | return avail; | ||
382 | } | ||
383 | EXPORT_SYMBOL_GPL(gen_pool_avail); | ||
384 | |||
385 | /** | ||
386 | * gen_pool_size - get size in bytes of memory managed by the pool | ||
387 | * @pool: pool to get size | ||
388 | * | ||
389 | * Return size in bytes of memory managed by the pool. | ||
390 | */ | ||
391 | size_t gen_pool_size(struct gen_pool *pool) | ||
392 | { | ||
393 | struct gen_pool_chunk *chunk; | ||
394 | size_t size = 0; | ||
395 | |||
396 | rcu_read_lock(); | ||
397 | list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) | ||
398 | size += chunk->end_addr - chunk->start_addr; | ||
399 | rcu_read_unlock(); | ||
400 | return size; | ||
401 | } | ||
402 | EXPORT_SYMBOL_GPL(gen_pool_size); | ||
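
An illustrative sketch of the new gen_pool_for_each_chunk() iterator (not part of the patch); it recomputes what gen_pool_avail() already returns, purely to show the callback convention:

	/* Runs under rcu_read_lock(), so it must not sleep. */
	static void chunk_avail_cb(struct gen_pool *pool,
				   struct gen_pool_chunk *chunk, void *data)
	{
		size_t *total = data;

		*total += atomic_read(&chunk->avail);
	}

	static size_t pool_avail_by_iteration(struct gen_pool *pool)
	{
		size_t total = 0;

		gen_pool_for_each_chunk(pool, chunk_avail_cb, &total);
		return total;
	}
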
diff --git a/lib/llist.c b/lib/llist.c new file mode 100644 index 000000000000..da445724fa1f --- /dev/null +++ b/lib/llist.c | |||
@@ -0,0 +1,129 @@ | |||
1 | /* | ||
2 | * Lock-less NULL-terminated singly linked list | ||
3 | * | ||
4 | * The basic atomic operation of this list is cmpxchg on long. On | ||
5 | * architectures that don't have an NMI-safe cmpxchg implementation, | ||
6 | * the list can NOT be used in NMI handlers. So code that uses the | ||
7 | * list in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. | ||
8 | * | ||
9 | * Copyright 2010,2011 Intel Corp. | ||
10 | * Author: Huang Ying <ying.huang@intel.com> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License version | ||
14 | * 2 as published by the Free Software Foundation; | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
24 | */ | ||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/interrupt.h> | ||
28 | #include <linux/llist.h> | ||
29 | |||
30 | #include <asm/system.h> | ||
31 | |||
32 | /** | ||
33 | * llist_add - add a new entry | ||
34 | * @new: new entry to be added | ||
35 | * @head: the head for your lock-less list | ||
36 | */ | ||
37 | void llist_add(struct llist_node *new, struct llist_head *head) | ||
38 | { | ||
39 | struct llist_node *entry, *old_entry; | ||
40 | |||
41 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
42 | BUG_ON(in_nmi()); | ||
43 | #endif | ||
44 | |||
45 | entry = head->first; | ||
46 | do { | ||
47 | old_entry = entry; | ||
48 | new->next = entry; | ||
49 | cpu_relax(); | ||
50 | } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); | ||
51 | } | ||
52 | EXPORT_SYMBOL_GPL(llist_add); | ||
53 | |||
54 | /** | ||
55 | * llist_add_batch - add several linked entries in batch | ||
56 | * @new_first: first entry in batch to be added | ||
57 | * @new_last: last entry in batch to be added | ||
58 | * @head: the head for your lock-less list | ||
59 | */ | ||
60 | void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, | ||
61 | struct llist_head *head) | ||
62 | { | ||
63 | struct llist_node *entry, *old_entry; | ||
64 | |||
65 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
66 | BUG_ON(in_nmi()); | ||
67 | #endif | ||
68 | |||
69 | entry = head->first; | ||
70 | do { | ||
71 | old_entry = entry; | ||
72 | new_last->next = entry; | ||
73 | cpu_relax(); | ||
74 | } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); | ||
75 | } | ||
76 | EXPORT_SYMBOL_GPL(llist_add_batch); | ||
77 | |||
78 | /** | ||
79 | * llist_del_first - delete the first entry of lock-less list | ||
80 | * @head: the head for your lock-less list | ||
81 | * | ||
82 | * If the list is empty, return NULL; otherwise return the first | ||
83 | * entry deleted, which is the newest added one. | ||
84 | * | ||
85 | * Only one llist_del_first user may run simultaneously with | ||
86 | * multiple llist_add users without a lock, because otherwise a | ||
87 | * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, | ||
88 | * llist_add) sequence in another user may change @head->first->next | ||
89 | * while keeping @head->first. If multiple consumers are needed, | ||
90 | * please use llist_del_all or use a lock between consumers. | ||
91 | */ | ||
92 | struct llist_node *llist_del_first(struct llist_head *head) | ||
93 | { | ||
94 | struct llist_node *entry, *old_entry, *next; | ||
95 | |||
96 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
97 | BUG_ON(in_nmi()); | ||
98 | #endif | ||
99 | |||
100 | entry = head->first; | ||
101 | do { | ||
102 | if (entry == NULL) | ||
103 | return NULL; | ||
104 | old_entry = entry; | ||
105 | next = entry->next; | ||
106 | cpu_relax(); | ||
107 | } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); | ||
108 | |||
109 | return entry; | ||
110 | } | ||
111 | EXPORT_SYMBOL_GPL(llist_del_first); | ||
112 | |||
113 | /** | ||
114 | * llist_del_all - delete all entries from lock-less list | ||
115 | * @head: the head of lock-less list to delete all entries | ||
116 | * | ||
117 | * If the list is empty, return NULL; otherwise delete all entries | ||
118 | * and return a pointer to the first entry. The deleted entries are | ||
119 | * ordered from the newest to the oldest added one. | ||
120 | */ | ||
121 | struct llist_node *llist_del_all(struct llist_head *head) | ||
122 | { | ||
123 | #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG | ||
124 | BUG_ON(in_nmi()); | ||
125 | #endif | ||
126 | |||
127 | return xchg(&head->first, NULL); | ||
128 | } | ||
129 | EXPORT_SYMBOL_GPL(llist_del_all); | ||
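
Since llist_del_all() returns entries newest-first, a consumer that needs arrival order has to reverse the detached list itself, as the header comment notes. An illustrative helper (not part of the patch); it needs no atomics because it only touches a list already detached from all producers:

	static struct llist_node *llist_reverse(struct llist_node *node)
	{
		struct llist_node *rev = NULL;

		while (node) {
			struct llist_node *next = node->next;

			node->next = rev;	/* relink in opposite order */
			rev = node;
			node = next;
		}
		return rev;			/* oldest entry first */
	}
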
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 740c4f52059c..2b43ba051ac9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/hugetlb.h> | 53 | #include <linux/hugetlb.h> |
54 | #include <linux/memory_hotplug.h> | 54 | #include <linux/memory_hotplug.h> |
55 | #include <linux/mm_inline.h> | 55 | #include <linux/mm_inline.h> |
56 | #include <linux/kfifo.h> | ||
56 | #include "internal.h" | 57 | #include "internal.h" |
57 | 58 | ||
58 | int sysctl_memory_failure_early_kill __read_mostly = 0; | 59 | int sysctl_memory_failure_early_kill __read_mostly = 0; |
@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno) | |||
1178 | __memory_failure(pfn, trapno, 0); | 1179 | __memory_failure(pfn, trapno, 0); |
1179 | } | 1180 | } |
1180 | 1181 | ||
1182 | #define MEMORY_FAILURE_FIFO_ORDER 4 | ||
1183 | #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) | ||
1184 | |||
1185 | struct memory_failure_entry { | ||
1186 | unsigned long pfn; | ||
1187 | int trapno; | ||
1188 | int flags; | ||
1189 | }; | ||
1190 | |||
1191 | struct memory_failure_cpu { | ||
1192 | DECLARE_KFIFO(fifo, struct memory_failure_entry, | ||
1193 | MEMORY_FAILURE_FIFO_SIZE); | ||
1194 | spinlock_t lock; | ||
1195 | struct work_struct work; | ||
1196 | }; | ||
1197 | |||
1198 | static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); | ||
1199 | |||
1200 | /** | ||
1201 | * memory_failure_queue - Schedule handling memory failure of a page. | ||
1202 | * @pfn: Page Number of the corrupted page | ||
1203 | * @trapno: Trap number reported in the signal to user space. | ||
1204 | * @flags: Flags for memory failure handling | ||
1205 | * | ||
1206 | * This function is called by the low level hardware error handler | ||
1207 | * when it detects hardware memory corruption of a page. It schedules | ||
1208 | * the recovering of error page, including dropping pages, killing | ||
1209 | * processes etc. | ||
1210 | * | ||
1211 | * The function is primarily of use for corruptions that | ||
1212 | * happen outside the current execution context (e.g. when | ||
1213 | * detected by a background scrubber) | ||
1214 | * | ||
1215 | * Can run in IRQ context. | ||
1216 | */ | ||
1217 | void memory_failure_queue(unsigned long pfn, int trapno, int flags) | ||
1218 | { | ||
1219 | struct memory_failure_cpu *mf_cpu; | ||
1220 | unsigned long proc_flags; | ||
1221 | struct memory_failure_entry entry = { | ||
1222 | .pfn = pfn, | ||
1223 | .trapno = trapno, | ||
1224 | .flags = flags, | ||
1225 | }; | ||
1226 | |||
1227 | mf_cpu = &get_cpu_var(memory_failure_cpu); | ||
1228 | spin_lock_irqsave(&mf_cpu->lock, proc_flags); | ||
1229 | if (kfifo_put(&mf_cpu->fifo, &entry)) | ||
1230 | schedule_work_on(smp_processor_id(), &mf_cpu->work); | ||
1231 | else | ||
1232 | pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n", | ||
1233 | pfn); | ||
1234 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); | ||
1235 | put_cpu_var(memory_failure_cpu); | ||
1236 | } | ||
1237 | EXPORT_SYMBOL_GPL(memory_failure_queue); | ||
1238 | |||
1239 | static void memory_failure_work_func(struct work_struct *work) | ||
1240 | { | ||
1241 | struct memory_failure_cpu *mf_cpu; | ||
1242 | struct memory_failure_entry entry = { 0, }; | ||
1243 | unsigned long proc_flags; | ||
1244 | int gotten; | ||
1245 | |||
1246 | mf_cpu = &__get_cpu_var(memory_failure_cpu); | ||
1247 | for (;;) { | ||
1248 | spin_lock_irqsave(&mf_cpu->lock, proc_flags); | ||
1249 | gotten = kfifo_get(&mf_cpu->fifo, &entry); | ||
1250 | spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); | ||
1251 | if (!gotten) | ||
1252 | break; | ||
1253 | __memory_failure(entry.pfn, entry.trapno, entry.flags); | ||
1254 | } | ||
1255 | } | ||
1256 | |||
1257 | static int __init memory_failure_init(void) | ||
1258 | { | ||
1259 | struct memory_failure_cpu *mf_cpu; | ||
1260 | int cpu; | ||
1261 | |||
1262 | for_each_possible_cpu(cpu) { | ||
1263 | mf_cpu = &per_cpu(memory_failure_cpu, cpu); | ||
1264 | spin_lock_init(&mf_cpu->lock); | ||
1265 | INIT_KFIFO(mf_cpu->fifo); | ||
1266 | INIT_WORK(&mf_cpu->work, memory_failure_work_func); | ||
1267 | } | ||
1268 | |||
1269 | return 0; | ||
1270 | } | ||
1271 | core_initcall(memory_failure_init); | ||
1272 | |||
1181 | /** | 1273 | /** |
1182 | * unpoison_memory - Unpoison a previously poisoned page | 1274 | * unpoison_memory - Unpoison a previously poisoned page |
1183 | * @pfn: Page number of the to be unpoisoned page | 1275 | * @pfn: Page number of the to be unpoisoned page |
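
An illustrative caller of the new deferred path (not part of the patch; the handler name and error-record layout are hypothetical):

	/* Runs in IRQ-like context when hardware reports a corrupted
	 * page at a known physical address. */
	static void my_hw_error_handler(u64 error_paddr)
	{
		unsigned long pfn = error_paddr >> PAGE_SHIFT;

		/* Safe here: this only puts an entry into the per-CPU
		 * kfifo and schedules the work item; the actual recovery
		 * runs later from memory_failure_work_func() in process
		 * context via __memory_failure(). */
		memory_failure_queue(pfn, 0, 0);
	}
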