diff options
author | Chen, Gong <gong.chen@linux.intel.com> | 2014-06-17 22:33:07 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2014-06-25 16:26:47 -0400 |
commit | 2dfb7d51a61d7ca91b131c8db612f27d9390f2d5 (patch) | |
tree | b2e9375f1ffaf2dc93418d78c27b6a13b34c8e88 | |
parent | d963cd95bea93b7db9390a71d1e2cabbb3b2c3ea (diff) |
trace, RAS: Add eMCA trace event interface
Add a trace event interface that reports all hardware-error-related information.
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | drivers/acpi/Kconfig | 4 | ||||
-rw-r--r-- | drivers/acpi/acpi_extlog.c | 27 | ||||
-rw-r--r-- | drivers/firmware/efi/cper.c | 45 | ||||
-rw-r--r-- | drivers/ras/ras.c | 3 | ||||
-rw-r--r-- | include/linux/cper.h | 23 | ||||
-rw-r--r-- | include/ras/ras_event.h | 64 |
6 files changed, 158 insertions, 8 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index a34a22841002..206942b8d105 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig | |||
@@ -370,6 +370,7 @@ config ACPI_EXTLOG | |||
370 | tristate "Extended Error Log support" | 370 | tristate "Extended Error Log support" |
371 | depends on X86_MCE && X86_LOCAL_APIC | 371 | depends on X86_MCE && X86_LOCAL_APIC |
372 | select UEFI_CPER | 372 | select UEFI_CPER |
373 | select RAS | ||
373 | default n | 374 | default n |
374 | help | 375 | help |
375 | Certain usages such as Predictive Failure Analysis (PFA) require | 376 | Certain usages such as Predictive Failure Analysis (PFA) require |
@@ -384,6 +385,7 @@ config ACPI_EXTLOG | |||
384 | 385 | ||
385 | Enhanced MCA Logging allows firmware to provide additional error | 386 | Enhanced MCA Logging allows firmware to provide additional error |
386 | information to system software, synchronous with MCE or CMCI. This | 387 | information to system software, synchronous with MCE or CMCI. This |
387 | driver adds support for that functionality. | 388 | driver adds support for that functionality with corresponding |
389 | tracepoint which carries that information to userspace. | ||
388 | 390 | ||
389 | endif # ACPI | 391 | endif # ACPI |
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 185334114d71..e61da957f30f 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/mce.h> | 16 | #include <asm/mce.h> |
17 | 17 | ||
18 | #include "apei/apei-internal.h" | 18 | #include "apei/apei-internal.h" |
19 | #include <ras/ras_event.h> | ||
19 | 20 | ||
20 | #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ | 21 | #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ |
21 | 22 | ||
@@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, | |||
137 | struct mce *mce = (struct mce *)data; | 138 | struct mce *mce = (struct mce *)data; |
138 | int bank = mce->bank; | 139 | int bank = mce->bank; |
139 | int cpu = mce->extcpu; | 140 | int cpu = mce->extcpu; |
140 | struct acpi_generic_status *estatus; | 141 | struct acpi_generic_status *estatus, *tmp; |
141 | int rc; | 142 | struct acpi_generic_data *gdata; |
143 | const uuid_le *fru_id = &NULL_UUID_LE; | ||
144 | char *fru_text = ""; | ||
145 | uuid_le *sec_type; | ||
146 | static u32 err_seq; | ||
142 | 147 | ||
143 | estatus = extlog_elog_entry_check(cpu, bank); | 148 | estatus = extlog_elog_entry_check(cpu, bank); |
144 | if (estatus == NULL) | 149 | if (estatus == NULL) |
@@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, | |||
148 | /* clear record status to enable BIOS to update it again */ | 153 | /* clear record status to enable BIOS to update it again */ |
149 | estatus->block_status = 0; | 154 | estatus->block_status = 0; |
150 | 155 | ||
151 | rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); | 156 | tmp = (struct acpi_generic_status *)elog_buf; |
157 | print_extlog_rcd(NULL, tmp, cpu); | ||
158 | |||
159 | /* log event via trace */ | ||
160 | err_seq++; | ||
161 | gdata = (struct acpi_generic_data *)(tmp + 1); | ||
162 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) | ||
163 | fru_id = (uuid_le *)gdata->fru_id; | ||
164 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) | ||
165 | fru_text = gdata->fru_text; | ||
166 | sec_type = (uuid_le *)gdata->section_type; | ||
167 | if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { | ||
168 | struct cper_sec_mem_err *mem = (void *)(gdata + 1); | ||
169 | if (gdata->error_data_length >= sizeof(*mem)) | ||
170 | trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, | ||
171 | (u8)gdata->error_severity); | ||
172 | } | ||
152 | 173 | ||
153 | return NOTIFY_STOP; | 174 | return NOTIFY_STOP; |
154 | } | 175 | } |
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index ac33a9fed341..437e6fd47311 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c | |||
@@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype) | |||
207 | } | 207 | } |
208 | EXPORT_SYMBOL_GPL(cper_mem_err_type_str); | 208 | EXPORT_SYMBOL_GPL(cper_mem_err_type_str); |
209 | 209 | ||
210 | static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) | 210 | static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) |
211 | { | 211 | { |
212 | u32 len, n; | 212 | u32 len, n; |
213 | 213 | ||
@@ -249,7 +249,7 @@ static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) | |||
249 | return n; | 249 | return n; |
250 | } | 250 | } |
251 | 251 | ||
252 | static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) | 252 | static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) |
253 | { | 253 | { |
254 | u32 len, n; | 254 | u32 len, n; |
255 | const char *bank = NULL, *device = NULL; | 255 | const char *bank = NULL, *device = NULL; |
@@ -271,8 +271,44 @@ static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) | |||
271 | return n; | 271 | return n; |
272 | } | 272 | } |
273 | 273 | ||
274 | void cper_mem_err_pack(const struct cper_sec_mem_err *mem, | ||
275 | struct cper_mem_err_compact *cmem) | ||
276 | { | ||
277 | cmem->validation_bits = mem->validation_bits; | ||
278 | cmem->node = mem->node; | ||
279 | cmem->card = mem->card; | ||
280 | cmem->module = mem->module; | ||
281 | cmem->bank = mem->bank; | ||
282 | cmem->device = mem->device; | ||
283 | cmem->row = mem->row; | ||
284 | cmem->column = mem->column; | ||
285 | cmem->bit_pos = mem->bit_pos; | ||
286 | cmem->requestor_id = mem->requestor_id; | ||
287 | cmem->responder_id = mem->responder_id; | ||
288 | cmem->target_id = mem->target_id; | ||
289 | cmem->rank = mem->rank; | ||
290 | cmem->mem_array_handle = mem->mem_array_handle; | ||
291 | cmem->mem_dev_handle = mem->mem_dev_handle; | ||
292 | } | ||
293 | |||
294 | const char *cper_mem_err_unpack(struct trace_seq *p, | ||
295 | struct cper_mem_err_compact *cmem) | ||
296 | { | ||
297 | const char *ret = p->buffer + p->len; | ||
298 | |||
299 | if (cper_mem_err_location(cmem, rcd_decode_str)) | ||
300 | trace_seq_printf(p, "%s", rcd_decode_str); | ||
301 | if (cper_dimm_err_location(cmem, rcd_decode_str)) | ||
302 | trace_seq_printf(p, "%s", rcd_decode_str); | ||
303 | trace_seq_putc(p, '\0'); | ||
304 | |||
305 | return ret; | ||
306 | } | ||
307 | |||
274 | static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) | 308 | static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) |
275 | { | 309 | { |
310 | struct cper_mem_err_compact cmem; | ||
311 | |||
276 | if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) | 312 | if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) |
277 | printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); | 313 | printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); |
278 | if (mem->validation_bits & CPER_MEM_VALID_PA) | 314 | if (mem->validation_bits & CPER_MEM_VALID_PA) |
@@ -281,14 +317,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) | |||
281 | if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) | 317 | if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) |
282 | printk("%s""physical_address_mask: 0x%016llx\n", | 318 | printk("%s""physical_address_mask: 0x%016llx\n", |
283 | pfx, mem->physical_addr_mask); | 319 | pfx, mem->physical_addr_mask); |
284 | if (cper_mem_err_location(mem, rcd_decode_str)) | 320 | cper_mem_err_pack(mem, &cmem); |
321 | if (cper_mem_err_location(&cmem, rcd_decode_str)) | ||
285 | printk("%s%s\n", pfx, rcd_decode_str); | 322 | printk("%s%s\n", pfx, rcd_decode_str); |
286 | if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { | 323 | if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { |
287 | u8 etype = mem->error_type; | 324 | u8 etype = mem->error_type; |
288 | printk("%s""error_type: %d, %s\n", pfx, etype, | 325 | printk("%s""error_type: %d, %s\n", pfx, etype, |
289 | cper_mem_err_type_str(etype)); | 326 | cper_mem_err_type_str(etype)); |
290 | } | 327 | } |
291 | if (cper_dimm_err_location(mem, rcd_decode_str)) | 328 | if (cper_dimm_err_location(&cmem, rcd_decode_str)) |
292 | printk("%s%s\n", pfx, rcd_decode_str); | 329 | printk("%s%s\n", pfx, rcd_decode_str); |
293 | } | 330 | } |
294 | 331 | ||
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 4cac43a1e25c..b67dd362b7b6 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c | |||
@@ -23,4 +23,7 @@ static int __init ras_init(void) | |||
23 | } | 23 | } |
24 | subsys_initcall(ras_init); | 24 | subsys_initcall(ras_init); |
25 | 25 | ||
26 | #if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) | ||
27 | EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); | ||
28 | #endif | ||
26 | EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); | 29 | EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); |
diff --git a/include/linux/cper.h b/include/linux/cper.h index ed088b9c1298..76abba4b238e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #define LINUX_CPER_H | 22 | #define LINUX_CPER_H |
23 | 23 | ||
24 | #include <linux/uuid.h> | 24 | #include <linux/uuid.h> |
25 | #include <linux/trace_seq.h> | ||
25 | 26 | ||
26 | /* CPER record signature and the size */ | 27 | /* CPER record signature and the size */ |
27 | #define CPER_SIG_RECORD "CPER" | 28 | #define CPER_SIG_RECORD "CPER" |
@@ -363,6 +364,24 @@ struct cper_sec_mem_err { | |||
363 | __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ | 364 | __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ |
364 | }; | 365 | }; |
365 | 366 | ||
367 | struct cper_mem_err_compact { | ||
368 | __u64 validation_bits; | ||
369 | __u16 node; | ||
370 | __u16 card; | ||
371 | __u16 module; | ||
372 | __u16 bank; | ||
373 | __u16 device; | ||
374 | __u16 row; | ||
375 | __u16 column; | ||
376 | __u16 bit_pos; | ||
377 | __u64 requestor_id; | ||
378 | __u64 responder_id; | ||
379 | __u64 target_id; | ||
380 | __u16 rank; | ||
381 | __u16 mem_array_handle; | ||
382 | __u16 mem_dev_handle; | ||
383 | }; | ||
384 | |||
366 | struct cper_sec_pcie { | 385 | struct cper_sec_pcie { |
367 | __u64 validation_bits; | 386 | __u64 validation_bits; |
368 | __u32 port_type; | 387 | __u32 port_type; |
@@ -406,5 +425,9 @@ const char *cper_severity_str(unsigned int); | |||
406 | const char *cper_mem_err_type_str(unsigned int); | 425 | const char *cper_mem_err_type_str(unsigned int); |
407 | void cper_print_bits(const char *prefix, unsigned int bits, | 426 | void cper_print_bits(const char *prefix, unsigned int bits, |
408 | const char * const strs[], unsigned int strs_size); | 427 | const char * const strs[], unsigned int strs_size); |
428 | void cper_mem_err_pack(const struct cper_sec_mem_err *, | ||
429 | struct cper_mem_err_compact *); | ||
430 | const char *cper_mem_err_unpack(struct trace_seq *, | ||
431 | struct cper_mem_err_compact *); | ||
409 | 432 | ||
410 | #endif | 433 | #endif |
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index acbcbb88eaaa..47da53c27ffa 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h | |||
@@ -9,6 +9,70 @@ | |||
9 | #include <linux/edac.h> | 9 | #include <linux/edac.h> |
10 | #include <linux/ktime.h> | 10 | #include <linux/ktime.h> |
11 | #include <linux/aer.h> | 11 | #include <linux/aer.h> |
12 | #include <linux/cper.h> | ||
13 | |||
14 | /* | ||
15 | * MCE Extended Error Log trace event | ||
16 | * | ||
17 | * These events are generated when hardware detects a corrected or | ||
18 | * uncorrected event. | ||
19 | */ | ||
20 | |||
21 | /* memory trace event */ | ||
22 | |||
23 | #if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) | ||
24 | TRACE_EVENT(extlog_mem_event, | ||
25 | TP_PROTO(struct cper_sec_mem_err *mem, | ||
26 | u32 err_seq, | ||
27 | const uuid_le *fru_id, | ||
28 | const char *fru_text, | ||
29 | u8 sev), | ||
30 | |||
31 | TP_ARGS(mem, err_seq, fru_id, fru_text, sev), | ||
32 | |||
33 | TP_STRUCT__entry( | ||
34 | __field(u32, err_seq) | ||
35 | __field(u8, etype) | ||
36 | __field(u8, sev) | ||
37 | __field(u64, pa) | ||
38 | __field(u8, pa_mask_lsb) | ||
39 | __field_struct(uuid_le, fru_id) | ||
40 | __string(fru_text, fru_text) | ||
41 | __field_struct(struct cper_mem_err_compact, data) | ||
42 | ), | ||
43 | |||
44 | TP_fast_assign( | ||
45 | __entry->err_seq = err_seq; | ||
46 | if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) | ||
47 | __entry->etype = mem->error_type; | ||
48 | else | ||
49 | __entry->etype = ~0; | ||
50 | __entry->sev = sev; | ||
51 | if (mem->validation_bits & CPER_MEM_VALID_PA) | ||
52 | __entry->pa = mem->physical_addr; | ||
53 | else | ||
54 | __entry->pa = ~0ull; | ||
55 | |||
56 | if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) | ||
57 | __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); | ||
58 | else | ||
59 | __entry->pa_mask_lsb = ~0; | ||
60 | __entry->fru_id = *fru_id; | ||
61 | __assign_str(fru_text, fru_text); | ||
62 | cper_mem_err_pack(mem, &__entry->data); | ||
63 | ), | ||
64 | |||
65 | TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", | ||
66 | __entry->err_seq, | ||
67 | cper_severity_str(__entry->sev), | ||
68 | cper_mem_err_type_str(__entry->etype), | ||
69 | __entry->pa, | ||
70 | __entry->pa_mask_lsb, | ||
71 | cper_mem_err_unpack(p, &__entry->data), | ||
72 | &__entry->fru_id, | ||
73 | __get_str(fru_text)) | ||
74 | ); | ||
75 | #endif | ||
12 | 76 | ||
13 | /* | 77 | /* |
14 | * Hardware Events Report | 78 | * Hardware Events Report |