aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mauro Carvalho Chehab <mchehab@redhat.com> 2013-02-19 19:35:41 -0500
committer: Mauro Carvalho Chehab <mchehab@redhat.com> 2013-02-25 17:42:17 -0500
commit: 8ae8f50ad8979bb670598ff92eebea611b799f10 (patch)
tree: 4e30597383799571dd7bb025f521fa9504678e09
parent: 689c9cd8128f13bf9843a3e133423f5e3e0ce4aa (diff)
ghes_edac: Fix RAS tracing
With the current version of CPER, there's no way to associate an error with the memory error. So, the error location in EDAC layers is unused.

As CPER has its own idea about memory architectural layers, just output whatever is there inside the driver's detail at the RAS tracepoint.

The EDAC location is kept untouched, in case, at some point in the future, we can actually map the error onto the DIMM labels.

Now, the error message:

[ 72.396625] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 0
[ 72.396627] {1}[Hardware Error]: APEI generic hardware error status
[ 72.396628] {1}[Hardware Error]: severity: 2, corrected
[ 72.396630] {1}[Hardware Error]: section: 0, severity: 2, corrected
[ 72.396632] {1}[Hardware Error]: flags: 0x01
[ 72.396634] {1}[Hardware Error]: primary
[ 72.396635] {1}[Hardware Error]: section_type: memory error
[ 72.396637] {1}[Hardware Error]: error_status: 0x0000000000000400
[ 72.396638] {1}[Hardware Error]: node: 3
[ 72.396639] {1}[Hardware Error]: card: 0
[ 72.396640] {1}[Hardware Error]: module: 0
[ 72.396641] {1}[Hardware Error]: device: 0
[ 72.396643] {1}[Hardware Error]: error_type: 18, unknown
[ 72.396666] EDAC MC0: 1 CE reserved error (18) on unknown label (node:3 card:0 module:0 page:0x0 offset:0x0 grain:0 syndrome:0x0 - status(0x0000000000000400): Storage error in DRAM memory)

is properly represented in the trace event:

kworker/0:2-584 [000] .... 72.396657: mc_event: 1 Corrected error: reserved error (18) on unknown label (mc:0 location:-1:-1:-1 address:0x00000000 grain:1 syndrome:0x00000000 APEI location: node:3 card:0 module:0 status(0x0000000000000400): Storage error in DRAM memory)

Tested on a 4-socket E5-4650 Sandy Bridge machine.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
-rw-r--r-- drivers/edac/ghes_edac.c 13
1 files changed, 13 insertions, 0 deletions
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 1bde45141073..636dcf18d5b6 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -15,6 +15,7 @@
15#include <linux/edac.h> 15#include <linux/edac.h>
16#include <linux/dmi.h> 16#include <linux/dmi.h>
17#include "edac_core.h" 17#include "edac_core.h"
18#include <ras/ras_event.h>
18 19
19#define GHES_EDAC_REVISION " Ver: 1.0.0" 20#define GHES_EDAC_REVISION " Ver: 1.0.0"
20 21
@@ -24,6 +25,7 @@ struct ghes_edac_pvt {
24 struct mem_ctl_info *mci; 25 struct mem_ctl_info *mci;
25 26
26 /* Buffers for the error handling routine */ 27 /* Buffers for the error handling routine */
28 char detail_location[240];
27 char other_detail[160]; 29 char other_detail[160];
28 char msg[80]; 30 char msg[80];
29}; 31};
@@ -191,6 +193,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
191 struct mem_ctl_info *mci; 193 struct mem_ctl_info *mci;
192 struct ghes_edac_pvt *pvt = NULL; 194 struct ghes_edac_pvt *pvt = NULL;
193 char *p; 195 char *p;
196 u8 grain_bits;
194 197
195 list_for_each_entry(pvt, &ghes_reglist, list) { 198 list_for_each_entry(pvt, &ghes_reglist, list) {
196 if (ghes == pvt->ghes) 199 if (ghes == pvt->ghes)
@@ -398,6 +401,16 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
398 if (p > pvt->other_detail) 401 if (p > pvt->other_detail)
399 *(p - 1) = '\0'; 402 *(p - 1) = '\0';
400 403
404 /* Generate the trace event */
405 grain_bits = fls_long(e->grain);
406 sprintf(pvt->detail_location, "APEI location: %s %s",
407 e->location, e->other_detail);
408 trace_mc_event(type, e->msg, e->label, e->error_count,
409 mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
410 PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
411 grain_bits, e->syndrome, pvt->detail_location);
412
413 /* Report the error via EDAC API */
401 edac_raw_mc_handle_error(type, mci, e); 414 edac_raw_mc_handle_error(type, mci, e);
402} 415}
403EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); 416EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);