diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/edac_core.h | 8 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 72 |
2 files changed, 60 insertions, 20 deletions
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index f06ce9ab692c..740c7e22c023 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h | |||
@@ -463,12 +463,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
463 | const unsigned long page_frame_number, | 463 | const unsigned long page_frame_number, |
464 | const unsigned long offset_in_page, | 464 | const unsigned long offset_in_page, |
465 | const unsigned long syndrome, | 465 | const unsigned long syndrome, |
466 | const int layer0, | 466 | const int top_layer, |
467 | const int layer1, | 467 | const int mid_layer, |
468 | const int layer2, | 468 | const int low_layer, |
469 | const char *msg, | 469 | const char *msg, |
470 | const char *other_detail, | 470 | const char *other_detail, |
471 | const void *mcelog); | 471 | const void *arch_log); |
472 | 472 | ||
473 | /* | 473 | /* |
474 | * edac_device APIs | 474 | * edac_device APIs |
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 10f375032e96..ce25750a83f9 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c | |||
@@ -27,12 +27,17 @@ | |||
27 | #include <linux/list.h> | 27 | #include <linux/list.h> |
28 | #include <linux/ctype.h> | 28 | #include <linux/ctype.h> |
29 | #include <linux/edac.h> | 29 | #include <linux/edac.h> |
30 | #include <linux/bitops.h> | ||
30 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
31 | #include <asm/page.h> | 32 | #include <asm/page.h> |
32 | #include <asm/edac.h> | 33 | #include <asm/edac.h> |
33 | #include "edac_core.h" | 34 | #include "edac_core.h" |
34 | #include "edac_module.h" | 35 | #include "edac_module.h" |
35 | 36 | ||
37 | #define CREATE_TRACE_POINTS | ||
38 | #define TRACE_INCLUDE_PATH ../../include/ras | ||
39 | #include <ras/ras_event.h> | ||
40 | |||
36 | /* lock to memory controller's control array */ | 41 | /* lock to memory controller's control array */ |
37 | static DEFINE_MUTEX(mem_ctls_mutex); | 42 | static DEFINE_MUTEX(mem_ctls_mutex); |
38 | static LIST_HEAD(mc_devices); | 43 | static LIST_HEAD(mc_devices); |
@@ -384,6 +389,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, | |||
384 | * which will perform kobj unregistration and the actual free | 389 | * which will perform kobj unregistration and the actual free |
385 | * will occur during the kobject callback operation | 390 | * will occur during the kobject callback operation |
386 | */ | 391 | */ |
392 | |||
387 | return mci; | 393 | return mci; |
388 | } | 394 | } |
389 | EXPORT_SYMBOL_GPL(edac_mc_alloc); | 395 | EXPORT_SYMBOL_GPL(edac_mc_alloc); |
@@ -902,19 +908,19 @@ static void edac_ce_error(struct mem_ctl_info *mci, | |||
902 | const bool enable_per_layer_report, | 908 | const bool enable_per_layer_report, |
903 | const unsigned long page_frame_number, | 909 | const unsigned long page_frame_number, |
904 | const unsigned long offset_in_page, | 910 | const unsigned long offset_in_page, |
905 | u32 grain) | 911 | long grain) |
906 | { | 912 | { |
907 | unsigned long remapped_page; | 913 | unsigned long remapped_page; |
908 | 914 | ||
909 | if (edac_mc_get_log_ce()) { | 915 | if (edac_mc_get_log_ce()) { |
910 | if (other_detail && *other_detail) | 916 | if (other_detail && *other_detail) |
911 | edac_mc_printk(mci, KERN_WARNING, | 917 | edac_mc_printk(mci, KERN_WARNING, |
912 | "CE %s on %s (%s%s - %s)\n", | 918 | "CE %s on %s (%s %s - %s)\n", |
913 | msg, label, location, | 919 | msg, label, location, |
914 | detail, other_detail); | 920 | detail, other_detail); |
915 | else | 921 | else |
916 | edac_mc_printk(mci, KERN_WARNING, | 922 | edac_mc_printk(mci, KERN_WARNING, |
917 | "CE %s on %s (%s%s)\n", | 923 | "CE %s on %s (%s %s)\n", |
918 | msg, label, location, | 924 | msg, label, location, |
919 | detail); | 925 | detail); |
920 | } | 926 | } |
@@ -953,12 +959,12 @@ static void edac_ue_error(struct mem_ctl_info *mci, | |||
953 | if (edac_mc_get_log_ue()) { | 959 | if (edac_mc_get_log_ue()) { |
954 | if (other_detail && *other_detail) | 960 | if (other_detail && *other_detail) |
955 | edac_mc_printk(mci, KERN_WARNING, | 961 | edac_mc_printk(mci, KERN_WARNING, |
956 | "UE %s on %s (%s%s - %s)\n", | 962 | "UE %s on %s (%s %s - %s)\n", |
957 | msg, label, location, detail, | 963 | msg, label, location, detail, |
958 | other_detail); | 964 | other_detail); |
959 | else | 965 | else |
960 | edac_mc_printk(mci, KERN_WARNING, | 966 | edac_mc_printk(mci, KERN_WARNING, |
961 | "UE %s on %s (%s%s)\n", | 967 | "UE %s on %s (%s %s)\n", |
962 | msg, label, location, detail); | 968 | msg, label, location, detail); |
963 | } | 969 | } |
964 | 970 | ||
@@ -975,27 +981,50 @@ static void edac_ue_error(struct mem_ctl_info *mci, | |||
975 | } | 981 | } |
976 | 982 | ||
977 | #define OTHER_LABEL " or " | 983 | #define OTHER_LABEL " or " |
984 | |||
985 | /** | ||
986 | * edac_mc_handle_error - reports a memory event to userspace | ||
987 | * | ||
988 | * @type: severity of the error (CE/UE/Fatal) | ||
989 | * @mci: a struct mem_ctl_info pointer | ||
990 | * @page_frame_number: mem page where the error occurred | ||
991 | * @offset_in_page: offset of the error inside the page | ||
992 | * @syndrome: ECC syndrome | ||
993 | * @top_layer: Memory layer[0] position | ||
994 | * @mid_layer: Memory layer[1] position | ||
995 | * @low_layer: Memory layer[2] position | ||
996 | * @msg: Message meaningful to the end users that | ||
997 | * explains the event | ||
998 | * @other_detail: Technical details about the event that | ||
999 | * may help hardware manufacturers and | ||
1000 | * EDAC developers to analyse the event | ||
1001 | * @arch_log: Architecture-specific struct that can | ||
1002 | * be used to add extended information to the | ||
1003 | * tracepoint, like dumping MCE registers. | ||
1004 | */ | ||
978 | void edac_mc_handle_error(const enum hw_event_mc_err_type type, | 1005 | void edac_mc_handle_error(const enum hw_event_mc_err_type type, |
979 | struct mem_ctl_info *mci, | 1006 | struct mem_ctl_info *mci, |
980 | const unsigned long page_frame_number, | 1007 | const unsigned long page_frame_number, |
981 | const unsigned long offset_in_page, | 1008 | const unsigned long offset_in_page, |
982 | const unsigned long syndrome, | 1009 | const unsigned long syndrome, |
983 | const int layer0, | 1010 | const int top_layer, |
984 | const int layer1, | 1011 | const int mid_layer, |
985 | const int layer2, | 1012 | const int low_layer, |
986 | const char *msg, | 1013 | const char *msg, |
987 | const char *other_detail, | 1014 | const char *other_detail, |
988 | const void *mcelog) | 1015 | const void *arch_log) |
989 | { | 1016 | { |
990 | /* FIXME: too much for stack: move it to some pre-allocated area */ | 1017 | /* FIXME: too much for stack: move it to some pre-allocated area */ |
991 | char detail[80], location[80]; | 1018 | char detail[80], location[80]; |
992 | char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; | 1019 | char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; |
993 | char *p; | 1020 | char *p; |
994 | int row = -1, chan = -1; | 1021 | int row = -1, chan = -1; |
995 | int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 }; | 1022 | int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; |
996 | int i; | 1023 | int i; |
997 | u32 grain; | 1024 | long grain; |
998 | bool enable_per_layer_report = false; | 1025 | bool enable_per_layer_report = false; |
1026 | u16 error_count; /* FIXME: make it a parameter */ | ||
1027 | u8 grain_bits; | ||
999 | 1028 | ||
1000 | debugf3("MC%d: %s()\n", mci->mc_idx, __func__); | 1029 | debugf3("MC%d: %s()\n", mci->mc_idx, __func__); |
1001 | 1030 | ||
@@ -1045,11 +1074,11 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1045 | for (i = 0; i < mci->tot_dimms; i++) { | 1074 | for (i = 0; i < mci->tot_dimms; i++) { |
1046 | struct dimm_info *dimm = &mci->dimms[i]; | 1075 | struct dimm_info *dimm = &mci->dimms[i]; |
1047 | 1076 | ||
1048 | if (layer0 >= 0 && layer0 != dimm->location[0]) | 1077 | if (top_layer >= 0 && top_layer != dimm->location[0]) |
1049 | continue; | 1078 | continue; |
1050 | if (layer1 >= 0 && layer1 != dimm->location[1]) | 1079 | if (mid_layer >= 0 && mid_layer != dimm->location[1]) |
1051 | continue; | 1080 | continue; |
1052 | if (layer2 >= 0 && layer2 != dimm->location[2]) | 1081 | if (low_layer >= 0 && low_layer != dimm->location[2]) |
1053 | continue; | 1082 | continue; |
1054 | 1083 | ||
1055 | /* get the max grain, over the error match range */ | 1084 | /* get the max grain, over the error match range */ |
@@ -1120,11 +1149,22 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1120 | edac_layer_name[mci->layers[i].type], | 1149 | edac_layer_name[mci->layers[i].type], |
1121 | pos[i]); | 1150 | pos[i]); |
1122 | } | 1151 | } |
1152 | if (p > location) | ||
1153 | *(p - 1) = '\0'; | ||
1154 | |||
1155 | /* Report the error via the trace interface */ | ||
1156 | |||
1157 | error_count = 1; /* FIXME: allow changing it */ | ||
1158 | grain_bits = fls_long(grain) + 1; | ||
1159 | trace_mc_event(type, msg, label, error_count, | ||
1160 | mci->mc_idx, top_layer, mid_layer, low_layer, | ||
1161 | PAGES_TO_MiB(page_frame_number) | offset_in_page, | ||
1162 | grain_bits, syndrome, other_detail); | ||
1123 | 1163 | ||
1124 | /* Memory type dependent details about the error */ | 1164 | /* Memory type dependent details about the error */ |
1125 | if (type == HW_EVENT_ERR_CORRECTED) { | 1165 | if (type == HW_EVENT_ERR_CORRECTED) { |
1126 | snprintf(detail, sizeof(detail), | 1166 | snprintf(detail, sizeof(detail), |
1127 | "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx", | 1167 | "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", |
1128 | page_frame_number, offset_in_page, | 1168 | page_frame_number, offset_in_page, |
1129 | grain, syndrome); | 1169 | grain, syndrome); |
1130 | edac_ce_error(mci, pos, msg, location, label, detail, | 1170 | edac_ce_error(mci, pos, msg, location, label, detail, |
@@ -1132,7 +1172,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, | |||
1132 | page_frame_number, offset_in_page, grain); | 1172 | page_frame_number, offset_in_page, grain); |
1133 | } else { | 1173 | } else { |
1134 | snprintf(detail, sizeof(detail), | 1174 | snprintf(detail, sizeof(detail), |
1135 | "page:0x%lx offset:0x%lx grain:%d", | 1175 | "page:0x%lx offset:0x%lx grain:%ld", |
1136 | page_frame_number, offset_in_page, grain); | 1176 | page_frame_number, offset_in_page, grain); |
1137 | 1177 | ||
1138 | edac_ue_error(mci, pos, msg, location, label, detail, | 1178 | edac_ue_error(mci, pos, msg, location, label, detail, |