aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab@redhat.com>2013-02-21 11:36:45 -0500
committerMauro Carvalho Chehab <mchehab@redhat.com>2013-02-21 11:48:45 -0500
commitc7ef7645544131b0750478d1cf94cdfa945c809d (patch)
tree4442e415b365cd43be1d8a73703f809f78177560
parent80cc7d87d5eb34375f916d282450a0906a8ead60 (diff)
edac: reduce stack pressure by using a pre-allocated buffer
The number of variables on the stack is too large. Reduce the stack usage by using a pre-allocated error buffer. Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
-rw-r--r--drivers/edac/edac_mc.c81
-rw-r--r--include/linux/edac.h56
2 files changed, 104 insertions, 33 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 34eb9703ed33..4f18dd755939 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1065,7 +1065,6 @@ static void edac_ue_error(struct mem_ctl_info *mci,
1065 edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); 1065 edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
1066} 1066}
1067 1067
1068#define OTHER_LABEL " or "
1069 1068
1070/** 1069/**
1071 * edac_mc_handle_error - reports a memory event to userspace 1070 * edac_mc_handle_error - reports a memory event to userspace
@@ -1097,19 +1096,28 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1097 const char *msg, 1096 const char *msg,
1098 const char *other_detail) 1097 const char *other_detail)
1099{ 1098{
1100 /* FIXME: too much for stack: move it to some pre-alocated area */ 1099 char detail[80];
1101 char detail[80], location[80];
1102 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1103 char *p; 1100 char *p;
1104 int row = -1, chan = -1; 1101 int row = -1, chan = -1;
1105 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; 1102 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1106 int i; 1103 int i, n_labels = 0;
1107 long grain;
1108 bool enable_per_layer_report = false;
1109 u8 grain_bits; 1104 u8 grain_bits;
1105 struct edac_raw_error_desc *e = &mci->error_desc;
1110 1106
1111 edac_dbg(3, "MC%d\n", mci->mc_idx); 1107 edac_dbg(3, "MC%d\n", mci->mc_idx);
1112 1108
1109 /* Fills the error report buffer */
1110 memset(e, 0, sizeof (*e));
1111 e->error_count = error_count;
1112 e->top_layer = top_layer;
1113 e->mid_layer = mid_layer;
1114 e->low_layer = low_layer;
1115 e->page_frame_number = page_frame_number;
1116 e->offset_in_page = offset_in_page;
1117 e->syndrome = syndrome;
1118 e->msg = msg;
1119 e->other_detail = other_detail;
1120
1113 /* 1121 /*
1114 * Check if the event report is consistent and if the memory 1122 * Check if the event report is consistent and if the memory
1115 * location is known. If it is known, enable_per_layer_report will be 1123 * location is known. If it is known, enable_per_layer_report will be
@@ -1132,7 +1140,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1132 pos[i] = -1; 1140 pos[i] = -1;
1133 } 1141 }
1134 if (pos[i] >= 0) 1142 if (pos[i] >= 0)
1135 enable_per_layer_report = true; 1143 e->enable_per_layer_report = true;
1136 } 1144 }
1137 1145
1138 /* 1146 /*
@@ -1146,8 +1154,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1146 * where each memory belongs to a separate channel within the same 1154 * where each memory belongs to a separate channel within the same
1147 * branch. 1155 * branch.
1148 */ 1156 */
1149 grain = 0; 1157 p = e->label;
1150 p = label;
1151 *p = '\0'; 1158 *p = '\0';
1152 1159
1153 for (i = 0; i < mci->tot_dimms; i++) { 1160 for (i = 0; i < mci->tot_dimms; i++) {
@@ -1161,8 +1168,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1161 continue; 1168 continue;
1162 1169
1163 /* get the max grain, over the error match range */ 1170 /* get the max grain, over the error match range */
1164 if (dimm->grain > grain) 1171 if (dimm->grain > e->grain)
1165 grain = dimm->grain; 1172 e->grain = dimm->grain;
1166 1173
1167 /* 1174 /*
1168 * If the error is memory-controller wide, there's no need to 1175 * If the error is memory-controller wide, there's no need to
@@ -1170,8 +1177,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1170 * channel/memory controller/... may be affected. 1177 * channel/memory controller/... may be affected.
1171 * Also, don't show errors for empty DIMM slots. 1178 * Also, don't show errors for empty DIMM slots.
1172 */ 1179 */
1173 if (enable_per_layer_report && dimm->nr_pages) { 1180 if (e->enable_per_layer_report && dimm->nr_pages) {
1174 if (p != label) { 1181 if (n_labels >= EDAC_MAX_LABELS) {
1182 e->enable_per_layer_report = false;
1183 break;
1184 }
1185 n_labels++;
1186 if (p != e->label) {
1175 strcpy(p, OTHER_LABEL); 1187 strcpy(p, OTHER_LABEL);
1176 p += strlen(OTHER_LABEL); 1188 p += strlen(OTHER_LABEL);
1177 } 1189 }
@@ -1198,12 +1210,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1198 } 1210 }
1199 } 1211 }
1200 1212
1201 if (!enable_per_layer_report) { 1213 if (!e->enable_per_layer_report) {
1202 strcpy(label, "any memory"); 1214 strcpy(e->label, "any memory");
1203 } else { 1215 } else {
1204 edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); 1216 edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1205 if (p == label) 1217 if (p == e->label)
1206 strcpy(label, "unknown memory"); 1218 strcpy(e->label, "unknown memory");
1207 if (type == HW_EVENT_ERR_CORRECTED) { 1219 if (type == HW_EVENT_ERR_CORRECTED) {
1208 if (row >= 0) { 1220 if (row >= 0) {
1209 mci->csrows[row]->ce_count += error_count; 1221 mci->csrows[row]->ce_count += error_count;
@@ -1216,7 +1228,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1216 } 1228 }
1217 1229
1218 /* Fill the RAM location data */ 1230 /* Fill the RAM location data */
1219 p = location; 1231 p = e->location;
1220 1232
1221 for (i = 0; i < mci->n_layers; i++) { 1233 for (i = 0; i < mci->n_layers; i++) {
1222 if (pos[i] < 0) 1234 if (pos[i] < 0)
@@ -1226,32 +1238,35 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1226 edac_layer_name[mci->layers[i].type], 1238 edac_layer_name[mci->layers[i].type],
1227 pos[i]); 1239 pos[i]);
1228 } 1240 }
1229 if (p > location) 1241 if (p > e->location)
1230 *(p - 1) = '\0'; 1242 *(p - 1) = '\0';
1231 1243
1232 /* Report the error via the trace interface */ 1244 /* Report the error via the trace interface */
1233 grain_bits = fls_long(grain) + 1; 1245 grain_bits = fls_long(e->grain) + 1;
1234 trace_mc_event(type, msg, label, error_count, 1246 trace_mc_event(type, e->msg, e->label, e->error_count,
1235 mci->mc_idx, top_layer, mid_layer, low_layer, 1247 mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
1236 PAGES_TO_MiB(page_frame_number) | offset_in_page, 1248 PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
1237 grain_bits, syndrome, other_detail); 1249 grain_bits, e->syndrome, other_detail);
1238 1250
1239 /* Memory type dependent details about the error */ 1251 /* Memory type dependent details about the error */
1240 if (type == HW_EVENT_ERR_CORRECTED) { 1252 if (type == HW_EVENT_ERR_CORRECTED) {
1241 snprintf(detail, sizeof(detail), 1253 snprintf(detail, sizeof(detail),
1242 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", 1254 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1243 page_frame_number, offset_in_page, 1255 e->page_frame_number, e->offset_in_page,
1244 grain, syndrome); 1256 e->grain, e->syndrome);
1245 edac_ce_error(mci, error_count, pos, msg, location, label, 1257 edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
1246 detail, other_detail, enable_per_layer_report, 1258 e->label, detail, other_detail,
1247 page_frame_number, offset_in_page, grain); 1259 e->enable_per_layer_report,
1260 e->page_frame_number, e->offset_in_page,
1261 e->grain);
1248 } else { 1262 } else {
1249 snprintf(detail, sizeof(detail), 1263 snprintf(detail, sizeof(detail),
1250 "page:0x%lx offset:0x%lx grain:%ld", 1264 "page:0x%lx offset:0x%lx grain:%ld",
1251 page_frame_number, offset_in_page, grain); 1265 page_frame_number, offset_in_page, e->grain);
1252 1266
1253 edac_ue_error(mci, error_count, pos, msg, location, label, 1267 edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
1254 detail, other_detail, enable_per_layer_report); 1268 e->label, detail, other_detail,
1269 e->enable_per_layer_report);
1255 } 1270 }
1256} 1271}
1257EXPORT_SYMBOL_GPL(edac_mc_handle_error); 1272EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index ff18efc754f3..096b7fcdf484 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -47,8 +47,18 @@ static inline void opstate_init(void)
47 return; 47 return;
48} 48}
49 49
50/* Max length of a DIMM label*/
50#define EDAC_MC_LABEL_LEN 31 51#define EDAC_MC_LABEL_LEN 31
51 52
53/* Maximum size of the location string */
54#define LOCATION_SIZE 80
55
56/* Defines the maximum number of labels that can be reported */
57#define EDAC_MAX_LABELS 8
58
59/* String used to join two or more labels */
60#define OTHER_LABEL " or "
61
52/** 62/**
53 * enum dev_type - describe the type of memory DRAM chips used at the stick 63 * enum dev_type - describe the type of memory DRAM chips used at the stick
54 * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it 64 * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it
@@ -553,6 +563,46 @@ struct errcount_attribute_data {
553 int layer0, layer1, layer2; 563 int layer0, layer1, layer2;
554}; 564};
555 565
566/**
567 * edac_raw_error_desc - Raw error report structure
568 * @grain: minimum granularity for an error report, in bytes
569 * @error_count: number of errors of the same type
570 * @top_layer: top layer of the error (layer[0])
571 * @mid_layer: middle layer of the error (layer[1])
572 * @low_layer: low layer of the error (layer[2])
573 * @page_frame_number: page where the error happened
574 * @offset_in_page: page offset
575 * @syndrome: syndrome of the error (or 0 if unknown or if
576 * the syndrome is not applicable)
577 * @msg: error message
578 * @location: location of the error
579 * @label: label of the affected DIMM(s)
580 * @other_detail: other driver-specific detail about the error
581 * @enable_per_layer_report: if false, the error affects all layers
582 * (typically, a memory controller error)
583 */
584struct edac_raw_error_desc {
585 /*
586 * NOTE: everything before grain won't be cleaned by
587 * edac_raw_error_desc_clean()
588 */
589 char location[LOCATION_SIZE];
590 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
591 long grain;
592
593 /* the vars below and grain will be cleaned on every new error report */
594 u16 error_count;
595 int top_layer;
596 int mid_layer;
597 int low_layer;
598 unsigned long page_frame_number;
599 unsigned long offset_in_page;
600 unsigned long syndrome;
601 const char *msg;
602 const char *other_detail;
603 bool enable_per_layer_report;
604};
605
556/* MEMORY controller information structure 606/* MEMORY controller information structure
557 */ 607 */
558struct mem_ctl_info { 608struct mem_ctl_info {
@@ -660,6 +710,12 @@ struct mem_ctl_info {
660 /* work struct for this MC */ 710 /* work struct for this MC */
661 struct delayed_work work; 711 struct delayed_work work;
662 712
713 /*
714 * Used to report an error - by being at the global struct
715 * makes the memory allocated by the EDAC core
716 */
717 struct edac_raw_error_desc error_desc;
718
663 /* the internal state of this controller instance */ 719 /* the internal state of this controller instance */
664 int op_state; 720 int op_state;
665 721