aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/edac/ghes_edac.c195
1 files changed, 180 insertions, 15 deletions
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index b4acc4f2074d..1bde45141073 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -22,6 +22,10 @@ struct ghes_edac_pvt {
22 struct list_head list; 22 struct list_head list;
23 struct ghes *ghes; 23 struct ghes *ghes;
24 struct mem_ctl_info *mci; 24 struct mem_ctl_info *mci;
25
26 /* Buffers for the error handling routine */
27 char other_detail[256];
28 char msg[80];
25}; 29};
26 30
27static LIST_HEAD(ghes_reglist); 31static LIST_HEAD(ghes_reglist);
@@ -186,6 +190,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
186 struct edac_raw_error_desc *e; 190 struct edac_raw_error_desc *e;
187 struct mem_ctl_info *mci; 191 struct mem_ctl_info *mci;
188 struct ghes_edac_pvt *pvt = NULL; 192 struct ghes_edac_pvt *pvt = NULL;
193 char *p;
189 194
190 list_for_each_entry(pvt, &ghes_reglist, list) { 195 list_for_each_entry(pvt, &ghes_reglist, list) {
191 if (ghes == pvt->ghes) 196 if (ghes == pvt->ghes)
@@ -201,15 +206,14 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
201 /* Cleans the error report buffer */ 206 /* Cleans the error report buffer */
202 memset(e, 0, sizeof (*e)); 207 memset(e, 0, sizeof (*e));
203 e->error_count = 1; 208 e->error_count = 1;
204 e->msg = "APEI"; 209 strcpy(e->label, "unknown label");
205 strcpy(e->label, "unknown"); 210 e->msg = pvt->msg;
206 e->other_detail = ""; 211 e->other_detail = pvt->other_detail;
207 212 e->top_layer = -1;
208 if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { 213 e->mid_layer = -1;
209 e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; 214 e->low_layer = -1;
210 e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; 215 *pvt->other_detail = '\0';
211 e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); 216 *pvt->msg = '\0';
212 }
213 217
214 switch (sev) { 218 switch (sev) {
215 case GHES_SEV_CORRECTED: 219 case GHES_SEV_CORRECTED:
@@ -226,12 +230,173 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
226 type = HW_EVENT_ERR_INFO; 230 type = HW_EVENT_ERR_INFO;
227 } 231 }
228 232
229 sprintf(e->location, 233 edac_dbg(1, "error validation_bits: 0x%08llx\n",
230 "node:%d card:%d module:%d bank:%d device:%d row: %d column:%d bit_pos:%d", 234 (long long)mem_err->validation_bits);
231 mem_err->node, mem_err->card, mem_err->module, 235
232 mem_err->bank, mem_err->device, mem_err->row, mem_err->column, 236 /* Error type, mapped on e->msg */
233 mem_err->bit_pos); 237 if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
234 edac_dbg(3, "error at location %s\n", e->location); 238 p = pvt->msg;
239 switch (mem_err->error_type) {
240 case 0:
241 p += sprintf(p, "Unknown");
242 break;
243 case 1:
244 p += sprintf(p, "No error");
245 break;
246 case 2:
247 p += sprintf(p, "Single-bit ECC");
248 break;
249 case 3:
250 p += sprintf(p, "Multi-bit ECC");
251 break;
252 case 4:
253 p += sprintf(p, "Single-symbol ChipKill ECC");
254 break;
255 case 5:
256 p += sprintf(p, "Multi-symbol ChipKill ECC");
257 break;
258 case 6:
259 p += sprintf(p, "Master abort");
260 break;
261 case 7:
262 p += sprintf(p, "Target abort");
263 break;
264 case 8:
265 p += sprintf(p, "Parity Error");
266 break;
267 case 9:
268 p += sprintf(p, "Watchdog timeout");
269 break;
270 case 10:
271 p += sprintf(p, "Invalid address");
272 break;
273 case 11:
274 p += sprintf(p, "Mirror Broken");
275 break;
276 case 12:
277 p += sprintf(p, "Memory Sparing");
278 break;
279 case 13:
280 p += sprintf(p, "Scrub corrected error");
281 break;
282 case 14:
283 p += sprintf(p, "Scrub uncorrected error");
284 break;
285 case 15:
286 p += sprintf(p, "Physical Memory Map-out event");
287 break;
288 default:
289 p += sprintf(p, "reserved error (%d)",
290 mem_err->error_type);
291 }
292 } else {
293 strcpy(pvt->msg, "unknown error");
294 }
295
296 /* Error address */
297 if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
298 e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
299 e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
300 }
301
302 /* Error grain */
303 if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) {
304 e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
305 }
306
307 /* Memory error location, mapped on e->location */
308 p = e->location;
309 if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
310 p += sprintf(p, "node:%d ", mem_err->node);
311 if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
312 p += sprintf(p, "card:%d ", mem_err->card);
313 if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
314 p += sprintf(p, "module:%d ", mem_err->module);
315 if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
316 p += sprintf(p, "bank:%d ", mem_err->bank);
317 if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
318 p += sprintf(p, "row:%d ", mem_err->row);
319 if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
320 p += sprintf(p, "col:%d ", mem_err->column);
321 if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
322 p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
323 if (p > e->location)
324 *(p - 1) = '\0';
325
326 /* All other fields are mapped on e->other_detail */
327 p = pvt->other_detail;
328 if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
329 u64 status = mem_err->error_status;
330
331 p += sprintf(p, "status(0x%016llx): ", (long long)status);
332 switch ((status >> 8) & 0xff) {
333 case 1:
334 p += sprintf(p, "Error detected internal to the component ");
335 break;
336 case 16:
337 p += sprintf(p, "Error detected in the bus ");
338 break;
339 case 4:
340 p += sprintf(p, "Storage error in DRAM memory ");
341 break;
342 case 5:
343 p += sprintf(p, "Storage error in TLB ");
344 break;
345 case 6:
346 p += sprintf(p, "Storage error in cache ");
347 break;
348 case 7:
349 p += sprintf(p, "Error in one or more functional units ");
350 break;
351 case 8:
352 p += sprintf(p, "component failed self test ");
353 break;
354 case 9:
355 p += sprintf(p, "Overflow or undervalue of internal queue ");
356 break;
357 case 17:
358 p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
359 break;
360 case 18:
361 p += sprintf(p, "Improper access error ");
362 break;
363 case 19:
364 p += sprintf(p, "Access to a memory address which is not mapped to any component ");
365 break;
366 case 20:
367 p += sprintf(p, "Loss of Lockstep ");
368 break;
369 case 21:
370 p += sprintf(p, "Response not associated with a request ");
371 break;
372 case 22:
373 p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
374 break;
375 case 23:
376 p += sprintf(p, "Detection of a PATH_ERROR ");
377 break;
378 case 25:
379 p += sprintf(p, "Bus operation timeout ");
380 break;
381 case 26:
382 p += sprintf(p, "A read was issued to data that has been poisoned ");
383 break;
384 default:
385 p += sprintf(p, "reserved ");
386 break;
387 }
388 }
389 if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
390 p += sprintf(p, "requestorID: 0x%016llx ",
391 (long long)mem_err->requestor_id);
392 if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
393 p += sprintf(p, "responderID: 0x%016llx ",
394 (long long)mem_err->responder_id);
395 if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
396 p += sprintf(p, "targetID: 0x%016llx ",
397 (long long)mem_err->target_id);
398 if (p > pvt->other_detail)
399 *(p - 1) = '\0';
235 400
236 edac_raw_mc_handle_error(type, mci, e); 401 edac_raw_mc_handle_error(type, mci, e);
237} 402}