diff options
author | Mauro Carvalho Chehab <mchehab@redhat.com> | 2010-08-26 23:16:12 -0400 |
---|---|---|
committer | Mauro Carvalho Chehab <mchehab@redhat.com> | 2010-08-30 13:56:48 -0400 |
commit | 5de6e07ed75ee29a302f50e149339ca747131121 (patch) | |
tree | 07f40e95d30d340a0fa39f722196ffd34d8ccc3a /drivers/edac | |
parent | 3e57eef64c53d4a45790fb7bb60a4ee6bf2bad30 (diff) |
i7300_edac: Add error detection code for global errors
There's no mention in the datasheet of how to enable global error
reporting. So, I'm assuming that those errors are always enabled.
Maybe I'm plain wrong about that ;)
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/i7300_edac.c | 568 |
1 files changed, 77 insertions, 491 deletions
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 3e60dbaa0e7e..e617b4f79dd8 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c | |||
@@ -158,8 +158,17 @@ static const char *numcol_toString[] = { | |||
158 | * Device 16.2: Global Error Registers | 158 | * Device 16.2: Global Error Registers |
159 | */ | 159 | */ |
160 | 160 | ||
161 | #define FERR_GLOBAL_HI 0x48 | ||
162 | static const char *ferr_global_hi_name[] = { | ||
163 | [3] = "FSB 3 Fatal Error", | ||
164 | [2] = "FSB 2 Fatal Error", | ||
165 | [1] = "FSB 1 Fatal Error", | ||
166 | [0] = "FSB 0 Fatal Error", | ||
167 | }; | ||
168 | #define ferr_global_hi_is_fatal(errno) 1 | ||
169 | |||
161 | #define FERR_GLOBAL_LO 0x40 | 170 | #define FERR_GLOBAL_LO 0x40 |
162 | static const char *ferr_global_name[] = { | 171 | static const char *ferr_global_lo_name[] = { |
163 | [31] = "Internal MCH Fatal Error", | 172 | [31] = "Internal MCH Fatal Error", |
164 | [30] = "Intel QuickData Technology Device Fatal Error", | 173 | [30] = "Intel QuickData Technology Device Fatal Error", |
165 | [29] = "FSB1 Fatal Error", | 174 | [29] = "FSB1 Fatal Error", |
@@ -193,190 +202,7 @@ static const char *ferr_global_name[] = { | |||
193 | [1] = "PCI Express Device 1 Non-Fatal Error", | 202 | [1] = "PCI Express Device 1 Non-Fatal Error", |
194 | [0] = "ESI Non-Fatal Error", | 203 | [0] = "ESI Non-Fatal Error", |
195 | }; | 204 | }; |
196 | 205 | #define ferr_global_lo_is_fatal(errno) ((errno < 16) ? 0 : 1) | |
197 | #define NERR_GLOBAL 0x44 | ||
198 | static const char *nerr_global_name[] = { | ||
199 | [31] = "Internal MCH Fatal Error", | ||
200 | [30] = "Intel QuickData Technology Device Fatal Error", | ||
201 | [29] = "FSB1 Fatal Error", | ||
202 | [28] = "FSB0 Fatal Error", | ||
203 | [27] = "FSB2 Fatal Error", | ||
204 | [26] = "FSB3 Fatal Error", | ||
205 | [25] = "Reserved", | ||
206 | [24] = "FBD Channel 0,1,2 or 3 Fatal Error", | ||
207 | [23] = "PCI Express Device 7 Fatal Error", | ||
208 | [22] = "PCI Express Device 6 Fatal Error", | ||
209 | [21] = "PCI Express Device 5 Fatal Error", | ||
210 | [20] = "PCI Express Device 4 Fatal Error", | ||
211 | [19] = "PCI Express Device 3 Fatal Error", | ||
212 | [18] = "PCI Express Device 2 Fatal Error", | ||
213 | [17] = "PCI Express Device 1 Fatal Error", | ||
214 | [16] = "ESI Fatal Error", | ||
215 | [15] = "Internal MCH Non-Fatal Error", | ||
216 | [14] = "Intel QuickData Technology Device Non Fatal Error", | ||
217 | [13] = "FSB1 Non-Fatal Error", | ||
218 | [12] = "FSB0 Non-Fatal Error", | ||
219 | [11] = "FSB2 Non-Fatal Error", | ||
220 | [10] = "FSB3 Non-Fatal Error", | ||
221 | [9] = "Reserved", | ||
222 | [8] = "FBD Channel 0,1, 2 or 3 Non-Fatal Error", | ||
223 | [7] = "PCI Express Device 7 Non-Fatal Error", | ||
224 | [6] = "PCI Express Device 6 Non-Fatal Error", | ||
225 | [5] = "PCI Express Device 5 Non-Fatal Error", | ||
226 | [4] = "PCI Express Device 4 Non-Fatal Error", | ||
227 | [3] = "PCI Express Device 3 Non-Fatal Error", | ||
228 | [2] = "PCI Express Device 2 Non-Fatal Error", | ||
229 | [1] = "PCI Express Device 1 Non-Fatal Error", | ||
230 | [0] = "ESI Non-Fatal Error", | ||
231 | }; | ||
232 | |||
233 | #if 0 | ||
234 | |||
235 | /* | ||
236 | * Error indicator bits and masks | ||
237 | * Error masks are according with Table 5-17 of i7300 datasheet | ||
238 | */ | ||
239 | |||
240 | enum error_mask { | ||
241 | EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ | ||
242 | EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ | ||
243 | EMASK_M3 = 1<<2, /* Reserved */ | ||
244 | EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ | ||
245 | EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ | ||
246 | EMASK_M6 = 1<<5, /* Unsupported on i7300 */ | ||
247 | EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ | ||
248 | EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ | ||
249 | EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ | ||
250 | EMASK_M10 = 1<<9, /* Unsupported on i7300 */ | ||
251 | EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ | ||
252 | EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ | ||
253 | EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ | ||
254 | EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ | ||
255 | EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ | ||
256 | EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ | ||
257 | EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ | ||
258 | EMASK_M18 = 1<<17, /* Unsupported on i7300 */ | ||
259 | EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ | ||
260 | EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ | ||
261 | EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ | ||
262 | EMASK_M22 = 1<<21, /* SPD protocol Error */ | ||
263 | EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ | ||
264 | EMASK_M24 = 1<<23, /* Refresh error */ | ||
265 | EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ | ||
266 | EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ | ||
267 | EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ | ||
268 | EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ | ||
269 | EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ | ||
270 | }; | ||
271 | |||
272 | /* | ||
273 | * Names to translate bit error into something useful | ||
274 | */ | ||
275 | static const char *error_name[] = { | ||
276 | [0] = "Memory Write error on non-redundant retry", | ||
277 | [1] = "Memory or FB-DIMM configuration CRC read error", | ||
278 | /* Reserved */ | ||
279 | [3] = "Uncorrectable Data ECC on Replay", | ||
280 | [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", | ||
281 | /* M6 Unsupported on i7300 */ | ||
282 | [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", | ||
283 | [7] = "Aliased Uncorrectable Patrol Data ECC", | ||
284 | [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", | ||
285 | /* M10 Unsupported on i7300 */ | ||
286 | [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", | ||
287 | [11] = "Non-Aliased Uncorrectable Patrol Data ECC", | ||
288 | [12] = "Memory Write error on first attempt", | ||
289 | [13] = "FB-DIMM Configuration Write error on first attempt", | ||
290 | [14] = "Memory or FB-DIMM configuration CRC read error", | ||
291 | [15] = "Channel Failed-Over Occurred", | ||
292 | [16] = "Correctable Non-Mirrored Demand Data ECC", | ||
293 | /* M18 Unsupported on i7300 */ | ||
294 | [18] = "Correctable Resilver- or Spare-Copy Data ECC", | ||
295 | [19] = "Correctable Patrol Data ECC", | ||
296 | [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", | ||
297 | [21] = "SPD protocol Error", | ||
298 | [22] = "Non-Redundant Fast Reset Timeout", | ||
299 | [23] = "Refresh error", | ||
300 | [24] = "Memory Write error on redundant retry", | ||
301 | [25] = "Redundant Fast Reset Timeout", | ||
302 | [26] = "Correctable Counter Threshold Exceeded", | ||
303 | [27] = "DIMM-Spare Copy Completed", | ||
304 | [28] = "DIMM-Isolation Completed", | ||
305 | }; | ||
306 | |||
307 | /* Fatal errors */ | ||
308 | #define ERROR_FAT_MASK (EMASK_M1 | \ | ||
309 | EMASK_M2 | \ | ||
310 | EMASK_M23) | ||
311 | |||
312 | /* Correctable errors */ | ||
313 | #define ERROR_NF_CORRECTABLE (EMASK_M27 | \ | ||
314 | EMASK_M20 | \ | ||
315 | EMASK_M19 | \ | ||
316 | EMASK_M18 | \ | ||
317 | EMASK_M17 | \ | ||
318 | EMASK_M16) | ||
319 | #define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ | ||
320 | EMASK_M28) | ||
321 | #define ERROR_NF_SPD_PROTOCOL (EMASK_M22) | ||
322 | #define ERROR_NF_NORTH_CRC (EMASK_M21) | ||
323 | |||
324 | /* Recoverable errors */ | ||
325 | #define ERROR_NF_RECOVERABLE (EMASK_M26 | \ | ||
326 | EMASK_M25 | \ | ||
327 | EMASK_M24 | \ | ||
328 | EMASK_M15 | \ | ||
329 | EMASK_M14 | \ | ||
330 | EMASK_M13 | \ | ||
331 | EMASK_M12 | \ | ||
332 | EMASK_M11 | \ | ||
333 | EMASK_M9 | \ | ||
334 | EMASK_M8 | \ | ||
335 | EMASK_M7 | \ | ||
336 | EMASK_M5) | ||
337 | |||
338 | /* uncorrectable errors */ | ||
339 | #define ERROR_NF_UNCORRECTABLE (EMASK_M4) | ||
340 | |||
341 | /* mask to all non-fatal errors */ | ||
342 | #define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ | ||
343 | ERROR_NF_UNCORRECTABLE | \ | ||
344 | ERROR_NF_RECOVERABLE | \ | ||
345 | ERROR_NF_DIMM_SPARE | \ | ||
346 | ERROR_NF_SPD_PROTOCOL | \ | ||
347 | ERROR_NF_NORTH_CRC) | ||
348 | |||
349 | /* | ||
350 | * Define error masks for the several registers | ||
351 | */ | ||
352 | |||
353 | /* Enable all fatal and non fatal errors */ | ||
354 | #define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) | ||
355 | |||
356 | /* mask for fatal error registers */ | ||
357 | #define FERR_FAT_MASK ERROR_FAT_MASK | ||
358 | |||
359 | /* masks for non-fatal error register */ | ||
360 | static inline int to_nf_mask(unsigned int mask) | ||
361 | { | ||
362 | return (mask & EMASK_M29) | (mask >> 3); | ||
363 | }; | ||
364 | |||
365 | static inline int from_nf_ferr(unsigned int mask) | ||
366 | { | ||
367 | return (mask & EMASK_M29) | /* Bit 28 */ | ||
368 | (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ | ||
369 | }; | ||
370 | |||
371 | #define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) | ||
372 | #define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) | ||
373 | #define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) | ||
374 | #define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) | ||
375 | #define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) | ||
376 | #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) | ||
377 | #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) | ||
378 | |||
379 | #endif | ||
380 | 206 | ||
381 | /* Device name and register DID (Device ID) */ | 207 | /* Device name and register DID (Device ID) */ |
382 | struct i7300_dev_info { | 208 | struct i7300_dev_info { |
@@ -416,85 +242,28 @@ struct i7300_pvt { | |||
416 | struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; | 242 | struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; |
417 | }; | 243 | }; |
418 | 244 | ||
419 | #if 0 | ||
420 | /* I7300 MCH error information retrieved from Hardware */ | ||
421 | struct i7300_error_info { | ||
422 | /* These registers are always read from the MC */ | ||
423 | u32 ferr_fat_fbd; /* First Errors Fatal */ | ||
424 | u32 nerr_fat_fbd; /* Next Errors Fatal */ | ||
425 | u32 ferr_nf_fbd; /* First Errors Non-Fatal */ | ||
426 | u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ | ||
427 | |||
428 | /* These registers are input ONLY if there was a Recoverable Error */ | ||
429 | u32 redmemb; /* Recoverable Mem Data Error log B */ | ||
430 | u16 recmema; /* Recoverable Mem Error log A */ | ||
431 | u32 recmemb; /* Recoverable Mem Error log B */ | ||
432 | |||
433 | /* These registers are input ONLY if there was a Non-Rec Error */ | ||
434 | u16 nrecmema; /* Non-Recoverable Mem log A */ | ||
435 | u16 nrecmemb; /* Non-Recoverable Mem log B */ | ||
436 | |||
437 | }; | ||
438 | #endif | ||
439 | |||
440 | /* FIXME: Why do we need to have this static? */ | 245 | /* FIXME: Why do we need to have this static? */ |
441 | static struct edac_pci_ctl_info *i7300_pci; | 246 | static struct edac_pci_ctl_info *i7300_pci; |
442 | 247 | ||
248 | /******************************************** | ||
249 | * i7300 Functions related to error detection | ||
250 | ********************************************/ | ||
443 | 251 | ||
444 | #if 0 | 252 | struct i7300_error_info { |
445 | /* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and | 253 | int dummy; /* FIXME */ |
446 | 5400 better to use an inline function than a macro in this case */ | 254 | }; |
447 | static inline int nrec_bank(struct i7300_error_info *info) | 255 | |
448 | { | 256 | const char *get_err_from_table(const char *table[], int size, int pos) |
449 | return ((info->nrecmema) >> 12) & 0x7; | ||
450 | } | ||
451 | static inline int nrec_rank(struct i7300_error_info *info) | ||
452 | { | ||
453 | return ((info->nrecmema) >> 8) & 0xf; | ||
454 | } | ||
455 | static inline int nrec_buf_id(struct i7300_error_info *info) | ||
456 | { | ||
457 | return ((info->nrecmema)) & 0xff; | ||
458 | } | ||
459 | static inline int nrec_rdwr(struct i7300_error_info *info) | ||
460 | { | ||
461 | return (info->nrecmemb) >> 31; | ||
462 | } | ||
463 | /* This applies to both NREC and REC string so it can be used with nrec_rdwr | ||
464 | and rec_rdwr */ | ||
465 | static inline const char *rdwr_str(int rdwr) | ||
466 | { | ||
467 | return rdwr ? "Write" : "Read"; | ||
468 | } | ||
469 | static inline int nrec_cas(struct i7300_error_info *info) | ||
470 | { | ||
471 | return ((info->nrecmemb) >> 16) & 0x1fff; | ||
472 | } | ||
473 | static inline int nrec_ras(struct i7300_error_info *info) | ||
474 | { | ||
475 | return (info->nrecmemb) & 0xffff; | ||
476 | } | ||
477 | static inline int rec_bank(struct i7300_error_info *info) | ||
478 | { | ||
479 | return ((info->recmema) >> 12) & 0x7; | ||
480 | } | ||
481 | static inline int rec_rank(struct i7300_error_info *info) | ||
482 | { | ||
483 | return ((info->recmema) >> 8) & 0xf; | ||
484 | } | ||
485 | static inline int rec_rdwr(struct i7300_error_info *info) | ||
486 | { | ||
487 | return (info->recmemb) >> 31; | ||
488 | } | ||
489 | static inline int rec_cas(struct i7300_error_info *info) | ||
490 | { | ||
491 | return ((info->recmemb) >> 16) & 0x1fff; | ||
492 | } | ||
493 | static inline int rec_ras(struct i7300_error_info *info) | ||
494 | { | 257 | { |
495 | return (info->recmemb) & 0xffff; | 258 | if (pos >= size) |
259 | return "Reserved"; | ||
260 | |||
261 | return table[pos]; | ||
496 | } | 262 | } |
497 | 263 | ||
264 | #define GET_ERR_FROM_TABLE(table, pos) \ | ||
265 | get_err_from_table(table, ARRAY_SIZE(table), pos) | ||
266 | |||
498 | /* | 267 | /* |
499 | * i7300_get_error_info Retrieve the hardware error information from | 268 | * i7300_get_error_info Retrieve the hardware error information from |
500 | * the hardware and cache it in the 'info' | 269 | * the hardware and cache it in the 'info' |
@@ -503,234 +272,63 @@ static inline int rec_ras(struct i7300_error_info *info) | |||
503 | static void i7300_get_error_info(struct mem_ctl_info *mci, | 272 | static void i7300_get_error_info(struct mem_ctl_info *mci, |
504 | struct i7300_error_info *info) | 273 | struct i7300_error_info *info) |
505 | { | 274 | { |
506 | struct i7300_pvt *pvt; | ||
507 | u32 value; | ||
508 | |||
509 | pvt = mci->pvt_info; | ||
510 | |||
511 | /* read in the 1st FATAL error register */ | ||
512 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_FAT_FBD, &value); | ||
513 | |||
514 | /* Mask only the bits that the doc says are valid | ||
515 | */ | ||
516 | value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); | ||
517 | |||
518 | /* If there is an error, then read in the | ||
519 | NEXT FATAL error register and the Memory Error Log Register A | ||
520 | */ | ||
521 | if (value & FERR_FAT_MASK) { | ||
522 | info->ferr_fat_fbd = value; | ||
523 | |||
524 | /* harvest the various error data we need */ | ||
525 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
526 | NERR_FAT_FBD, &info->nerr_fat_fbd); | ||
527 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | ||
528 | NRECMEMA, &info->nrecmema); | ||
529 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | ||
530 | NRECMEMB, &info->nrecmemb); | ||
531 | |||
532 | /* Clear the error bits, by writing them back */ | ||
533 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
534 | FERR_FAT_FBD, value); | ||
535 | } else { | ||
536 | info->ferr_fat_fbd = 0; | ||
537 | info->nerr_fat_fbd = 0; | ||
538 | info->nrecmema = 0; | ||
539 | info->nrecmemb = 0; | ||
540 | } | ||
541 | |||
542 | /* read in the 1st NON-FATAL error register */ | ||
543 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_NF_FBD, &value); | ||
544 | |||
545 | /* If there is an error, then read in the 1st NON-FATAL error | ||
546 | * register as well */ | ||
547 | if (value & FERR_NF_MASK) { | ||
548 | info->ferr_nf_fbd = value; | ||
549 | |||
550 | /* harvest the various error data we need */ | ||
551 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
552 | NERR_NF_FBD, &info->nerr_nf_fbd); | ||
553 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | ||
554 | RECMEMA, &info->recmema); | ||
555 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
556 | RECMEMB, &info->recmemb); | ||
557 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
558 | REDMEMB, &info->redmemb); | ||
559 | |||
560 | /* Clear the error bits, by writing them back */ | ||
561 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
562 | FERR_NF_FBD, value); | ||
563 | } else { | ||
564 | info->ferr_nf_fbd = 0; | ||
565 | info->nerr_nf_fbd = 0; | ||
566 | info->recmema = 0; | ||
567 | info->recmemb = 0; | ||
568 | info->redmemb = 0; | ||
569 | } | ||
570 | } | 275 | } |
571 | 276 | ||
572 | /* | 277 | /* |
573 | * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, | 278 | * i7300_process_error_global Retrieve the hardware error information from |
574 | * struct i7300_error_info *info, | 279 | * the hardware and cache it in the 'info' |
575 | * int handle_errors); | 280 | * structure |
576 | * | ||
577 | * handle the Intel FATAL and unrecoverable errors, if any | ||
578 | */ | ||
579 | static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, | ||
580 | struct i7300_error_info *info, | ||
581 | unsigned long allErrors) | ||
582 | { | ||
583 | char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; | ||
584 | int branch; | ||
585 | int channel; | ||
586 | int bank; | ||
587 | int buf_id; | ||
588 | int rank; | ||
589 | int rdwr; | ||
590 | int ras, cas; | ||
591 | int errnum; | ||
592 | char *type = NULL; | ||
593 | |||
594 | if (!allErrors) | ||
595 | return; /* if no error, return now */ | ||
596 | |||
597 | if (allErrors & ERROR_FAT_MASK) | ||
598 | type = "FATAL"; | ||
599 | else if (allErrors & FERR_NF_UNCORRECTABLE) | ||
600 | type = "NON-FATAL uncorrected"; | ||
601 | else | ||
602 | type = "NON-FATAL recoverable"; | ||
603 | |||
604 | /* ONLY ONE of the possible error bits will be set, as per the docs */ | ||
605 | |||
606 | branch = extract_fbdchan_indx(info->ferr_fat_fbd); | ||
607 | channel = branch; | ||
608 | |||
609 | /* Use the NON-Recoverable macros to extract data */ | ||
610 | bank = nrec_bank(info); | ||
611 | rank = nrec_rank(info); | ||
612 | buf_id = nrec_buf_id(info); | ||
613 | rdwr = nrec_rdwr(info); | ||
614 | ras = nrec_ras(info); | ||
615 | cas = nrec_cas(info); | ||
616 | |||
617 | debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " | ||
618 | "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", | ||
619 | rank, channel, channel + 1, branch >> 1, bank, | ||
620 | buf_id, rdwr_str(rdwr), ras, cas); | ||
621 | |||
622 | /* Only 1 bit will be on */ | ||
623 | errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); | ||
624 | |||
625 | /* Form out message */ | ||
626 | snprintf(msg, sizeof(msg), | ||
627 | "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " | ||
628 | "RAS=%d CAS=%d %s Err=0x%lx (%s))", | ||
629 | type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, | ||
630 | type, allErrors, error_name[errnum]); | ||
631 | |||
632 | /* Call the helper to output message */ | ||
633 | edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); | ||
634 | } | ||
635 | |||
636 | /* | ||
637 | * i7300_process_fatal_error_info(struct mem_ctl_info *mci, | ||
638 | * struct i7300_error_info *info, | ||
639 | * int handle_errors); | ||
640 | * | ||
641 | * handle the Intel NON-FATAL errors, if any | ||
642 | */ | 281 | */ |
643 | static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, | 282 | static void i7300_process_error_global(struct mem_ctl_info *mci, |
644 | struct i7300_error_info *info) | 283 | struct i7300_error_info *info) |
645 | { | 284 | { |
646 | char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; | 285 | struct i7300_pvt *pvt; |
647 | unsigned long allErrors; | 286 | u32 errnum, value; |
648 | int branch; | 287 | unsigned long errors; |
649 | int channel; | 288 | const char *specific; |
650 | int bank; | 289 | bool is_fatal; |
651 | int rank; | ||
652 | int rdwr; | ||
653 | int ras, cas; | ||
654 | int errnum; | ||
655 | |||
656 | /* mask off the Error bits that are possible */ | ||
657 | allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); | ||
658 | if (!allErrors) | ||
659 | return; /* if no error, return now */ | ||
660 | |||
661 | /* ONLY ONE of the possible error bits will be set, as per the docs */ | ||
662 | |||
663 | if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { | ||
664 | i7300_proccess_non_recoverable_info(mci, info, allErrors); | ||
665 | return; | ||
666 | } | ||
667 | |||
668 | /* Correctable errors */ | ||
669 | if (allErrors & ERROR_NF_CORRECTABLE) { | ||
670 | debugf0("\tCorrected bits= 0x%lx\n", allErrors); | ||
671 | |||
672 | branch = extract_fbdchan_indx(info->ferr_nf_fbd); | ||
673 | |||
674 | channel = 0; | ||
675 | if (REC_ECC_LOCATOR_ODD(info->redmemb)) | ||
676 | channel = 1; | ||
677 | |||
678 | /* Convert channel to be based from zero, instead of | ||
679 | * from branch base of 0 */ | ||
680 | channel += branch; | ||
681 | |||
682 | bank = rec_bank(info); | ||
683 | rank = rec_rank(info); | ||
684 | rdwr = rec_rdwr(info); | ||
685 | ras = rec_ras(info); | ||
686 | cas = rec_cas(info); | ||
687 | |||
688 | /* Only 1 bit will be on */ | ||
689 | errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); | ||
690 | |||
691 | debugf0("\t\tCSROW= %d Channel= %d (Branch %d " | ||
692 | "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", | ||
693 | rank, channel, branch >> 1, bank, | ||
694 | rdwr_str(rdwr), ras, cas); | ||
695 | |||
696 | /* Form out message */ | ||
697 | snprintf(msg, sizeof(msg), | ||
698 | "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " | ||
699 | "RAS=%d CAS=%d, CE Err=0x%lx (%s))", | ||
700 | branch >> 1, bank, rdwr_str(rdwr), ras, cas, | ||
701 | allErrors, error_name[errnum]); | ||
702 | 290 | ||
703 | /* Call the helper to output message */ | 291 | pvt = mci->pvt_info; |
704 | edac_mc_handle_fbd_ce(mci, rank, channel, msg); | ||
705 | 292 | ||
706 | return; | 293 | /* read in the 1st FATAL error register */ |
294 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, | ||
295 | FERR_GLOBAL_HI, &value); | ||
296 | if (unlikely(value)) { | ||
297 | errors = value; | ||
298 | errnum = find_first_bit(&errors, | ||
299 | ARRAY_SIZE(ferr_global_hi_name)); | ||
300 | specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); | ||
301 | is_fatal = ferr_global_hi_is_fatal(errnum); | ||
302 | goto error_global; | ||
707 | } | 303 | } |
708 | 304 | ||
709 | /* Miscelaneous errors */ | 305 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, |
710 | errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); | 306 | FERR_GLOBAL_LO, &value); |
711 | 307 | if (unlikely(value)) { | |
712 | branch = extract_fbdchan_indx(info->ferr_nf_fbd); | 308 | errors = value; |
309 | errnum = find_first_bit(&errors, | ||
310 | ARRAY_SIZE(ferr_global_lo_name)); | ||
311 | specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); | ||
312 | is_fatal = ferr_global_lo_is_fatal(errnum); | ||
313 | goto error_global; | ||
314 | } | ||
315 | return; | ||
713 | 316 | ||
714 | i7300_mc_printk(mci, KERN_EMERG, | 317 | error_global: |
715 | "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", | 318 | i7300_mc_printk(mci, KERN_EMERG, "%s misc error: %s\n", |
716 | branch >> 1, allErrors, error_name[errnum]); | 319 | is_fatal ? "Fatal" : "NOT fatal", specific); |
717 | } | 320 | } |
718 | 321 | ||
719 | /* | 322 | /* |
720 | * i7300_process_error_info Process the error info that is | 323 | * i7300_process_error_info Retrieve the hardware error information from |
721 | * in the 'info' structure, previously retrieved from hardware | 324 | * the hardware and cache it in the 'info' |
325 | * structure | ||
722 | */ | 326 | */ |
723 | static void i7300_process_error_info(struct mem_ctl_info *mci, | 327 | static void i7300_process_error_info(struct mem_ctl_info *mci, |
724 | struct i7300_error_info *info) | 328 | struct i7300_error_info *info) |
725 | { u32 allErrors; | 329 | { |
726 | 330 | i7300_process_error_global(mci, info); | |
727 | /* First handle any fatal errors that occurred */ | 331 | }; |
728 | allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); | ||
729 | i7300_proccess_non_recoverable_info(mci, info, allErrors); | ||
730 | |||
731 | /* now handle any non-fatal errors that occurred */ | ||
732 | i7300_process_nonfatal_error_info(mci, info); | ||
733 | } | ||
734 | 332 | ||
735 | /* | 333 | /* |
736 | * i7300_clear_error Retrieve any error from the hardware | 334 | * i7300_clear_error Retrieve any error from the hardware |
@@ -753,6 +351,7 @@ static void i7300_check_error(struct mem_ctl_info *mci) | |||
753 | { | 351 | { |
754 | struct i7300_error_info info; | 352 | struct i7300_error_info info; |
755 | debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); | 353 | debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); |
354 | |||
756 | i7300_get_error_info(mci, &info); | 355 | i7300_get_error_info(mci, &info); |
757 | i7300_process_error_info(mci, &info); | 356 | i7300_process_error_info(mci, &info); |
758 | } | 357 | } |
@@ -763,22 +362,11 @@ static void i7300_check_error(struct mem_ctl_info *mci) | |||
763 | */ | 362 | */ |
764 | static void i7300_enable_error_reporting(struct mem_ctl_info *mci) | 363 | static void i7300_enable_error_reporting(struct mem_ctl_info *mci) |
765 | { | 364 | { |
766 | struct i7300_pvt *pvt; | ||
767 | u32 fbd_error_mask; | ||
768 | |||
769 | pvt = mci->pvt_info; | ||
770 | |||
771 | /* Read the FBD Error Mask Register */ | ||
772 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, | ||
773 | &fbd_error_mask); | ||
774 | |||
775 | /* Enable with a '0' */ | ||
776 | fbd_error_mask &= ~(ENABLE_EMASK_ALL); | ||
777 | |||
778 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, | ||
779 | fbd_error_mask); | ||
780 | } | 365 | } |
781 | #endif | 366 | |
367 | /************************************************ | ||
368 | * i7300 Functions related to memory enumeration | ||
369 | ************************************************/ | ||
782 | 370 | ||
783 | /* | 371 | /* |
784 | * determine_mtr(pvt, csrow, channel) | 372 | * determine_mtr(pvt, csrow, channel) |
@@ -1070,6 +658,10 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) | |||
1070 | return 0; | 658 | return 0; |
1071 | } | 659 | } |
1072 | 660 | ||
661 | /************************************************* | ||
662 | * i7300 Functions related to device probe/release | ||
663 | *************************************************/ | ||
664 | |||
1073 | /* | 665 | /* |
1074 | * i7300_put_devices 'put' all the devices that we have | 666 | * i7300_put_devices 'put' all the devices that we have |
1075 | * reserved via 'get' | 667 | * reserved via 'get' |
@@ -1238,10 +830,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) | |||
1238 | mci->dev_name = pci_name(pdev); | 830 | mci->dev_name = pci_name(pdev); |
1239 | mci->ctl_page_to_phys = NULL; | 831 | mci->ctl_page_to_phys = NULL; |
1240 | 832 | ||
1241 | #if 0 | ||
1242 | /* Set the function pointer to an actual operation function */ | 833 | /* Set the function pointer to an actual operation function */ |
1243 | mci->edac_check = i7300_check_error; | 834 | mci->edac_check = i7300_check_error; |
1244 | #endif | ||
1245 | 835 | ||
1246 | /* initialize the MC control structure 'csrows' table | 836 | /* initialize the MC control structure 'csrows' table |
1247 | * with the mapping and control information */ | 837 | * with the mapping and control information */ |
@@ -1251,10 +841,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) | |||
1251 | "value\n"); | 841 | "value\n"); |
1252 | mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ | 842 | mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ |
1253 | } else { | 843 | } else { |
1254 | #if 0 | ||
1255 | debugf1("MC: Enable error reporting now\n"); | 844 | debugf1("MC: Enable error reporting now\n"); |
1256 | i7300_enable_error_reporting(mci); | 845 | i7300_enable_error_reporting(mci); |
1257 | #endif | ||
1258 | } | 846 | } |
1259 | 847 | ||
1260 | /* add this new MC control structure to EDAC's list of MCs */ | 848 | /* add this new MC control structure to EDAC's list of MCs */ |
@@ -1267,9 +855,7 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) | |||
1267 | goto fail1; | 855 | goto fail1; |
1268 | } | 856 | } |
1269 | 857 | ||
1270 | #if 0 | ||
1271 | i7300_clear_error(mci); | 858 | i7300_clear_error(mci); |
1272 | #endif | ||
1273 | 859 | ||
1274 | /* allocating generic PCI control info */ | 860 | /* allocating generic PCI control info */ |
1275 | i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); | 861 | i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); |