diff options
-rw-r--r-- | drivers/edac/i7300_edac.c | 568 |
1 files changed, 77 insertions, 491 deletions
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 3e60dbaa0e7e..e617b4f79dd8 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c | |||
@@ -158,8 +158,17 @@ static const char *numcol_toString[] = { | |||
158 | * Device 16.2: Global Error Registers | 158 | * Device 16.2: Global Error Registers |
159 | */ | 159 | */ |
160 | 160 | ||
161 | #define FERR_GLOBAL_HI 0x48 | ||
162 | static const char *ferr_global_hi_name[] = { | ||
163 | [3] = "FSB 3 Fatal Error", | ||
164 | [2] = "FSB 2 Fatal Error", | ||
165 | [1] = "FSB 1 Fatal Error", | ||
166 | [0] = "FSB 0 Fatal Error", | ||
167 | }; | ||
168 | #define ferr_global_hi_is_fatal(errno) 1 | ||
169 | |||
161 | #define FERR_GLOBAL_LO 0x40 | 170 | #define FERR_GLOBAL_LO 0x40 |
162 | static const char *ferr_global_name[] = { | 171 | static const char *ferr_global_lo_name[] = { |
163 | [31] = "Internal MCH Fatal Error", | 172 | [31] = "Internal MCH Fatal Error", |
164 | [30] = "Intel QuickData Technology Device Fatal Error", | 173 | [30] = "Intel QuickData Technology Device Fatal Error", |
165 | [29] = "FSB1 Fatal Error", | 174 | [29] = "FSB1 Fatal Error", |
@@ -193,190 +202,7 @@ static const char *ferr_global_name[] = { | |||
193 | [1] = "PCI Express Device 1 Non-Fatal Error", | 202 | [1] = "PCI Express Device 1 Non-Fatal Error", |
194 | [0] = "ESI Non-Fatal Error", | 203 | [0] = "ESI Non-Fatal Error", |
195 | }; | 204 | }; |
196 | 205 | #define ferr_global_lo_is_fatal(errno) ((errno < 16) ? 0 : 1) | |
197 | #define NERR_GLOBAL 0x44 | ||
198 | static const char *nerr_global_name[] = { | ||
199 | [31] = "Internal MCH Fatal Error", | ||
200 | [30] = "Intel QuickData Technology Device Fatal Error", | ||
201 | [29] = "FSB1 Fatal Error", | ||
202 | [28] = "FSB0 Fatal Error", | ||
203 | [27] = "FSB2 Fatal Error", | ||
204 | [26] = "FSB3 Fatal Error", | ||
205 | [25] = "Reserved", | ||
206 | [24] = "FBD Channel 0,1,2 or 3 Fatal Error", | ||
207 | [23] = "PCI Express Device 7 Fatal Error", | ||
208 | [22] = "PCI Express Device 6 Fatal Error", | ||
209 | [21] = "PCI Express Device 5 Fatal Error", | ||
210 | [20] = "PCI Express Device 4 Fatal Error", | ||
211 | [19] = "PCI Express Device 3 Fatal Error", | ||
212 | [18] = "PCI Express Device 2 Fatal Error", | ||
213 | [17] = "PCI Express Device 1 Fatal Error", | ||
214 | [16] = "ESI Fatal Error", | ||
215 | [15] = "Internal MCH Non-Fatal Error", | ||
216 | [14] = "Intel QuickData Technology Device Non Fatal Error", | ||
217 | [13] = "FSB1 Non-Fatal Error", | ||
218 | [12] = "FSB0 Non-Fatal Error", | ||
219 | [11] = "FSB2 Non-Fatal Error", | ||
220 | [10] = "FSB3 Non-Fatal Error", | ||
221 | [9] = "Reserved", | ||
222 | [8] = "FBD Channel 0,1, 2 or 3 Non-Fatal Error", | ||
223 | [7] = "PCI Express Device 7 Non-Fatal Error", | ||
224 | [6] = "PCI Express Device 6 Non-Fatal Error", | ||
225 | [5] = "PCI Express Device 5 Non-Fatal Error", | ||
226 | [4] = "PCI Express Device 4 Non-Fatal Error", | ||
227 | [3] = "PCI Express Device 3 Non-Fatal Error", | ||
228 | [2] = "PCI Express Device 2 Non-Fatal Error", | ||
229 | [1] = "PCI Express Device 1 Non-Fatal Error", | ||
230 | [0] = "ESI Non-Fatal Error", | ||
231 | }; | ||
232 | |||
233 | #if 0 | ||
234 | |||
235 | /* | ||
236 | * Error indicator bits and masks | ||
237 | * Error masks are according with Table 5-17 of i7300 datasheet | ||
238 | */ | ||
239 | |||
240 | enum error_mask { | ||
241 | EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ | ||
242 | EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ | ||
243 | EMASK_M3 = 1<<2, /* Reserved */ | ||
244 | EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ | ||
245 | EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ | ||
246 | EMASK_M6 = 1<<5, /* Unsupported on i7300 */ | ||
247 | EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ | ||
248 | EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ | ||
249 | EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ | ||
250 | EMASK_M10 = 1<<9, /* Unsupported on i7300 */ | ||
251 | EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ | ||
252 | EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ | ||
253 | EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ | ||
254 | EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ | ||
255 | EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ | ||
256 | EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ | ||
257 | EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ | ||
258 | EMASK_M18 = 1<<17, /* Unsupported on i7300 */ | ||
259 | EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ | ||
260 | EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ | ||
261 | EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ | ||
262 | EMASK_M22 = 1<<21, /* SPD protocol Error */ | ||
263 | EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ | ||
264 | EMASK_M24 = 1<<23, /* Refresh error */ | ||
265 | EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ | ||
266 | EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ | ||
267 | EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ | ||
268 | EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ | ||
269 | EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ | ||
270 | }; | ||
271 | |||
272 | /* | ||
273 | * Names to translate bit error into something useful | ||
274 | */ | ||
275 | static const char *error_name[] = { | ||
276 | [0] = "Memory Write error on non-redundant retry", | ||
277 | [1] = "Memory or FB-DIMM configuration CRC read error", | ||
278 | /* Reserved */ | ||
279 | [3] = "Uncorrectable Data ECC on Replay", | ||
280 | [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", | ||
281 | /* M6 Unsupported on i7300 */ | ||
282 | [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", | ||
283 | [7] = "Aliased Uncorrectable Patrol Data ECC", | ||
284 | [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", | ||
285 | /* M10 Unsupported on i7300 */ | ||
286 | [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", | ||
287 | [11] = "Non-Aliased Uncorrectable Patrol Data ECC", | ||
288 | [12] = "Memory Write error on first attempt", | ||
289 | [13] = "FB-DIMM Configuration Write error on first attempt", | ||
290 | [14] = "Memory or FB-DIMM configuration CRC read error", | ||
291 | [15] = "Channel Failed-Over Occurred", | ||
292 | [16] = "Correctable Non-Mirrored Demand Data ECC", | ||
293 | /* M18 Unsupported on i7300 */ | ||
294 | [18] = "Correctable Resilver- or Spare-Copy Data ECC", | ||
295 | [19] = "Correctable Patrol Data ECC", | ||
296 | [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", | ||
297 | [21] = "SPD protocol Error", | ||
298 | [22] = "Non-Redundant Fast Reset Timeout", | ||
299 | [23] = "Refresh error", | ||
300 | [24] = "Memory Write error on redundant retry", | ||
301 | [25] = "Redundant Fast Reset Timeout", | ||
302 | [26] = "Correctable Counter Threshold Exceeded", | ||
303 | [27] = "DIMM-Spare Copy Completed", | ||
304 | [28] = "DIMM-Isolation Completed", | ||
305 | }; | ||
306 | |||
307 | /* Fatal errors */ | ||
308 | #define ERROR_FAT_MASK (EMASK_M1 | \ | ||
309 | EMASK_M2 | \ | ||
310 | EMASK_M23) | ||
311 | |||
312 | /* Correctable errors */ | ||
313 | #define ERROR_NF_CORRECTABLE (EMASK_M27 | \ | ||
314 | EMASK_M20 | \ | ||
315 | EMASK_M19 | \ | ||
316 | EMASK_M18 | \ | ||
317 | EMASK_M17 | \ | ||
318 | EMASK_M16) | ||
319 | #define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ | ||
320 | EMASK_M28) | ||
321 | #define ERROR_NF_SPD_PROTOCOL (EMASK_M22) | ||
322 | #define ERROR_NF_NORTH_CRC (EMASK_M21) | ||
323 | |||
324 | /* Recoverable errors */ | ||
325 | #define ERROR_NF_RECOVERABLE (EMASK_M26 | \ | ||
326 | EMASK_M25 | \ | ||
327 | EMASK_M24 | \ | ||
328 | EMASK_M15 | \ | ||
329 | EMASK_M14 | \ | ||
330 | EMASK_M13 | \ | ||
331 | EMASK_M12 | \ | ||
332 | EMASK_M11 | \ | ||
333 | EMASK_M9 | \ | ||
334 | EMASK_M8 | \ | ||
335 | EMASK_M7 | \ | ||
336 | EMASK_M5) | ||
337 | |||
338 | /* uncorrectable errors */ | ||
339 | #define ERROR_NF_UNCORRECTABLE (EMASK_M4) | ||
340 | |||
341 | /* mask to all non-fatal errors */ | ||
342 | #define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ | ||
343 | ERROR_NF_UNCORRECTABLE | \ | ||
344 | ERROR_NF_RECOVERABLE | \ | ||
345 | ERROR_NF_DIMM_SPARE | \ | ||
346 | ERROR_NF_SPD_PROTOCOL | \ | ||
347 | ERROR_NF_NORTH_CRC) | ||
348 | |||
349 | /* | ||
350 | * Define error masks for the several registers | ||
351 | */ | ||
352 | |||
353 | /* Enable all fatal and non fatal errors */ | ||
354 | #define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) | ||
355 | |||
356 | /* mask for fatal error registers */ | ||
357 | #define FERR_FAT_MASK ERROR_FAT_MASK | ||
358 | |||
359 | /* masks for non-fatal error register */ | ||
360 | static inline int to_nf_mask(unsigned int mask) | ||
361 | { | ||
362 | return (mask & EMASK_M29) | (mask >> 3); | ||
363 | }; | ||
364 | |||
365 | static inline int from_nf_ferr(unsigned int mask) | ||
366 | { | ||
367 | return (mask & EMASK_M29) | /* Bit 28 */ | ||
368 | (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ | ||
369 | }; | ||
370 | |||
371 | #define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) | ||
372 | #define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) | ||
373 | #define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE) | ||
374 | #define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) | ||
375 | #define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) | ||
376 | #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) | ||
377 | #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) | ||
378 | |||
379 | #endif | ||
380 | 206 | ||
381 | /* Device name and register DID (Device ID) */ | 207 | /* Device name and register DID (Device ID) */ |
382 | struct i7300_dev_info { | 208 | struct i7300_dev_info { |
@@ -416,85 +242,28 @@ struct i7300_pvt { | |||
416 | struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; | 242 | struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; |
417 | }; | 243 | }; |
418 | 244 | ||
419 | #if 0 | ||
420 | /* I7300 MCH error information retrieved from Hardware */ | ||
421 | struct i7300_error_info { | ||
422 | /* These registers are always read from the MC */ | ||
423 | u32 ferr_fat_fbd; /* First Errors Fatal */ | ||
424 | u32 nerr_fat_fbd; /* Next Errors Fatal */ | ||
425 | u32 ferr_nf_fbd; /* First Errors Non-Fatal */ | ||
426 | u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ | ||
427 | |||
428 | /* These registers are input ONLY if there was a Recoverable Error */ | ||
429 | u32 redmemb; /* Recoverable Mem Data Error log B */ | ||
430 | u16 recmema; /* Recoverable Mem Error log A */ | ||
431 | u32 recmemb; /* Recoverable Mem Error log B */ | ||
432 | |||
433 | /* These registers are input ONLY if there was a Non-Rec Error */ | ||
434 | u16 nrecmema; /* Non-Recoverable Mem log A */ | ||
435 | u16 nrecmemb; /* Non-Recoverable Mem log B */ | ||
436 | |||
437 | }; | ||
438 | #endif | ||
439 | |||
440 | /* FIXME: Why do we need to have this static? */ | 245 | /* FIXME: Why do we need to have this static? */ |
441 | static struct edac_pci_ctl_info *i7300_pci; | 246 | static struct edac_pci_ctl_info *i7300_pci; |
442 | 247 | ||
248 | /******************************************** | ||
249 | * i7300 Functions related to error detection | ||
250 | ********************************************/ | ||
443 | 251 | ||
444 | #if 0 | 252 | struct i7300_error_info { |
445 | /* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and | 253 | int dummy; /* FIXME */ |
446 | 5400 better to use an inline function than a macro in this case */ | 254 | }; |
447 | static inline int nrec_bank(struct i7300_error_info *info) | 255 | |
448 | { | 256 | const char *get_err_from_table(const char *table[], int size, int pos) |
449 | return ((info->nrecmema) >> 12) & 0x7; | ||
450 | } | ||
451 | static inline int nrec_rank(struct i7300_error_info *info) | ||
452 | { | ||
453 | return ((info->nrecmema) >> 8) & 0xf; | ||
454 | } | ||
455 | static inline int nrec_buf_id(struct i7300_error_info *info) | ||
456 | { | ||
457 | return ((info->nrecmema)) & 0xff; | ||
458 | } | ||
459 | static inline int nrec_rdwr(struct i7300_error_info *info) | ||
460 | { | ||
461 | return (info->nrecmemb) >> 31; | ||
462 | } | ||
463 | /* This applies to both NREC and REC string so it can be used with nrec_rdwr | ||
464 | and rec_rdwr */ | ||
465 | static inline const char *rdwr_str(int rdwr) | ||
466 | { | ||
467 | return rdwr ? "Write" : "Read"; | ||
468 | } | ||
469 | static inline int nrec_cas(struct i7300_error_info *info) | ||
470 | { | ||
471 | return ((info->nrecmemb) >> 16) & 0x1fff; | ||
472 | } | ||
473 | static inline int nrec_ras(struct i7300_error_info *info) | ||
474 | { | ||
475 | return (info->nrecmemb) & 0xffff; | ||
476 | } | ||
477 | static inline int rec_bank(struct i7300_error_info *info) | ||
478 | { | ||
479 | return ((info->recmema) >> 12) & 0x7; | ||
480 | } | ||
481 | static inline int rec_rank(struct i7300_error_info *info) | ||
482 | { | ||
483 | return ((info->recmema) >> 8) & 0xf; | ||
484 | } | ||
485 | static inline int rec_rdwr(struct i7300_error_info *info) | ||
486 | { | ||
487 | return (info->recmemb) >> 31; | ||
488 | } | ||
489 | static inline int rec_cas(struct i7300_error_info *info) | ||
490 | { | ||
491 | return ((info->recmemb) >> 16) & 0x1fff; | ||
492 | } | ||
493 | static inline int rec_ras(struct i7300_error_info *info) | ||
494 | { | 257 | { |
495 | return (info->recmemb) & 0xffff; | 258 | if (pos >= size) |
259 | return "Reserved"; | ||
260 | |||
261 | return table[pos]; | ||
496 | } | 262 | } |
497 | 263 | ||
264 | #define GET_ERR_FROM_TABLE(table, pos) \ | ||
265 | get_err_from_table(table, ARRAY_SIZE(table), pos) | ||
266 | |||
498 | /* | 267 | /* |
499 | * i7300_get_error_info Retrieve the hardware error information from | 268 | * i7300_get_error_info Retrieve the hardware error information from |
500 | * the hardware and cache it in the 'info' | 269 | * the hardware and cache it in the 'info' |
@@ -503,234 +272,63 @@ static inline int rec_ras(struct i7300_error_info *info) | |||
503 | static void i7300_get_error_info(struct mem_ctl_info *mci, | 272 | static void i7300_get_error_info(struct mem_ctl_info *mci, |
504 | struct i7300_error_info *info) | 273 | struct i7300_error_info *info) |
505 | { | 274 | { |
506 | struct i7300_pvt *pvt; | ||
507 | u32 value; | ||
508 | |||
509 | pvt = mci->pvt_info; | ||
510 | |||
511 | /* read in the 1st FATAL error register */ | ||
512 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_FAT_FBD, &value); | ||
513 | |||
514 | /* Mask only the bits that the doc says are valid | ||
515 | */ | ||
516 | value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); | ||
517 | |||
518 | /* If there is an error, then read in the | ||
519 | NEXT FATAL error register and the Memory Error Log Register A | ||
520 | */ | ||
521 | if (value & FERR_FAT_MASK) { | ||
522 | info->ferr_fat_fbd = value; | ||
523 | |||
524 | /* harvest the various error data we need */ | ||
525 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
526 | NERR_FAT_FBD, &info->nerr_fat_fbd); | ||
527 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | ||
528 | NRECMEMA, &info->nrecmema); | ||
529 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | ||
530 | NRECMEMB, &info->nrecmemb); | ||
531 | |||
532 | /* Clear the error bits, by writing them back */ | ||
533 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
534 | FERR_FAT_FBD, value); | ||
535 | } else { | ||
536 | info->ferr_fat_fbd = 0; | ||
537 | info->nerr_fat_fbd = 0; | ||
538 | info->nrecmema = 0; | ||
539 | info->nrecmemb = 0; | ||
540 | } | ||
541 | |||
542 | /* read in the 1st NON-FATAL error register */ | ||
543 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_NF_FBD, &value); | ||
544 | |||
545 | /* If there is an error, then read in the 1st NON-FATAL error | ||
546 | * register as well */ | ||
547 | if (value & FERR_NF_MASK) { | ||
548 | info->ferr_nf_fbd = value; | ||
549 | |||
550 | /* harvest the various error data we need */ | ||
551 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
552 | NERR_NF_FBD, &info->nerr_nf_fbd); | ||
553 | pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, | ||
554 | RECMEMA, &info->recmema); | ||
555 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
556 | RECMEMB, &info->recmemb); | ||
557 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
558 | REDMEMB, &info->redmemb); | ||
559 | |||
560 | /* Clear the error bits, by writing them back */ | ||
561 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, | ||
562 | FERR_NF_FBD, value); | ||
563 | } else { | ||
564 | info->ferr_nf_fbd = 0; | ||
565 | info->nerr_nf_fbd = 0; | ||
566 | info->recmema = 0; | ||
567 | info->recmemb = 0; | ||
568 | info->redmemb = 0; | ||
569 | } | ||
570 | } | 275 | } |
571 | 276 | ||
572 | /* | 277 | /* |
573 | * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, | 278 | * i7300_process_error_global Read the global first-error registers |
574 | * struct i7300_error_info *info, | 279 | * (FERR_GLOBAL_HI, then FERR_GLOBAL_LO) and |
575 | * int handle_errors); | 280 | * report the first error bit that is set |
576 | * | ||
577 | * handle the Intel FATAL and unrecoverable errors, if any | ||
578 | */ | ||
579 | static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, | ||
580 | struct i7300_error_info *info, | ||
581 | unsigned long allErrors) | ||
582 | { | ||
583 | char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; | ||
584 | int branch; | ||
585 | int channel; | ||
586 | int bank; | ||
587 | int buf_id; | ||
588 | int rank; | ||
589 | int rdwr; | ||
590 | int ras, cas; | ||
591 | int errnum; | ||
592 | char *type = NULL; | ||
593 | |||
594 | if (!allErrors) | ||
595 | return; /* if no error, return now */ | ||
596 | |||
597 | if (allErrors & ERROR_FAT_MASK) | ||
598 | type = "FATAL"; | ||
599 | else if (allErrors & FERR_NF_UNCORRECTABLE) | ||
600 | type = "NON-FATAL uncorrected"; | ||
601 | else | ||
602 | type = "NON-FATAL recoverable"; | ||
603 | |||
604 | /* ONLY ONE of the possible error bits will be set, as per the docs */ | ||
605 | |||
606 | branch = extract_fbdchan_indx(info->ferr_fat_fbd); | ||
607 | channel = branch; | ||
608 | |||
609 | /* Use the NON-Recoverable macros to extract data */ | ||
610 | bank = nrec_bank(info); | ||
611 | rank = nrec_rank(info); | ||
612 | buf_id = nrec_buf_id(info); | ||
613 | rdwr = nrec_rdwr(info); | ||
614 | ras = nrec_ras(info); | ||
615 | cas = nrec_cas(info); | ||
616 | |||
617 | debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " | ||
618 | "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", | ||
619 | rank, channel, channel + 1, branch >> 1, bank, | ||
620 | buf_id, rdwr_str(rdwr), ras, cas); | ||
621 | |||
622 | /* Only 1 bit will be on */ | ||
623 | errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); | ||
624 | |||
625 | /* Form out message */ | ||
626 | snprintf(msg, sizeof(msg), | ||
627 | "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " | ||
628 | "RAS=%d CAS=%d %s Err=0x%lx (%s))", | ||
629 | type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, | ||
630 | type, allErrors, error_name[errnum]); | ||
631 | |||
632 | /* Call the helper to output message */ | ||
633 | edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); | ||
634 | } | ||
635 | |||
636 | /* | ||
637 | * i7300_process_fatal_error_info(struct mem_ctl_info *mci, | ||
638 | * struct i7300_error_info *info, | ||
639 | * int handle_errors); | ||
640 | * | ||
641 | * handle the Intel NON-FATAL errors, if any | ||
642 | */ | 281 | */ |
643 | static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, | 282 | static void i7300_process_error_global(struct mem_ctl_info *mci, |
644 | struct i7300_error_info *info) | 283 | struct i7300_error_info *info) |
645 | { | 284 | { |
646 | char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; | 285 | struct i7300_pvt *pvt; |
647 | unsigned long allErrors; | 286 | u32 errnum, value; |
648 | int branch; | 287 | unsigned long errors; |
649 | int channel; | 288 | const char *specific; |
650 | int bank; | 289 | bool is_fatal; |
651 | int rank; | ||
652 | int rdwr; | ||
653 | int ras, cas; | ||
654 | int errnum; | ||
655 | |||
656 | /* mask off the Error bits that are possible */ | ||
657 | allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); | ||
658 | if (!allErrors) | ||
659 | return; /* if no error, return now */ | ||
660 | |||
661 | /* ONLY ONE of the possible error bits will be set, as per the docs */ | ||
662 | |||
663 | if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { | ||
664 | i7300_proccess_non_recoverable_info(mci, info, allErrors); | ||
665 | return; | ||
666 | } | ||
667 | |||
668 | /* Correctable errors */ | ||
669 | if (allErrors & ERROR_NF_CORRECTABLE) { | ||
670 | debugf0("\tCorrected bits= 0x%lx\n", allErrors); | ||
671 | |||
672 | branch = extract_fbdchan_indx(info->ferr_nf_fbd); | ||
673 | |||
674 | channel = 0; | ||
675 | if (REC_ECC_LOCATOR_ODD(info->redmemb)) | ||
676 | channel = 1; | ||
677 | |||
678 | /* Convert channel to be based from zero, instead of | ||
679 | * from branch base of 0 */ | ||
680 | channel += branch; | ||
681 | |||
682 | bank = rec_bank(info); | ||
683 | rank = rec_rank(info); | ||
684 | rdwr = rec_rdwr(info); | ||
685 | ras = rec_ras(info); | ||
686 | cas = rec_cas(info); | ||
687 | |||
688 | /* Only 1 bit will be on */ | ||
689 | errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); | ||
690 | |||
691 | debugf0("\t\tCSROW= %d Channel= %d (Branch %d " | ||
692 | "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", | ||
693 | rank, channel, branch >> 1, bank, | ||
694 | rdwr_str(rdwr), ras, cas); | ||
695 | |||
696 | /* Form out message */ | ||
697 | snprintf(msg, sizeof(msg), | ||
698 | "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " | ||
699 | "RAS=%d CAS=%d, CE Err=0x%lx (%s))", | ||
700 | branch >> 1, bank, rdwr_str(rdwr), ras, cas, | ||
701 | allErrors, error_name[errnum]); | ||
702 | 290 | ||
703 | /* Call the helper to output message */ | 291 | pvt = mci->pvt_info; |
704 | edac_mc_handle_fbd_ce(mci, rank, channel, msg); | ||
705 | 292 | ||
706 | return; | 293 | /* read in the 1st FATAL error register */ |
294 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, | ||
295 | FERR_GLOBAL_HI, &value); | ||
296 | if (unlikely(value)) { | ||
297 | errors = value; | ||
298 | errnum = find_first_bit(&errors, | ||
299 | ARRAY_SIZE(ferr_global_hi_name)); | ||
300 | specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum); | ||
301 | is_fatal = ferr_global_hi_is_fatal(errnum); | ||
302 | goto error_global; | ||
707 | } | 303 | } |
708 | 304 | ||
709 | /* Miscelaneous errors */ | 305 | pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs, |
710 | errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); | 306 | FERR_GLOBAL_LO, &value); |
711 | 307 | if (unlikely(value)) { | |
712 | branch = extract_fbdchan_indx(info->ferr_nf_fbd); | 308 | errors = value; |
309 | errnum = find_first_bit(&errors, | ||
310 | ARRAY_SIZE(ferr_global_lo_name)); | ||
311 | specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum); | ||
312 | is_fatal = ferr_global_lo_is_fatal(errnum); | ||
313 | goto error_global; | ||
314 | } | ||
315 | return; | ||
713 | 316 | ||
714 | i7300_mc_printk(mci, KERN_EMERG, | 317 | error_global: |
715 | "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", | 318 | i7300_mc_printk(mci, KERN_EMERG, "%s misc error: %s\n", |
716 | branch >> 1, allErrors, error_name[errnum]); | 319 | is_fatal ? "Fatal" : "NOT fatal", specific); |
717 | } | 320 | } |
718 | 321 | ||
719 | /* | 322 | /* |
720 | * i7300_process_error_info Process the error info that is | 323 | * i7300_process_error_info Process the error information by passing |
721 | * in the 'info' structure, previously retrieved from hardware | 324 | * 'mci' and 'info' on to |
325 | * i7300_process_error_global() | |
722 | */ | 326 | */ |
723 | static void i7300_process_error_info(struct mem_ctl_info *mci, | 327 | static void i7300_process_error_info(struct mem_ctl_info *mci, |
724 | struct i7300_error_info *info) | 328 | struct i7300_error_info *info) |
725 | { u32 allErrors; | 329 | { |
726 | 330 | i7300_process_error_global(mci, info); | |
727 | /* First handle any fatal errors that occurred */ | 331 | }; |
728 | allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); | ||
729 | i7300_proccess_non_recoverable_info(mci, info, allErrors); | ||
730 | |||
731 | /* now handle any non-fatal errors that occurred */ | ||
732 | i7300_process_nonfatal_error_info(mci, info); | ||
733 | } | ||
734 | 332 | ||
735 | /* | 333 | /* |
736 | * i7300_clear_error Retrieve any error from the hardware | 334 | * i7300_clear_error Retrieve any error from the hardware |
@@ -753,6 +351,7 @@ static void i7300_check_error(struct mem_ctl_info *mci) | |||
753 | { | 351 | { |
754 | struct i7300_error_info info; | 352 | struct i7300_error_info info; |
755 | debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); | 353 | debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); |
354 | |||
756 | i7300_get_error_info(mci, &info); | 355 | i7300_get_error_info(mci, &info); |
757 | i7300_process_error_info(mci, &info); | 356 | i7300_process_error_info(mci, &info); |
758 | } | 357 | } |
@@ -763,22 +362,11 @@ static void i7300_check_error(struct mem_ctl_info *mci) | |||
763 | */ | 362 | */ |
764 | static void i7300_enable_error_reporting(struct mem_ctl_info *mci) | 363 | static void i7300_enable_error_reporting(struct mem_ctl_info *mci) |
765 | { | 364 | { |
766 | struct i7300_pvt *pvt; | ||
767 | u32 fbd_error_mask; | ||
768 | |||
769 | pvt = mci->pvt_info; | ||
770 | |||
771 | /* Read the FBD Error Mask Register */ | ||
772 | pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, | ||
773 | &fbd_error_mask); | ||
774 | |||
775 | /* Enable with a '0' */ | ||
776 | fbd_error_mask &= ~(ENABLE_EMASK_ALL); | ||
777 | |||
778 | pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD, | ||
779 | fbd_error_mask); | ||
780 | } | 365 | } |
781 | #endif | 366 | |
367 | /************************************************ | ||
368 | * i7300 Functions related to memory enumeration | |
369 | ************************************************/ | ||
782 | 370 | ||
783 | /* | 371 | /* |
784 | * determine_mtr(pvt, csrow, channel) | 372 | * determine_mtr(pvt, csrow, channel) |
@@ -1070,6 +658,10 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci) | |||
1070 | return 0; | 658 | return 0; |
1071 | } | 659 | } |
1072 | 660 | ||
661 | /************************************************* | ||
662 | * i7300 Functions related to device probe/release | ||
663 | *************************************************/ | ||
664 | |||
1073 | /* | 665 | /* |
1074 | * i7300_put_devices 'put' all the devices that we have | 666 | * i7300_put_devices 'put' all the devices that we have |
1075 | * reserved via 'get' | 667 | * reserved via 'get' |
@@ -1238,10 +830,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) | |||
1238 | mci->dev_name = pci_name(pdev); | 830 | mci->dev_name = pci_name(pdev); |
1239 | mci->ctl_page_to_phys = NULL; | 831 | mci->ctl_page_to_phys = NULL; |
1240 | 832 | ||
1241 | #if 0 | ||
1242 | /* Set the function pointer to an actual operation function */ | 833 | /* Set the function pointer to an actual operation function */ |
1243 | mci->edac_check = i7300_check_error; | 834 | mci->edac_check = i7300_check_error; |
1244 | #endif | ||
1245 | 835 | ||
1246 | /* initialize the MC control structure 'csrows' table | 836 | /* initialize the MC control structure 'csrows' table |
1247 | * with the mapping and control information */ | 837 | * with the mapping and control information */ |
@@ -1251,10 +841,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) | |||
1251 | "value\n"); | 841 | "value\n"); |
1252 | mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ | 842 | mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ |
1253 | } else { | 843 | } else { |
1254 | #if 0 | ||
1255 | debugf1("MC: Enable error reporting now\n"); | 844 | debugf1("MC: Enable error reporting now\n"); |
1256 | i7300_enable_error_reporting(mci); | 845 | i7300_enable_error_reporting(mci); |
1257 | #endif | ||
1258 | } | 846 | } |
1259 | 847 | ||
1260 | /* add this new MC control structure to EDAC's list of MCs */ | 848 | /* add this new MC control structure to EDAC's list of MCs */ |
@@ -1267,9 +855,7 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx) | |||
1267 | goto fail1; | 855 | goto fail1; |
1268 | } | 856 | } |
1269 | 857 | ||
1270 | #if 0 | ||
1271 | i7300_clear_error(mci); | 858 | i7300_clear_error(mci); |
1272 | #endif | ||
1273 | 859 | ||
1274 | /* allocating generic PCI control info */ | 860 | /* allocating generic PCI control info */ |
1275 | i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); | 861 | i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); |