aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Mauro Carvalho Chehab <mchehab@redhat.com> 2010-08-26 23:16:12 -0400
committer: Mauro Carvalho Chehab <mchehab@redhat.com> 2010-08-30 13:56:48 -0400
commit: 5de6e07ed75ee29a302f50e149339ca747131121 (patch)
tree: 07f40e95d30d340a0fa39f722196ffd34d8ccc3a /drivers
parent: 3e57eef64c53d4a45790fb7bb60a4ee6bf2bad30 (diff)
i7300_edac: Add error detection code for global errors
There's no mention in the datasheet of how to enable global error reporting, so I'm assuming that those errors are always enabled. Maybe I'm plain wrong about that ;) Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/edac/i7300_edac.c 568
1 files changed, 77 insertions, 491 deletions
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 3e60dbaa0e7e..e617b4f79dd8 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -158,8 +158,17 @@ static const char *numcol_toString[] = {
158 * Device 16.2: Global Error Registers 158 * Device 16.2: Global Error Registers
159 */ 159 */
160 160
161#define FERR_GLOBAL_HI 0x48
162static const char *ferr_global_hi_name[] = {
163 [3] = "FSB 3 Fatal Error",
164 [2] = "FSB 2 Fatal Error",
165 [1] = "FSB 1 Fatal Error",
166 [0] = "FSB 0 Fatal Error",
167};
168#define ferr_global_hi_is_fatal(errno) 1
169
161#define FERR_GLOBAL_LO 0x40 170#define FERR_GLOBAL_LO 0x40
162static const char *ferr_global_name[] = { 171static const char *ferr_global_lo_name[] = {
163 [31] = "Internal MCH Fatal Error", 172 [31] = "Internal MCH Fatal Error",
164 [30] = "Intel QuickData Technology Device Fatal Error", 173 [30] = "Intel QuickData Technology Device Fatal Error",
165 [29] = "FSB1 Fatal Error", 174 [29] = "FSB1 Fatal Error",
@@ -193,190 +202,7 @@ static const char *ferr_global_name[] = {
193 [1] = "PCI Express Device 1 Non-Fatal Error", 202 [1] = "PCI Express Device 1 Non-Fatal Error",
194 [0] = "ESI Non-Fatal Error", 203 [0] = "ESI Non-Fatal Error",
195}; 204};
196 205#define ferr_global_lo_is_fatal(errno) ((errno < 16) ? 0 : 1)
197#define NERR_GLOBAL 0x44
198static const char *nerr_global_name[] = {
199 [31] = "Internal MCH Fatal Error",
200 [30] = "Intel QuickData Technology Device Fatal Error",
201 [29] = "FSB1 Fatal Error",
202 [28] = "FSB0 Fatal Error",
203 [27] = "FSB2 Fatal Error",
204 [26] = "FSB3 Fatal Error",
205 [25] = "Reserved",
206 [24] = "FBD Channel 0,1,2 or 3 Fatal Error",
207 [23] = "PCI Express Device 7 Fatal Error",
208 [22] = "PCI Express Device 6 Fatal Error",
209 [21] = "PCI Express Device 5 Fatal Error",
210 [20] = "PCI Express Device 4 Fatal Error",
211 [19] = "PCI Express Device 3 Fatal Error",
212 [18] = "PCI Express Device 2 Fatal Error",
213 [17] = "PCI Express Device 1 Fatal Error",
214 [16] = "ESI Fatal Error",
215 [15] = "Internal MCH Non-Fatal Error",
216 [14] = "Intel QuickData Technology Device Non Fatal Error",
217 [13] = "FSB1 Non-Fatal Error",
218 [12] = "FSB0 Non-Fatal Error",
219 [11] = "FSB2 Non-Fatal Error",
220 [10] = "FSB3 Non-Fatal Error",
221 [9] = "Reserved",
222 [8] = "FBD Channel 0,1, 2 or 3 Non-Fatal Error",
223 [7] = "PCI Express Device 7 Non-Fatal Error",
224 [6] = "PCI Express Device 6 Non-Fatal Error",
225 [5] = "PCI Express Device 5 Non-Fatal Error",
226 [4] = "PCI Express Device 4 Non-Fatal Error",
227 [3] = "PCI Express Device 3 Non-Fatal Error",
228 [2] = "PCI Express Device 2 Non-Fatal Error",
229 [1] = "PCI Express Device 1 Non-Fatal Error",
230 [0] = "ESI Non-Fatal Error",
231};
232
233#if 0
234
235/*
236 * Error indicator bits and masks
237 * Error masks are according with Table 5-17 of i7300 datasheet
238 */
239
240enum error_mask {
241 EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */
242 EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */
243 EMASK_M3 = 1<<2, /* Reserved */
244 EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */
245 EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */
246 EMASK_M6 = 1<<5, /* Unsupported on i7300 */
247 EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
248 EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */
249 EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */
250 EMASK_M10 = 1<<9, /* Unsupported on i7300 */
251 EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */
252 EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */
253 EMASK_M13 = 1<<12, /* Memory Write error on first attempt */
254 EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */
255 EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */
256 EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */
257 EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */
258 EMASK_M18 = 1<<17, /* Unsupported on i7300 */
259 EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */
260 EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */
261 EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */
262 EMASK_M22 = 1<<21, /* SPD protocol Error */
263 EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */
264 EMASK_M24 = 1<<23, /* Refresh error */
265 EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */
266 EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */
267 EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */
268 EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */
269 EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */
270};
271
272/*
273 * Names to translate bit error into something useful
274 */
275static const char *error_name[] = {
276 [0] = "Memory Write error on non-redundant retry",
277 [1] = "Memory or FB-DIMM configuration CRC read error",
278 /* Reserved */
279 [3] = "Uncorrectable Data ECC on Replay",
280 [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
281 /* M6 Unsupported on i7300 */
282 [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
283 [7] = "Aliased Uncorrectable Patrol Data ECC",
284 [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
285 /* M10 Unsupported on i7300 */
286 [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
287 [11] = "Non-Aliased Uncorrectable Patrol Data ECC",
288 [12] = "Memory Write error on first attempt",
289 [13] = "FB-DIMM Configuration Write error on first attempt",
290 [14] = "Memory or FB-DIMM configuration CRC read error",
291 [15] = "Channel Failed-Over Occurred",
292 [16] = "Correctable Non-Mirrored Demand Data ECC",
293 /* M18 Unsupported on i7300 */
294 [18] = "Correctable Resilver- or Spare-Copy Data ECC",
295 [19] = "Correctable Patrol Data ECC",
296 [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
297 [21] = "SPD protocol Error",
298 [22] = "Non-Redundant Fast Reset Timeout",
299 [23] = "Refresh error",
300 [24] = "Memory Write error on redundant retry",
301 [25] = "Redundant Fast Reset Timeout",
302 [26] = "Correctable Counter Threshold Exceeded",
303 [27] = "DIMM-Spare Copy Completed",
304 [28] = "DIMM-Isolation Completed",
305};
306
307/* Fatal errors */
308#define ERROR_FAT_MASK (EMASK_M1 | \
309 EMASK_M2 | \
310 EMASK_M23)
311
312/* Correctable errors */
313#define ERROR_NF_CORRECTABLE (EMASK_M27 | \
314 EMASK_M20 | \
315 EMASK_M19 | \
316 EMASK_M18 | \
317 EMASK_M17 | \
318 EMASK_M16)
319#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \
320 EMASK_M28)
321#define ERROR_NF_SPD_PROTOCOL (EMASK_M22)
322#define ERROR_NF_NORTH_CRC (EMASK_M21)
323
324/* Recoverable errors */
325#define ERROR_NF_RECOVERABLE (EMASK_M26 | \
326 EMASK_M25 | \
327 EMASK_M24 | \
328 EMASK_M15 | \
329 EMASK_M14 | \
330 EMASK_M13 | \
331 EMASK_M12 | \
332 EMASK_M11 | \
333 EMASK_M9 | \
334 EMASK_M8 | \
335 EMASK_M7 | \
336 EMASK_M5)
337
338/* uncorrectable errors */
339#define ERROR_NF_UNCORRECTABLE (EMASK_M4)
340
341/* mask to all non-fatal errors */
342#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \
343 ERROR_NF_UNCORRECTABLE | \
344 ERROR_NF_RECOVERABLE | \
345 ERROR_NF_DIMM_SPARE | \
346 ERROR_NF_SPD_PROTOCOL | \
347 ERROR_NF_NORTH_CRC)
348
349/*
350 * Define error masks for the several registers
351 */
352
353/* Enable all fatal and non fatal errors */
354#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK)
355
356/* mask for fatal error registers */
357#define FERR_FAT_MASK ERROR_FAT_MASK
358
359/* masks for non-fatal error register */
360static inline int to_nf_mask(unsigned int mask)
361{
362 return (mask & EMASK_M29) | (mask >> 3);
363};
364
365static inline int from_nf_ferr(unsigned int mask)
366{
367 return (mask & EMASK_M29) | /* Bit 28 */
368 (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */
369};
370
371#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK)
372#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE)
373#define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE)
374#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL)
375#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC)
376#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE)
377#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE)
378
379#endif
380 206
381/* Device name and register DID (Device ID) */ 207/* Device name and register DID (Device ID) */
382struct i7300_dev_info { 208struct i7300_dev_info {
@@ -416,85 +242,28 @@ struct i7300_pvt {
416 struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; 242 struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS];
417}; 243};
418 244
419#if 0
420/* I7300 MCH error information retrieved from Hardware */
421struct i7300_error_info {
422 /* These registers are always read from the MC */
423 u32 ferr_fat_fbd; /* First Errors Fatal */
424 u32 nerr_fat_fbd; /* Next Errors Fatal */
425 u32 ferr_nf_fbd; /* First Errors Non-Fatal */
426 u32 nerr_nf_fbd; /* Next Errors Non-Fatal */
427
428 /* These registers are input ONLY if there was a Recoverable Error */
429 u32 redmemb; /* Recoverable Mem Data Error log B */
430 u16 recmema; /* Recoverable Mem Error log A */
431 u32 recmemb; /* Recoverable Mem Error log B */
432
433 /* These registers are input ONLY if there was a Non-Rec Error */
434 u16 nrecmema; /* Non-Recoverable Mem log A */
435 u16 nrecmemb; /* Non-Recoverable Mem log B */
436
437};
438#endif
439
440/* FIXME: Why do we need to have this static? */ 245/* FIXME: Why do we need to have this static? */
441static struct edac_pci_ctl_info *i7300_pci; 246static struct edac_pci_ctl_info *i7300_pci;
442 247
248/********************************************
249 * i7300 Functions related to error detection
250 ********************************************/
443 251
444#if 0 252struct i7300_error_info {
445/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and 253 int dummy; /* FIXME */
446 5400 better to use an inline function than a macro in this case */ 254};
447static inline int nrec_bank(struct i7300_error_info *info) 255
448{ 256const char *get_err_from_table(const char *table[], int size, int pos)
449 return ((info->nrecmema) >> 12) & 0x7;
450}
451static inline int nrec_rank(struct i7300_error_info *info)
452{
453 return ((info->nrecmema) >> 8) & 0xf;
454}
455static inline int nrec_buf_id(struct i7300_error_info *info)
456{
457 return ((info->nrecmema)) & 0xff;
458}
459static inline int nrec_rdwr(struct i7300_error_info *info)
460{
461 return (info->nrecmemb) >> 31;
462}
463/* This applies to both NREC and REC string so it can be used with nrec_rdwr
464 and rec_rdwr */
465static inline const char *rdwr_str(int rdwr)
466{
467 return rdwr ? "Write" : "Read";
468}
469static inline int nrec_cas(struct i7300_error_info *info)
470{
471 return ((info->nrecmemb) >> 16) & 0x1fff;
472}
473static inline int nrec_ras(struct i7300_error_info *info)
474{
475 return (info->nrecmemb) & 0xffff;
476}
477static inline int rec_bank(struct i7300_error_info *info)
478{
479 return ((info->recmema) >> 12) & 0x7;
480}
481static inline int rec_rank(struct i7300_error_info *info)
482{
483 return ((info->recmema) >> 8) & 0xf;
484}
485static inline int rec_rdwr(struct i7300_error_info *info)
486{
487 return (info->recmemb) >> 31;
488}
489static inline int rec_cas(struct i7300_error_info *info)
490{
491 return ((info->recmemb) >> 16) & 0x1fff;
492}
493static inline int rec_ras(struct i7300_error_info *info)
494{ 257{
495 return (info->recmemb) & 0xffff; 258 if (pos >= size)
259 return "Reserved";
260
261 return table[pos];
496} 262}
497 263
264#define GET_ERR_FROM_TABLE(table, pos) \
265 get_err_from_table(table, ARRAY_SIZE(table), pos)
266
498/* 267/*
499 * i7300_get_error_info Retrieve the hardware error information from 268 * i7300_get_error_info Retrieve the hardware error information from
500 * the hardware and cache it in the 'info' 269 * the hardware and cache it in the 'info'
@@ -503,234 +272,63 @@ static inline int rec_ras(struct i7300_error_info *info)
503static void i7300_get_error_info(struct mem_ctl_info *mci, 272static void i7300_get_error_info(struct mem_ctl_info *mci,
504 struct i7300_error_info *info) 273 struct i7300_error_info *info)
505{ 274{
506 struct i7300_pvt *pvt;
507 u32 value;
508
509 pvt = mci->pvt_info;
510
511 /* read in the 1st FATAL error register */
512 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_FAT_FBD, &value);
513
514 /* Mask only the bits that the doc says are valid
515 */
516 value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);
517
518 /* If there is an error, then read in the
519 NEXT FATAL error register and the Memory Error Log Register A
520 */
521 if (value & FERR_FAT_MASK) {
522 info->ferr_fat_fbd = value;
523
524 /* harvest the various error data we need */
525 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
526 NERR_FAT_FBD, &info->nerr_fat_fbd);
527 pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
528 NRECMEMA, &info->nrecmema);
529 pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
530 NRECMEMB, &info->nrecmemb);
531
532 /* Clear the error bits, by writing them back */
533 pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
534 FERR_FAT_FBD, value);
535 } else {
536 info->ferr_fat_fbd = 0;
537 info->nerr_fat_fbd = 0;
538 info->nrecmema = 0;
539 info->nrecmemb = 0;
540 }
541
542 /* read in the 1st NON-FATAL error register */
543 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, FERR_NF_FBD, &value);
544
545 /* If there is an error, then read in the 1st NON-FATAL error
546 * register as well */
547 if (value & FERR_NF_MASK) {
548 info->ferr_nf_fbd = value;
549
550 /* harvest the various error data we need */
551 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
552 NERR_NF_FBD, &info->nerr_nf_fbd);
553 pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
554 RECMEMA, &info->recmema);
555 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
556 RECMEMB, &info->recmemb);
557 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
558 REDMEMB, &info->redmemb);
559
560 /* Clear the error bits, by writing them back */
561 pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
562 FERR_NF_FBD, value);
563 } else {
564 info->ferr_nf_fbd = 0;
565 info->nerr_nf_fbd = 0;
566 info->recmema = 0;
567 info->recmemb = 0;
568 info->redmemb = 0;
569 }
570} 275}
571 276
572/* 277/*
573 * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, 278 * i7300_process_error_global Retrieve the hardware error information from
574 * struct i7300_error_info *info, 279 * the hardware and cache it in the 'info'
575 * int handle_errors); 280 * structure
576 *
577 * handle the Intel FATAL and unrecoverable errors, if any
578 */
579static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci,
580 struct i7300_error_info *info,
581 unsigned long allErrors)
582{
583 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80];
584 int branch;
585 int channel;
586 int bank;
587 int buf_id;
588 int rank;
589 int rdwr;
590 int ras, cas;
591 int errnum;
592 char *type = NULL;
593
594 if (!allErrors)
595 return; /* if no error, return now */
596
597 if (allErrors & ERROR_FAT_MASK)
598 type = "FATAL";
599 else if (allErrors & FERR_NF_UNCORRECTABLE)
600 type = "NON-FATAL uncorrected";
601 else
602 type = "NON-FATAL recoverable";
603
604 /* ONLY ONE of the possible error bits will be set, as per the docs */
605
606 branch = extract_fbdchan_indx(info->ferr_fat_fbd);
607 channel = branch;
608
609 /* Use the NON-Recoverable macros to extract data */
610 bank = nrec_bank(info);
611 rank = nrec_rank(info);
612 buf_id = nrec_buf_id(info);
613 rdwr = nrec_rdwr(info);
614 ras = nrec_ras(info);
615 cas = nrec_cas(info);
616
617 debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d "
618 "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
619 rank, channel, channel + 1, branch >> 1, bank,
620 buf_id, rdwr_str(rdwr), ras, cas);
621
622 /* Only 1 bit will be on */
623 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
624
625 /* Form out message */
626 snprintf(msg, sizeof(msg),
627 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
628 "RAS=%d CAS=%d %s Err=0x%lx (%s))",
629 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
630 type, allErrors, error_name[errnum]);
631
632 /* Call the helper to output message */
633 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
634}
635
636/*
637 * i7300_process_fatal_error_info(struct mem_ctl_info *mci,
638 * struct i7300_error_info *info,
639 * int handle_errors);
640 *
641 * handle the Intel NON-FATAL errors, if any
642 */ 281 */
643static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, 282static void i7300_process_error_global(struct mem_ctl_info *mci,
644 struct i7300_error_info *info) 283 struct i7300_error_info *info)
645{ 284{
646 char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; 285 struct i7300_pvt *pvt;
647 unsigned long allErrors; 286 u32 errnum, value;
648 int branch; 287 unsigned long errors;
649 int channel; 288 const char *specific;
650 int bank; 289 bool is_fatal;
651 int rank;
652 int rdwr;
653 int ras, cas;
654 int errnum;
655
656 /* mask off the Error bits that are possible */
657 allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK);
658 if (!allErrors)
659 return; /* if no error, return now */
660
661 /* ONLY ONE of the possible error bits will be set, as per the docs */
662
663 if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) {
664 i7300_proccess_non_recoverable_info(mci, info, allErrors);
665 return;
666 }
667
668 /* Correctable errors */
669 if (allErrors & ERROR_NF_CORRECTABLE) {
670 debugf0("\tCorrected bits= 0x%lx\n", allErrors);
671
672 branch = extract_fbdchan_indx(info->ferr_nf_fbd);
673
674 channel = 0;
675 if (REC_ECC_LOCATOR_ODD(info->redmemb))
676 channel = 1;
677
678 /* Convert channel to be based from zero, instead of
679 * from branch base of 0 */
680 channel += branch;
681
682 bank = rec_bank(info);
683 rank = rec_rank(info);
684 rdwr = rec_rdwr(info);
685 ras = rec_ras(info);
686 cas = rec_cas(info);
687
688 /* Only 1 bit will be on */
689 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name));
690
691 debugf0("\t\tCSROW= %d Channel= %d (Branch %d "
692 "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
693 rank, channel, branch >> 1, bank,
694 rdwr_str(rdwr), ras, cas);
695
696 /* Form out message */
697 snprintf(msg, sizeof(msg),
698 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
699 "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
700 branch >> 1, bank, rdwr_str(rdwr), ras, cas,
701 allErrors, error_name[errnum]);
702 290
703 /* Call the helper to output message */ 291 pvt = mci->pvt_info;
704 edac_mc_handle_fbd_ce(mci, rank, channel, msg);
705 292
706 return; 293 /* read in the 1st FATAL error register */
294 pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
295 FERR_GLOBAL_HI, &value);
296 if (unlikely(value)) {
297 errors = value;
298 errnum = find_first_bit(&errors,
299 ARRAY_SIZE(ferr_global_hi_name));
300 specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
301 is_fatal = ferr_global_hi_is_fatal(errnum);
302 goto error_global;
707 } 303 }
708 304
709 /* Miscelaneous errors */ 305 pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
710 errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); 306 FERR_GLOBAL_LO, &value);
711 307 if (unlikely(value)) {
712 branch = extract_fbdchan_indx(info->ferr_nf_fbd); 308 errors = value;
309 errnum = find_first_bit(&errors,
310 ARRAY_SIZE(ferr_global_lo_name));
311 specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
312 is_fatal = ferr_global_lo_is_fatal(errnum);
313 goto error_global;
314 }
315 return;
713 316
714 i7300_mc_printk(mci, KERN_EMERG, 317error_global:
715 "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", 318 i7300_mc_printk(mci, KERN_EMERG, "%s misc error: %s\n",
716 branch >> 1, allErrors, error_name[errnum]); 319 is_fatal ? "Fatal" : "NOT fatal", specific);
717} 320}
718 321
719/* 322/*
720 * i7300_process_error_info Process the error info that is 323 * i7300_process_error_info Retrieve the hardware error information from
721 * in the 'info' structure, previously retrieved from hardware 324 * the hardware and cache it in the 'info'
325 * structure
722 */ 326 */
723static void i7300_process_error_info(struct mem_ctl_info *mci, 327static void i7300_process_error_info(struct mem_ctl_info *mci,
724 struct i7300_error_info *info) 328 struct i7300_error_info *info)
725{ u32 allErrors; 329{
726 330 i7300_process_error_global(mci, info);
727 /* First handle any fatal errors that occurred */ 331};
728 allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK);
729 i7300_proccess_non_recoverable_info(mci, info, allErrors);
730
731 /* now handle any non-fatal errors that occurred */
732 i7300_process_nonfatal_error_info(mci, info);
733}
734 332
735/* 333/*
736 * i7300_clear_error Retrieve any error from the hardware 334 * i7300_clear_error Retrieve any error from the hardware
@@ -753,6 +351,7 @@ static void i7300_check_error(struct mem_ctl_info *mci)
753{ 351{
754 struct i7300_error_info info; 352 struct i7300_error_info info;
755 debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); 353 debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
354
756 i7300_get_error_info(mci, &info); 355 i7300_get_error_info(mci, &info);
757 i7300_process_error_info(mci, &info); 356 i7300_process_error_info(mci, &info);
758} 357}
@@ -763,22 +362,11 @@ static void i7300_check_error(struct mem_ctl_info *mci)
763 */ 362 */
764static void i7300_enable_error_reporting(struct mem_ctl_info *mci) 363static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
765{ 364{
766 struct i7300_pvt *pvt;
767 u32 fbd_error_mask;
768
769 pvt = mci->pvt_info;
770
771 /* Read the FBD Error Mask Register */
772 pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD,
773 &fbd_error_mask);
774
775 /* Enable with a '0' */
776 fbd_error_mask &= ~(ENABLE_EMASK_ALL);
777
778 pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map, EMASK_FBD,
779 fbd_error_mask);
780} 365}
781#endif 366
367/************************************************
368 * i7300 Functions related to memory enumeration
369 ************************************************/
782 370
783/* 371/*
784 * determine_mtr(pvt, csrow, channel) 372 * determine_mtr(pvt, csrow, channel)
@@ -1070,6 +658,10 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci)
1070 return 0; 658 return 0;
1071} 659}
1072 660
661/*************************************************
662 * i7300 Functions related to device probe/release
663 *************************************************/
664
1073/* 665/*
1074 * i7300_put_devices 'put' all the devices that we have 666 * i7300_put_devices 'put' all the devices that we have
1075 * reserved via 'get' 667 * reserved via 'get'
@@ -1238,10 +830,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx)
1238 mci->dev_name = pci_name(pdev); 830 mci->dev_name = pci_name(pdev);
1239 mci->ctl_page_to_phys = NULL; 831 mci->ctl_page_to_phys = NULL;
1240 832
1241#if 0
1242 /* Set the function pointer to an actual operation function */ 833 /* Set the function pointer to an actual operation function */
1243 mci->edac_check = i7300_check_error; 834 mci->edac_check = i7300_check_error;
1244#endif
1245 835
1246 /* initialize the MC control structure 'csrows' table 836 /* initialize the MC control structure 'csrows' table
1247 * with the mapping and control information */ 837 * with the mapping and control information */
@@ -1251,10 +841,8 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx)
1251 "value\n"); 841 "value\n");
1252 mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ 842 mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
1253 } else { 843 } else {
1254#if 0
1255 debugf1("MC: Enable error reporting now\n"); 844 debugf1("MC: Enable error reporting now\n");
1256 i7300_enable_error_reporting(mci); 845 i7300_enable_error_reporting(mci);
1257#endif
1258 } 846 }
1259 847
1260 /* add this new MC control structure to EDAC's list of MCs */ 848 /* add this new MC control structure to EDAC's list of MCs */
@@ -1267,9 +855,7 @@ static int i7300_probe1(struct pci_dev *pdev, int dev_idx)
1267 goto fail1; 855 goto fail1;
1268 } 856 }
1269 857
1270#if 0
1271 i7300_clear_error(mci); 858 i7300_clear_error(mci);
1272#endif
1273 859
1274 /* allocating generic PCI control info */ 860 /* allocating generic PCI control info */
1275 i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); 861 i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);