aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac/edac_core.h
diff options
context:
space:
mode:
author Mauro Carvalho Chehab <mchehab@redhat.com> 2012-04-18 14:20:50 -0400
committer Mauro Carvalho Chehab <mchehab@redhat.com> 2012-05-28 18:10:59 -0400
commit 4275be63559719c3149b19751029f1b0f1b26775 (patch)
tree d215a184f4278d7bc9095f18eb4c748149e241f3 /drivers/edac/edac_core.h
parent 982216a4290543fe73ae4f0a156f3d7906bd9b73 (diff)
edac: Change internal representation to work with layers
Change the EDAC internal representation to work with non-csrow based memory controllers. There are lots of those memory controllers nowadays, and more are coming. So, the EDAC internal representation needs to be changed in order to work with those memory controllers, while preserving backward compatibility with the old ones. The edac core was written with the idea that memory controllers are able to directly access csrows. This is not true for FB-DIMM and RAMBUS memory controllers. Also, some recent advanced memory controllers don't present a per-csrow view; they view memories as DIMMs rather than as ranks. So, change the allocation and error report routines to allow them to work with all types of architectures. This will allow the removal of several hacks with FB-DIMM and RAMBUS memory controllers. Also, several tests were done on different platforms using different x86 drivers. TODO: multi-rank DIMMs are currently represented by multiple DIMM entries in struct dimm_info. That means that changing a label for one rank won't change the same label for the other ranks of the same DIMM. This bug has been present since the beginning of EDAC, so it is not a big deal. However, on several drivers, it is possible to fix this issue, but it should be a per-driver fix, as the csrow => DIMM arrangement may not be the same for all of them. So, don't try to fix it here yet. I tried to make this patch as short as possible, preceding it with several other patches that simplified the logic here. Yet, as the internal API changes, all drivers need changes. The changes are generally bigger in the drivers for FB-DIMMs. Cc: Aristeu Rozanski <arozansk@redhat.com> Cc: Doug Thompson <norsk5@yahoo.com> Cc: Borislav Petkov <borislav.petkov@amd.com> Cc: Mark Gross <mark.gross@intel.com> Cc: Jason Uhlenkott <juhlenko@akamai.com> Cc: Tim Small <tim@buttersideup.com> Cc: Ranganathan Desikan <ravi@jetztechnologies.com> Cc: "Arvind R." 
<arvino55@gmail.com> Cc: Olof Johansson <olof@lixom.net> Cc: Egor Martovetsky <egor@pasemi.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Michal Marek <mmarek@suse.cz> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Joe Perches <joe@perches.com> Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Hitoshi Mitake <h.mitake@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com> Cc: Shaohui Xie <Shaohui.Xie@freescale.com> Cc: Josh Boyer <jwboyer@gmail.com> Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac/edac_core.h')
-rw-r--r-- drivers/edac/edac_core.h 99
1 files changed, 79 insertions, 20 deletions
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index e48ab3108ad8..1286c5e1bdc0 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -447,8 +447,12 @@ static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
447 447
448#endif /* CONFIG_PCI */ 448#endif /* CONFIG_PCI */
449 449
450extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, 450struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
451 unsigned nr_chans, int edac_index); 451 unsigned nr_chans, int edac_index);
452struct mem_ctl_info *new_edac_mc_alloc(unsigned edac_index,
453 unsigned n_layers,
454 struct edac_mc_layer *layers,
455 unsigned sz_pvt);
452extern int edac_mc_add_mc(struct mem_ctl_info *mci); 456extern int edac_mc_add_mc(struct mem_ctl_info *mci);
453extern void edac_mc_free(struct mem_ctl_info *mci); 457extern void edac_mc_free(struct mem_ctl_info *mci);
454extern struct mem_ctl_info *edac_mc_find(int idx); 458extern struct mem_ctl_info *edac_mc_find(int idx);
@@ -467,24 +471,78 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
467 * reporting logic and function interface - reduces conditional 471 * reporting logic and function interface - reduces conditional
468 * statement clutter and extra function arguments. 472 * statement clutter and extra function arguments.
469 */ 473 */
470extern void edac_mc_handle_ce(struct mem_ctl_info *mci, 474
471 unsigned long page_frame_number, 475void edac_mc_handle_error(const enum hw_event_mc_err_type type,
472 unsigned long offset_in_page, 476 struct mem_ctl_info *mci,
473 unsigned long syndrome, int row, int channel, 477 const unsigned long page_frame_number,
474 const char *msg); 478 const unsigned long offset_in_page,
475extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, 479 const unsigned long syndrome,
476 const char *msg); 480 const int layer0,
477extern void edac_mc_handle_ue(struct mem_ctl_info *mci, 481 const int layer1,
478 unsigned long page_frame_number, 482 const int layer2,
479 unsigned long offset_in_page, int row, 483 const char *msg,
480 const char *msg); 484 const char *other_detail,
481extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, 485 const void *mcelog);
482 const char *msg); 486
483extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow, 487static inline void edac_mc_handle_ce(struct mem_ctl_info *mci,
484 unsigned int channel0, unsigned int channel1, 488 unsigned long page_frame_number,
485 char *msg); 489 unsigned long offset_in_page,
486extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow, 490 unsigned long syndrome, int row, int channel,
487 unsigned int channel, char *msg); 491 const char *msg)
492{
493 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
494 page_frame_number, offset_in_page, syndrome,
495 row, channel, -1, msg, NULL, NULL);
496}
497
498static inline void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
499 const char *msg)
500{
501 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
502 0, 0, 0, -1, -1, -1, msg, NULL, NULL);
503}
504
505static inline void edac_mc_handle_ue(struct mem_ctl_info *mci,
506 unsigned long page_frame_number,
507 unsigned long offset_in_page, int row,
508 const char *msg)
509{
510 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
511 page_frame_number, offset_in_page, 0,
512 row, -1, -1, msg, NULL, NULL);
513}
514
515static inline void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
516 const char *msg)
517{
518 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
519 0, 0, 0, -1, -1, -1, msg, NULL, NULL);
520}
521
522static inline void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
523 unsigned int csrow,
524 unsigned int channel0,
525 unsigned int channel1,
526 char *msg)
527{
528 /*
529 *FIXME: The error can also be at channel1 (e. g. at the second
530 * channel of the same branch). The fix is to push
531 * edac_mc_handle_error() call into each driver
532 */
533 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
534 0, 0, 0,
535 csrow, channel0, -1, msg, NULL, NULL);
536}
537
538static inline void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
539 unsigned int csrow,
540 unsigned int channel, char *msg)
541{
542 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
543 0, 0, 0,
544 csrow, channel, -1, msg, NULL, NULL);
545}
488 546
489/* 547/*
490 * edac_device APIs 548 * edac_device APIs
@@ -496,6 +554,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
496extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, 554extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
497 int inst_nr, int block_nr, const char *msg); 555 int inst_nr, int block_nr, const char *msg);
498extern int edac_device_alloc_index(void); 556extern int edac_device_alloc_index(void);
557extern const char *edac_layer_name[];
499 558
500/* 559/*
501 * edac_pci APIs 560 * edac_pci APIs