aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/edac.h
diff options
context:
space:
mode:
author Mauro Carvalho Chehab <mchehab@redhat.com> 2012-04-18 14:20:50 -0400
committer Mauro Carvalho Chehab <mchehab@redhat.com> 2012-05-28 18:10:59 -0400
commit 4275be63559719c3149b19751029f1b0f1b26775 (patch)
tree d215a184f4278d7bc9095f18eb4c748149e241f3 /include/linux/edac.h
parent 982216a4290543fe73ae4f0a156f3d7906bd9b73 (diff)
edac: Change internal representation to work with layers
Change the EDAC internal representation to work with non-csrow based memory controllers. There are lots of those memory controllers nowadays, and more are coming. So, the EDAC internal representation needs to be changed, in order to work with those memory controllers, while preserving backward compatibility with the old ones. The edac core was written with the idea that memory controllers are able to directly access csrows. This is not true for FB-DIMM and RAMBUS memory controllers. Also, some recent advanced memory controllers don't present a per-csrows view. Instead, they view memories as DIMMs rather than ranks. So, change the allocation and error report routines to allow them to work with all types of architectures. This will allow the removal of several hacks with FB-DIMM and RAMBUS memory controllers. Also, several tests were done on different platforms using different x86 drivers. TODO: multi-rank DIMMs are currently represented by multiple DIMM entries in struct dimm_info. That means that changing a label for one rank won't change the same label for the other ranks on the same DIMM. This bug has been present since the beginning of the EDAC, so it is not a big deal. However, on several drivers, it is possible to fix this issue, but it should be a per-driver fix, as the csrow => DIMM arrangement may not be equal for all. So, don't try to fix it here yet. I tried to make this patch as short as possible, preceding it with several other patches that simplified the logic here. Yet, as the internal API changes, all drivers need changes. The changes are generally bigger in the drivers for FB-DIMMs.
Cc: Aristeu Rozanski <arozansk@redhat.com> Cc: Doug Thompson <norsk5@yahoo.com> Cc: Borislav Petkov <borislav.petkov@amd.com> Cc: Mark Gross <mark.gross@intel.com> Cc: Jason Uhlenkott <juhlenko@akamai.com> Cc: Tim Small <tim@buttersideup.com> Cc: Ranganathan Desikan <ravi@jetztechnologies.com> Cc: "Arvind R." <arvino55@gmail.com> Cc: Olof Johansson <olof@lixom.net> Cc: Egor Martovetsky <egor@pasemi.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Michal Marek <mmarek@suse.cz> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Joe Perches <joe@perches.com> Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Hitoshi Mitake <h.mitake@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com> Cc: Shaohui Xie <Shaohui.Xie@freescale.com> Cc: Josh Boyer <jwboyer@gmail.com> Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'include/linux/edac.h')
-rw-r--r-- include/linux/edac.h 38
1 file changed, 25 insertions, 13 deletions
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 9e628434e164..d68b01cad068 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -416,18 +416,20 @@ struct edac_mc_layer {
416/* FIXME: add the proper per-location error counts */ 416/* FIXME: add the proper per-location error counts */
417struct dimm_info { 417struct dimm_info {
418 char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ 418 char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
419 unsigned memory_controller; 419
420 unsigned csrow; 420 /* Memory location data */
421 unsigned csrow_channel; 421 unsigned location[EDAC_MAX_LAYERS];
422
423 struct mem_ctl_info *mci; /* the parent */
422 424
423 u32 grain; /* granularity of reported error in bytes */ 425 u32 grain; /* granularity of reported error in bytes */
424 enum dev_type dtype; /* memory device type */ 426 enum dev_type dtype; /* memory device type */
425 enum mem_type mtype; /* memory dimm type */ 427 enum mem_type mtype; /* memory dimm type */
426 enum edac_type edac_mode; /* EDAC mode for this dimm */ 428 enum edac_type edac_mode; /* EDAC mode for this dimm */
427 429
428 u32 nr_pages; /* number of pages in csrow */ 430 u32 nr_pages; /* number of pages on this dimm */
429 431
430 u32 ce_count; /* Correctable Errors for this dimm */ 432 unsigned csrow, cschannel; /* Points to the old API data */
431}; 433};
432 434
433/** 435/**
@@ -447,9 +449,10 @@ struct dimm_info {
447 */ 449 */
448struct rank_info { 450struct rank_info {
449 int chan_idx; 451 int chan_idx;
450 u32 ce_count;
451 struct csrow_info *csrow; 452 struct csrow_info *csrow;
452 struct dimm_info *dimm; 453 struct dimm_info *dimm;
454
455 u32 ce_count; /* Correctable Errors for this csrow */
453}; 456};
454 457
455struct csrow_info { 458struct csrow_info {
@@ -545,13 +548,18 @@ struct mem_ctl_info {
545 unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci, 548 unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
546 unsigned long page); 549 unsigned long page);
547 int mc_idx; 550 int mc_idx;
548 int nr_csrows;
549 struct csrow_info *csrows; 551 struct csrow_info *csrows;
552 unsigned nr_csrows, num_cschannel;
553
554 /* Memory Controller hierarchy */
555 unsigned n_layers;
556 struct edac_mc_layer *layers;
557 bool mem_is_per_rank;
550 558
551 /* 559 /*
552 * DIMM info. Will eventually remove the entire csrows_info some day 560 * DIMM info. Will eventually remove the entire csrows_info some day
553 */ 561 */
554 unsigned nr_dimms; 562 unsigned tot_dimms;
555 struct dimm_info *dimms; 563 struct dimm_info *dimms;
556 564
557 /* 565 /*
@@ -566,12 +574,16 @@ struct mem_ctl_info {
566 const char *dev_name; 574 const char *dev_name;
567 char proc_name[MC_PROC_NAME_MAX_LEN + 1]; 575 char proc_name[MC_PROC_NAME_MAX_LEN + 1];
568 void *pvt_info; 576 void *pvt_info;
569 u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
570 u32 ce_noinfo_count; /* Correctable Errors w/o info */
571 u32 ue_count; /* Total Uncorrectable Errors for this MC */
572 u32 ce_count; /* Total Correctable Errors for this MC */
573 unsigned long start_time; /* mci load start time (in jiffies) */ 577 unsigned long start_time; /* mci load start time (in jiffies) */
574 578
579 /*
580 * drivers shouldn't access those fields directly, as the core
581 * already handles that.
582 */
583 u32 ce_noinfo_count, ue_noinfo_count;
584 u32 ue_count, ce_count;
585 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
586
575 struct completion complete; 587 struct completion complete;
576 588
577 /* edac sysfs device control */ 589 /* edac sysfs device control */
@@ -584,7 +596,7 @@ struct mem_ctl_info {
584 * by the low level driver. 596 * by the low level driver.
585 * 597 *
586 * Set by the low level driver to provide attributes at the 598 * Set by the low level driver to provide attributes at the
587 * controller level, same level as 'ue_count' and 'ce_count' above. 599 * controller level.
588 * An array of structures, NULL terminated 600 * An array of structures, NULL terminated
589 * 601 *
590 * If attributes are desired, then set to array of attributes 602 * If attributes are desired, then set to array of attributes