From 80f5ab097b87c86581cb9736a8e55c5a3047d4bb Mon Sep 17 00:00:00 2001 From: Shaun Ruffell Date: Sun, 19 Aug 2012 01:11:24 -0300 Subject: edac: edac_mc no longer deals with kobjects directly There are no more embedded kobjects in struct mem_ctl_info. Remove a header and a comment that does not reflect the code anymore. Signed-off-by: Shaun Ruffell Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 1b8c02b36f76..4784213c819d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -14,7 +14,6 @@ #include #include -#include #include #include #include -- cgit v1.2.2 From 52608ba20546139dc76cca8a46c1d901455d5450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 8 Aug 2012 12:30:56 -0300 Subject: i5100_edac: probe for device 19 function 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probe and store the device handle for the device 19 function 0 during driver initialization. The device is used during fault injection. Signed-off-by: Niklas Söderlund Signed-off-by: Mauro Carvalho Chehab --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0eb65796bcb9..d0d1e801e350 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2776,6 +2776,7 @@ #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 -- cgit v1.2.2 From c66b5a79a9348ccd6d1cd81416027d0e12da965d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 15 Feb 2013 07:21:08 -0300 Subject: edac: add a new memory layer type There are some cases where the memory controller layout is completely hidden. This is the case of firmware-driven error code, like the one provided by GHES. Add a new layer to be used on such memory error report mechanisms. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 4784213c819d..1b7744c219b8 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -375,6 +375,9 @@ enum scrub_type { * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" + * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped + * as a single memory area. This is used when + * retrieving errors from a firmware driven driver. * * This enum is used by the drivers to tell edac_mc_sysfs what name should * be used when describing a memory stick location. @@ -384,6 +387,7 @@ enum edac_mc_layer_type { EDAC_MC_LAYER_CHANNEL, EDAC_MC_LAYER_SLOT, EDAC_MC_LAYER_CHIP_SELECT, + EDAC_MC_LAYER_ALL_MEM, }; /** -- cgit v1.2.2 From c2c93dbc97622e26dc19edc71e50ebaa996d7804 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 19 Feb 2013 06:50:05 -0300 Subject: edac: remove proc_name from mci structure proc_name isn't used anywhere. Remove it. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 1b7744c219b8..ff18efc754f3 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -48,7 +48,6 @@ static inline void opstate_init(void) } #define EDAC_MC_LABEL_LEN 31 -#define MC_PROC_NAME_MAX_LEN 7 /** * enum dev_type - describe the type of memory DRAM chips used at the stick @@ -633,7 +632,6 @@ struct mem_ctl_info { const char *mod_ver; const char *ctl_name; const char *dev_name; - char proc_name[MC_PROC_NAME_MAX_LEN + 1]; void *pvt_info; unsigned long start_time; /* mci load start time (in jiffies) */ -- cgit v1.2.2 From c7ef7645544131b0750478d1cf94cdfa945c809d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 21 Feb 2013 13:36:45 -0300 Subject: edac: reduce stack pressure by using a pre-allocated buffer The number of variables at the stack is too big. Reduces the stack usage by using a pre-allocated error buffer. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index ff18efc754f3..096b7fcdf484 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -47,8 +47,18 @@ static inline void opstate_init(void) return; } +/* Max length of a DIMM label*/ #define EDAC_MC_LABEL_LEN 31 +/* Maximum size of the location string */ +#define LOCATION_SIZE 80 + +/* Defines the maximum number of labels that can be reported */ +#define EDAC_MAX_LABELS 8 + +/* String used to join two or more labels */ +#define OTHER_LABEL " or " + /** * enum dev_type - describe the type of memory DRAM chips used at the stick * @DEV_UNKNOWN: Can't be determined, or MC doesn't support detect it @@ -553,6 +563,46 @@ struct errcount_attribute_data { int layer0, layer1, layer2; }; +/** + * edac_raw_error_desc - Raw error report structure + * @grain: minimum granularity for an error report, in bytes + * @error_count: number of errors of the same type + * @top_layer: top layer of the error (layer[0]) + * @mid_layer: middle layer of the error (layer[1]) + * @low_layer: low layer of the error (layer[2]) + * @page_frame_number: page where the error happened + * @offset_in_page: page offset + * @syndrome: syndrome of the error (or 0 if unknown or if + * the syndrome is not applicable) + * @msg: error message + * @location: location of the error + * @label: label of the affected DIMM(s) + * @other_detail: other driver-specific detail about the error + * @enable_per_layer_report: if false, the error affects all layers + * (typically, a memory controller error) + */ +struct edac_raw_error_desc { + /* + * NOTE: everything before grain won't be cleaned by + * edac_raw_error_desc_clean() + */ + char location[LOCATION_SIZE]; + char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; + long grain; + + /* the vars below and grain will be cleaned on every new error report */ + u16 error_count; + int top_layer; + int mid_layer; + int low_layer; + unsigned long page_frame_number; + unsigned long offset_in_page; + unsigned long syndrome; + const char *msg; + const char *other_detail; + bool enable_per_layer_report; +}; + /* MEMORY controller information structure */ struct mem_ctl_info { @@ -660,6 +710,12 @@ struct mem_ctl_info { /* work struct for this MC */ struct delayed_work work; + /* + * Used to report an error - by being at the global struct + * makes the memory allocated by the EDAC core + */ + struct edac_raw_error_desc error_desc; + /* the internal state of this controller instance */ int op_state; -- cgit v1.2.2 From 8dd93d450bff251575c56b8f058393124e1f00fb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 19 Feb 2013 21:26:22 -0300 Subject: edac: add support for error type "Info" The CPER spec defines a forth type of error: informational logs. Add support for it at the edac API and at the trace event interface. Signed-off-by: Mauro Carvalho Chehab --- include/linux/edac.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 096b7fcdf484..4fd4999ccb5b 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -109,8 +109,24 @@ enum hw_event_mc_err_type { HW_EVENT_ERR_CORRECTED, HW_EVENT_ERR_UNCORRECTED, HW_EVENT_ERR_FATAL, + HW_EVENT_ERR_INFO, }; +static inline char *mc_event_error_type(const unsigned int err_type) +{ + switch (err_type) { + case HW_EVENT_ERR_CORRECTED: + return "Corrected"; + case HW_EVENT_ERR_UNCORRECTED: + return "Uncorrected"; + case HW_EVENT_ERR_FATAL: + return "Fatal"; + default: + case HW_EVENT_ERR_INFO: + return "Info"; + } +} + /** * enum mem_type - memory types. For a more detailed reference, please see * http://en.wikipedia.org/wiki/DRAM -- cgit v1.2.2