author    Linus Torvalds <torvalds@linux-foundation.org>  2015-06-24 22:52:06 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-06-24 22:52:06 -0400
commit    45471cd98decae5fced8b38e46c223f54a924814 (patch)
tree      6ce2f3dc9da8a538d26cdfaa0115f2308ebdde01
parent    93a4b1b9465d92e8be031b57166afa3d5611e142 (diff)
parent    043b43180efee8dcc41dde5ca710827b26d17510 (diff)
Merge tag 'edac_for_4.2_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC updates from Borislav Petkov:

 - New APM X-Gene SoC EDAC driver (Loc Ho)

 - AMD error injection module improvements (Aravind Gopalakrishnan)

 - Altera Arria 10 support (Thor Thayer)

 - misc fixes and cleanups all over the place

* tag 'edac_for_4.2_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (28 commits)
  EDAC: Update Documentation/edac.txt
  EDAC: Fix typos in Documentation/edac.txt
  EDAC, mce_amd_inj: Set MISCV on injection
  EDAC, mce_amd_inj: Move bit preparations before the injection
  EDAC, mce_amd_inj: Cleanup and simplify README
  EDAC, altera: Do not allow suspend when EDAC is enabled
  EDAC, mce_amd_inj: Make inj_type static
  arm: socfpga: dts: Add Arria10 SDRAM EDAC DTS support
  EDAC, altera: Add Arria10 EDAC support
  EDAC, altera: Refactor for Altera CycloneV SoC
  EDAC, altera: Generalize driver to use DT Memory size
  EDAC, mce_amd_inj: Add README file
  EDAC, mce_amd_inj: Add individual permissions field to dfs_node
  EDAC, mce_amd_inj: Modify flags attribute to use string arguments
  EDAC, mce_amd_inj: Read out number of MCE banks from the hardware
  EDAC, mce_amd_inj: Use MCE_INJECT_GET macro for bank node too
  EDAC, xgene: Fix cpuid abuse
  EDAC, mpc85xx: Extend error address to 64 bit
  EDAC, mpc8xxx: Adapt for FSL SoC
  EDAC, edac_stub: Drop arch-specific include
  ...
-rw-r--r--  Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt     2
-rw-r--r--  Documentation/devicetree/bindings/edac/apm-xgene-edac.txt              79
-rw-r--r--  Documentation/edac.txt                                                 289
-rw-r--r--  MAINTAINERS                                                             17
-rw-r--r--  arch/arm/Kconfig                                                         2
-rw-r--r--  arch/arm/boot/dts/socfpga_arria10.dtsi                                  11
-rw-r--r--  arch/arm/include/asm/edac.h                                              5
-rw-r--r--  arch/arm64/Kconfig                                                       1
-rw-r--r--  arch/arm64/boot/dts/apm/apm-storm.dtsi                                  83
-rw-r--r--  arch/mips/Kconfig                                                        1
-rw-r--r--  arch/mips/include/asm/edac.h                                             4
-rw-r--r--  arch/powerpc/Kconfig                                                     2
-rw-r--r--  arch/powerpc/include/asm/edac.h                                          4
-rw-r--r--  arch/tile/Kconfig                                                        1
-rw-r--r--  arch/tile/include/asm/edac.h                                            29
-rw-r--r--  arch/x86/Kconfig                                                         2
-rw-r--r--  arch/x86/include/asm/edac.h                                              2
-rw-r--r--  drivers/edac/Kconfig                                                    22
-rw-r--r--  drivers/edac/Makefile                                                    1
-rw-r--r--  drivers/edac/altera_edac.c                                             381
-rw-r--r--  drivers/edac/altera_edac.h                                             201
-rw-r--r--  drivers/edac/edac_mc.c                                                   9
-rw-r--r--  drivers/edac/edac_stub.c                                                 1
-rw-r--r--  drivers/edac/mce_amd_inj.c                                             177
-rw-r--r--  drivers/edac/mpc85xx_edac.c                                             10
-rw-r--r--  drivers/edac/mpc85xx_edac.h                                              1
-rw-r--r--  drivers/edac/xgene_edac.c                                             1215
27 files changed, 2175 insertions, 377 deletions
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt
index d0ce01da5c59..f5ad0ff69fae 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt
@@ -2,7 +2,7 @@ Altera SOCFPGA SDRAM Error Detection & Correction [EDAC]
 The EDAC accesses a range of registers in the SDRAM controller.
 
 Required properties:
-- compatible : should contain "altr,sdram-edac";
+- compatible : should contain "altr,sdram-edac" or "altr,sdram-edac-a10"
 - altr,sdr-syscon : phandle of the sdr module
 - interrupts : Should contain the SDRAM ECC IRQ in the
                appropriate format for the IRQ controller.
diff --git a/Documentation/devicetree/bindings/edac/apm-xgene-edac.txt b/Documentation/devicetree/bindings/edac/apm-xgene-edac.txt
new file mode 100644
index 000000000000..78edb80002c8
--- /dev/null
+++ b/Documentation/devicetree/bindings/edac/apm-xgene-edac.txt
@@ -0,0 +1,79 @@
+* APM X-Gene SoC EDAC node
+
+EDAC node is defined to describe on-chip error detection and correction.
+The follow error types are supported:
+
+  memory controller - Memory controller
+  PMD (L1/L2) - Processor module unit (PMD) L1/L2 cache
+
+The following section describes the EDAC DT node binding.
+
+Required properties:
+- compatible : Shall be "apm,xgene-edac".
+- regmap-csw : Regmap of the CPU switch fabric (CSW) resource.
+- regmap-mcba : Regmap of the MCB-A (memory bridge) resource.
+- regmap-mcbb : Regmap of the MCB-B (memory bridge) resource.
+- regmap-efuse : Regmap of the PMD efuse resource.
+- reg : First resource shall be the CPU bus (PCP) resource.
+- interrupts : Interrupt-specifier for MCU, PMD, L3, or SoC error
+  IRQ(s).
+
+Required properties for memory controller subnode:
+- compatible : Shall be "apm,xgene-edac-mc".
+- reg : First resource shall be the memory controller unit
+  (MCU) resource.
+- memory-controller : Instance number of the memory controller.
+
+Required properties for PMD subnode:
+- compatible : Shall be "apm,xgene-edac-pmd" or
+  "apm,xgene-edac-pmd-v2".
+- reg : First resource shall be the PMD resource.
+- pmd-controller : Instance number of the PMD controller.
+
+Example:
+    csw: csw@7e200000 {
+        compatible = "apm,xgene-csw", "syscon";
+        reg = <0x0 0x7e200000 0x0 0x1000>;
+    };
+
+    mcba: mcba@7e700000 {
+        compatible = "apm,xgene-mcb", "syscon";
+        reg = <0x0 0x7e700000 0x0 0x1000>;
+    };
+
+    mcbb: mcbb@7e720000 {
+        compatible = "apm,xgene-mcb", "syscon";
+        reg = <0x0 0x7e720000 0x0 0x1000>;
+    };
+
+    efuse: efuse@1054a000 {
+        compatible = "apm,xgene-efuse", "syscon";
+        reg = <0x0 0x1054a000 0x0 0x20>;
+    };
+
+    edac@78800000 {
+        compatible = "apm,xgene-edac";
+        #address-cells = <2>;
+        #size-cells = <2>;
+        ranges;
+        regmap-csw = <&csw>;
+        regmap-mcba = <&mcba>;
+        regmap-mcbb = <&mcbb>;
+        regmap-efuse = <&efuse>;
+        reg = <0x0 0x78800000 0x0 0x100>;
+        interrupts = <0x0 0x20 0x4>,
+                     <0x0 0x21 0x4>,
+                     <0x0 0x27 0x4>;
+
+        edacmc@7e800000 {
+            compatible = "apm,xgene-edac-mc";
+            reg = <0x0 0x7e800000 0x0 0x1000>;
+            memory-controller = <0>;
+        };
+
+        edacpmd@7c000000 {
+            compatible = "apm,xgene-edac-pmd";
+            reg = <0x0 0x7c000000 0x0 0x200000>;
+            pmd-controller = <0>;
+        };
+    };
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index 73fff13e848f..0cf27a3544a5 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -1,53 +1,34 @@
1
2
3EDAC - Error Detection And Correction 1EDAC - Error Detection And Correction
4 2=====================================
5Written by Doug Thompson <dougthompson@xmission.com>
67 Dec 2005
717 Jul 2007 Updated
8
9(c) Mauro Carvalho Chehab
1005 Aug 2009 Nehalem interface
11
12EDAC is maintained and written by:
13
14 Doug Thompson, Dave Jiang, Dave Peterson et al,
15 original author: Thayne Harbaugh,
16
17Contact:
18 website: bluesmoke.sourceforge.net
19 mailing list: bluesmoke-devel@lists.sourceforge.net
20 3
21"bluesmoke" was the name for this device driver when it was "out-of-tree" 4"bluesmoke" was the name for this device driver when it was "out-of-tree"
22and maintained at sourceforge.net. When it was pushed into 2.6.16 for the 5and maintained at sourceforge.net. When it was pushed into 2.6.16 for the
23first time, it was renamed to 'EDAC'. 6first time, it was renamed to 'EDAC'.
24 7
25The bluesmoke project at sourceforge.net is now utilized as a 'staging area' 8PURPOSE
26for EDAC development, before it is sent upstream to kernel.org 9-------
27
28At the bluesmoke/EDAC project site is a series of quilt patches against
29recent kernels, stored in a SVN repository. For easier downloading, there
30is also a tarball snapshot available.
31 10
32============================================================================ 11The 'edac' kernel module's goal is to detect and report hardware errors
33EDAC PURPOSE 12that occur within the computer system running under linux.
34
35The 'edac' kernel module goal is to detect and report errors that occur
36within the computer system running under linux.
37 13
38MEMORY 14MEMORY
15------
39 16
40In the initial release, memory Correctable Errors (CE) and Uncorrectable 17Memory Correctable Errors (CE) and Uncorrectable Errors (UE) are the
41Errors (UE) are the primary errors being harvested. These types of errors 18primary errors being harvested. These types of errors are harvested by
42are harvested by the 'edac_mc' class of device. 19the 'edac_mc' device.
43 20
44Detecting CE events, then harvesting those events and reporting them, 21Detecting CE events, then harvesting those events and reporting them,
45CAN be a predictor of future UE events. With CE events, the system can 22*can* but must not necessarily be a predictor of future UE events. With
46continue to operate, but with less safety. Preventive maintenance and 23CE events only, the system can and will continue to operate as no data
47proactive part replacement of memory DIMMs exhibiting CEs can reduce 24has been damaged yet.
48the likelihood of the dreaded UE events and system 'panics'. 25
26However, preventive maintenance and proactive part replacement of memory
27DIMMs exhibiting CEs can reduce the likelihood of the dreaded UE events
28and system panics.
49 29
50NON-MEMORY 30OTHER HARDWARE ELEMENTS
31-----------------------
51 32
52A new feature for EDAC, the edac_device class of device, was added in 33A new feature for EDAC, the edac_device class of device, was added in
53the 2.6.23 version of the kernel. 34the 2.6.23 version of the kernel.
@@ -56,70 +37,57 @@ This new device type allows for non-memory type of ECC hardware detectors
56to have their states harvested and presented to userspace via the sysfs 37to have their states harvested and presented to userspace via the sysfs
57interface. 38interface.
58 39
59Some architectures have ECC detectors for L1, L2 and L3 caches, along with DMA 40Some architectures have ECC detectors for L1, L2 and L3 caches,
60engines, fabric switches, main data path switches, interconnections, 41along with DMA engines, fabric switches, main data path switches,
61and various other hardware data paths. If the hardware reports it, then 42interconnections, and various other hardware data paths. If the hardware
62a edac_device device probably can be constructed to harvest and present 43reports it, then a edac_device device probably can be constructed to
63that to userspace. 44harvest and present that to userspace.
64 45
65 46
66PCI BUS SCANNING 47PCI BUS SCANNING
48----------------
67 49
68In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices 50In addition, PCI devices are scanned for PCI Bus Parity and SERR Errors
69in order to determine if errors are occurring on data transfers. 51in order to determine if errors are occurring during data transfers.
70 52
71The presence of PCI Parity errors must be examined with a grain of salt. 53The presence of PCI Parity errors must be examined with a grain of salt.
72There are several add-in adapters that do NOT follow the PCI specification 54There are several add-in adapters that do *not* follow the PCI specification
73with regards to Parity generation and reporting. The specification says 55with regards to Parity generation and reporting. The specification says
74the vendor should tie the parity status bits to 0 if they do not intend 56the vendor should tie the parity status bits to 0 if they do not intend
75to generate parity. Some vendors do not do this, and thus the parity bit 57to generate parity. Some vendors do not do this, and thus the parity bit
76can "float" giving false positives. 58can "float" giving false positives.
77 59
78In the kernel there is a PCI device attribute located in sysfs that is 60There is a PCI device attribute located in sysfs that is checked by
79checked by the EDAC PCI scanning code. If that attribute is set, 61the EDAC PCI scanning code. If that attribute is set, PCI parity/error
80PCI parity/error scanning is skipped for that device. The attribute 62scanning is skipped for that device. The attribute is:
81is:
82 63
83 broken_parity_status 64 broken_parity_status
84 65
85as is located in /sys/devices/pci<XXX>/0000:XX:YY.Z directories for 66and is located in /sys/devices/pci<XXX>/0000:XX:YY.Z directories for
86PCI devices. 67PCI devices.
87 68
88FUTURE HARDWARE SCANNING
89 69
90EDAC will have future error detectors that will be integrated with 70VERSIONING
91EDAC or added to it, in the following list: 71----------
92
93 MCE Machine Check Exception
94 MCA Machine Check Architecture
95 NMI NMI notification of ECC errors
96 MSRs Machine Specific Register error cases
97 and other mechanisms.
98
99These errors are usually bus errors, ECC errors, thermal throttling
100and the like.
101
102
103============================================================================
104EDAC VERSIONING
105 72
106EDAC is composed of a "core" module (edac_core.ko) and several Memory 73EDAC is composed of a "core" module (edac_core.ko) and several Memory
107Controller (MC) driver modules. On a given system, the CORE 74Controller (MC) driver modules. On a given system, the CORE is loaded
108is loaded and one MC driver will be loaded. Both the CORE and 75and one MC driver will be loaded. Both the CORE and the MC driver (or
109the MC driver (or edac_device driver) have individual versions that reflect 76edac_device driver) have individual versions that reflect current
110current release level of their respective modules. 77release level of their respective modules.
111 78
112Thus, to "report" on what version a system is running, one must report both 79Thus, to "report" on what version a system is running, one must report
113the CORE's and the MC driver's versions. 80both the CORE's and the MC driver's versions.
114 81
115 82
116LOADING 83LOADING
84-------
117 85
118If 'edac' was statically linked with the kernel then no loading is 86If 'edac' was statically linked with the kernel then no loading
119necessary. If 'edac' was built as modules then simply modprobe the 87is necessary. If 'edac' was built as modules then simply modprobe
120'edac' pieces that you need. You should be able to modprobe 88the 'edac' pieces that you need. You should be able to modprobe
121hardware-specific modules and have the dependencies load the necessary core 89hardware-specific modules and have the dependencies load the necessary
122modules. 90core modules.
123 91
124Example: 92Example:
125 93
@@ -129,35 +97,33 @@ loads both the amd76x_edac.ko memory controller module and the edac_mc.ko
129core module. 97core module.
130 98
131 99
132============================================================================ 100SYSFS INTERFACE
133EDAC sysfs INTERFACE 101---------------
134
135EDAC presents a 'sysfs' interface for control, reporting and attribute
136reporting purposes.
137 102
138EDAC lives in the /sys/devices/system/edac directory. 103EDAC presents a 'sysfs' interface for control and reporting purposes. It
104lives in the /sys/devices/system/edac directory.
139 105
140Within this directory there currently reside 2 'edac' components: 106Within this directory there currently reside 2 components:
141 107
142 mc memory controller(s) system 108 mc memory controller(s) system
143 pci PCI control and status system 109 pci PCI control and status system
144 110
145 111
146============================================================================ 112
147Memory Controller (mc) Model 113Memory Controller (mc) Model
114----------------------------
148 115
149First a background on the memory controller's model abstracted in EDAC. 116Each 'mc' device controls a set of DIMM memory modules. These modules
150Each 'mc' device controls a set of DIMM memory modules. These modules are 117are laid out in a Chip-Select Row (csrowX) and Channel table (chX).
151laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can 118There can be multiple csrows and multiple channels.
152be multiple csrows and multiple channels.
153 119
154Memory controllers allow for several csrows, with 8 csrows being a typical value. 120Memory controllers allow for several csrows, with 8 csrows being a
155Yet, the actual number of csrows depends on the electrical "loading" 121typical value. Yet, the actual number of csrows depends on the layout of
156of a given motherboard, memory controller and DIMM characteristics. 122a given motherboard, memory controller and DIMM characteristics.
157 123
158Dual channels allows for 128 bit data transfers to the CPU from memory. 124Dual channels allows for 128 bit data transfers to/from the CPU from/to
159Some newer chipsets allow for more than 2 channels, like Fully Buffered DIMMs 125memory. Some newer chipsets allow for more than 2 channels, like Fully
160(FB-DIMMs). The following example will assume 2 channels: 126Buffered DIMMs (FB-DIMMs). The following example will assume 2 channels:
161 127
162 128
163 Channel 0 Channel 1 129 Channel 0 Channel 1
@@ -179,12 +145,12 @@ for memory DIMMs:
179 DIMM_A1 145 DIMM_A1
180 DIMM_B1 146 DIMM_B1
181 147
182Labels for these slots are usually silk screened on the motherboard. Slots 148Labels for these slots are usually silk-screened on the motherboard.
183labeled 'A' are channel 0 in this example. Slots labeled 'B' 149Slots labeled 'A' are channel 0 in this example. Slots labeled 'B' are
184are channel 1. Notice that there are two csrows possible on a 150channel 1. Notice that there are two csrows possible on a physical DIMM.
185physical DIMM. These csrows are allocated their csrow assignment 151These csrows are allocated their csrow assignment based on the slot into
186based on the slot into which the memory DIMM is placed. Thus, when 1 DIMM 152which the memory DIMM is placed. Thus, when 1 DIMM is placed in each
187is placed in each Channel, the csrows cross both DIMMs. 153Channel, the csrows cross both DIMMs.
188 154
189Memory DIMMs come single or dual "ranked". A rank is a populated csrow. 155Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
190Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above 156Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above
@@ -193,8 +159,8 @@ when 2 dual ranked DIMMs are similarly placed, then both csrow0 and
193csrow1 will be populated. The pattern repeats itself for csrow2 and 159csrow1 will be populated. The pattern repeats itself for csrow2 and
194csrow3. 160csrow3.
195 161
196The representation of the above is reflected in the directory tree 162The representation of the above is reflected in the directory
197in EDAC's sysfs interface. Starting in directory 163tree in EDAC's sysfs interface. Starting in directory
198/sys/devices/system/edac/mc each memory controller will be represented 164/sys/devices/system/edac/mc each memory controller will be represented
199by its own 'mcX' directory, where 'X' is the index of the MC. 165by its own 'mcX' directory, where 'X' is the index of the MC.
200 166
@@ -217,34 +183,35 @@ Under each 'mcX' directory each 'csrowX' is again represented by a
217 |->csrow3 183 |->csrow3
218 .... 184 ....
219 185
220Notice that there is no csrow1, which indicates that csrow0 is 186Notice that there is no csrow1, which indicates that csrow0 is composed
221composed of a single ranked DIMMs. This should also apply in both 187of a single ranked DIMMs. This should also apply in both Channels, in
222Channels, in order to have dual-channel mode be operational. Since 188order to have dual-channel mode be operational. Since both csrow2 and
223both csrow2 and csrow3 are populated, this indicates a dual ranked 189csrow3 are populated, this indicates a dual ranked set of DIMMs for
224set of DIMMs for channels 0 and 1. 190channels 0 and 1.
225 191
226 192
227Within each of the 'mcX' and 'csrowX' directories are several 193Within each of the 'mcX' and 'csrowX' directories are several EDAC
228EDAC control and attribute files. 194control and attribute files.
229 195
230============================================================================
231'mcX' DIRECTORIES
232 196
197'mcX' directories
198-----------------
233 199
234In 'mcX' directories are EDAC control and attribute files for 200In 'mcX' directories are EDAC control and attribute files for
235this 'X' instance of the memory controllers. 201this 'X' instance of the memory controllers.
236 202
237For a description of the sysfs API, please see: 203For a description of the sysfs API, please see:
238 Documentation/ABI/testing/sysfs/devices-edac 204 Documentation/ABI/testing/sysfs-devices-edac
205
239 206
240 207
241============================================================================ 208'csrowX' directories
242'csrowX' DIRECTORIES 209--------------------
243 210
244When CONFIG_EDAC_LEGACY_SYSFS is enabled, the sysfs will contain the 211When CONFIG_EDAC_LEGACY_SYSFS is enabled, sysfs will contain the csrowX
245csrowX directories. As this API doesn't work properly for Rambus, FB-DIMMs 212directories. As this API doesn't work properly for Rambus, FB-DIMMs and
246and modern Intel Memory Controllers, this is being deprecated in favor 213modern Intel Memory Controllers, this is being deprecated in favor of
247of dimmX directories. 214dimmX directories.
248 215
249In the 'csrowX' directories are EDAC control and attribute files for 216In the 'csrowX' directories are EDAC control and attribute files for
250this 'X' instance of csrow: 217this 'X' instance of csrow:
@@ -265,18 +232,18 @@ Total Correctable Errors count attribute file:
265 'ce_count' 232 'ce_count'
266 233
267 This attribute file displays the total count of correctable 234 This attribute file displays the total count of correctable
268 errors that have occurred on this csrow. This 235 errors that have occurred on this csrow. This count is very
269 count is very important to examine. CEs provide early 236 important to examine. CEs provide early indications that a
270 indications that a DIMM is beginning to fail. This count 237 DIMM is beginning to fail. This count field should be
271 field should be monitored for non-zero values and report 238 monitored for non-zero values and report such information
272 such information to the system administrator. 239 to the system administrator.
273 240
274 241
275Total memory managed by this csrow attribute file: 242Total memory managed by this csrow attribute file:
276 243
277 'size_mb' 244 'size_mb'
278 245
279 This attribute file displays, in count of megabytes, of memory 246 This attribute file displays, in count of megabytes, the memory
280 that this csrow contains. 247 that this csrow contains.
281 248
282 249
@@ -377,11 +344,13 @@ Channel 1 DIMM Label control file:
377 motherboard specific and determination of this information 344 motherboard specific and determination of this information
378 must occur in userland at this time. 345 must occur in userland at this time.
379 346
380============================================================================ 347
348
381SYSTEM LOGGING 349SYSTEM LOGGING
350--------------
382 351
383If logging for UEs and CEs are enabled then system logs will have 352If logging for UEs and CEs is enabled, then system logs will contain
384error notices indicating errors that have been detected: 353information indicating that errors have been detected:
385 354
386EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, 355EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0,
387channel 1 "DIMM_B1": amd76x_edac 356channel 1 "DIMM_B1": amd76x_edac
@@ -404,24 +373,23 @@ The structure of the message is:
404 and then an optional, driver-specific message that may 373 and then an optional, driver-specific message that may
405 have additional information. 374 have additional information.
406 375
407Both UEs and CEs with no info will lack all but memory controller, 376Both UEs and CEs with no info will lack all but memory controller, error
408error type, a notice of "no info" and then an optional, 377type, a notice of "no info" and then an optional, driver-specific error
409driver-specific error message. 378message.
410 379
411 380
412============================================================================
413PCI Bus Parity Detection 381PCI Bus Parity Detection
382------------------------
414 383
415 384On Header Type 00 devices, the primary status is looked at for any
416On Header Type 00 devices the primary status is looked at 385parity error regardless of whether parity is enabled on the device or
417for any parity error regardless of whether Parity is enabled on the 386not. (The spec indicates parity is generated in some cases). On Header
418device. (The spec indicates parity is generated in some cases). 387Type 01 bridges, the secondary status register is also looked at to see
419On Header Type 01 bridges, the secondary status register is also 388if parity occurred on the bus on the other side of the bridge.
420looked at to see if parity occurred on the bus on the other side of
421the bridge.
422 389
423 390
424SYSFS CONFIGURATION 391SYSFS CONFIGURATION
392-------------------
425 393
426Under /sys/devices/system/edac/pci are control and attribute files as follows: 394Under /sys/devices/system/edac/pci are control and attribute files as follows:
427 395
@@ -450,8 +418,9 @@ Parity Count:
450 have been detected. 418 have been detected.
451 419
452 420
453============================================================================ 421
454MODULE PARAMETERS 422MODULE PARAMETERS
423-----------------
455 424
456Panic on UE control file: 425Panic on UE control file:
457 426
@@ -516,7 +485,7 @@ Panic on PCI PARITY Error:
516 'panic_on_pci_parity' 485 'panic_on_pci_parity'
517 486
518 487
519 This control files enables or disables panicking when a parity 488 This control file enables or disables panicking when a parity
520 error has been detected. 489 error has been detected.
521 490
522 491
@@ -530,10 +499,8 @@ Panic on PCI PARITY Error:
530 499
531 500
532 501
533======================================================================= 502EDAC device type
534 503----------------
535
536EDAC_DEVICE type of device
537 504
538In the header file, edac_core.h, there is a series of edac_device structures 505In the header file, edac_core.h, there is a series of edac_device structures
539and APIs for the EDAC_DEVICE. 506and APIs for the EDAC_DEVICE.
@@ -573,6 +540,7 @@ The test_device_edac device adds at least one of its own custom control:
573The symlink points to the 'struct dev' that is registered for this edac_device. 540The symlink points to the 'struct dev' that is registered for this edac_device.
574 541
575INSTANCES 542INSTANCES
543---------
576 544
577One or more instance directories are present. For the 'test_device_edac' case: 545One or more instance directories are present. For the 'test_device_edac' case:
578 546
@@ -586,6 +554,7 @@ counter in deeper subdirectories.
586 ue_count total of UE events of subdirectories 554 ue_count total of UE events of subdirectories
587 555
588BLOCKS 556BLOCKS
557------
589 558
590At the lowest directory level is the 'block' directory. There can be 0, 1 559At the lowest directory level is the 'block' directory. There can be 0, 1
591or more blocks specified in each instance. 560or more blocks specified in each instance.
@@ -617,14 +586,15 @@ The 'test_device_edac' device adds 4 attributes and 1 control:
617 reset all the above counters. 586 reset all the above counters.
618 587
619 588
620Use of the 'test_device_edac' driver should any others to create their own 589Use of the 'test_device_edac' driver should enable any others to create their own
621unique drivers for their hardware systems. 590unique drivers for their hardware systems.
622 591
623The 'test_device_edac' sample driver is located at the 592The 'test_device_edac' sample driver is located at the
624bluesmoke.sourceforge.net project site for EDAC. 593bluesmoke.sourceforge.net project site for EDAC.
625 594
626======================================================================= 595
627NEHALEM USAGE OF EDAC APIs 596NEHALEM USAGE OF EDAC APIs
597--------------------------
628 598
629This chapter documents some EXPERIMENTAL mappings for EDAC API to handle 599This chapter documents some EXPERIMENTAL mappings for EDAC API to handle
630Nehalem EDAC driver. They will likely be changed on future versions 600Nehalem EDAC driver. They will likely be changed on future versions
@@ -633,7 +603,7 @@ of the driver.
633Due to the way Nehalem exports Memory Controller data, some adjustments 603Due to the way Nehalem exports Memory Controller data, some adjustments
634were done at i7core_edac driver. This chapter will cover those differences 604were done at i7core_edac driver. This chapter will cover those differences
635 605
6361) On Nehalem, there are one Memory Controller per Quick Patch Interconnect 6061) On Nehalem, there is one Memory Controller per Quick Patch Interconnect
637 (QPI). At the driver, the term "socket" means one QPI. This is 607 (QPI). At the driver, the term "socket" means one QPI. This is
638 associated with a physical CPU socket. 608 associated with a physical CPU socket.
639 609
@@ -642,7 +612,7 @@ were done at i7core_edac driver. This chapter will cover those differences
642 Each channel can have up to 3 DIMMs. 612 Each channel can have up to 3 DIMMs.
643 613
644 The minimum known unity is DIMMs. There are no information about csrows. 614 The minimum known unity is DIMMs. There are no information about csrows.
645 As EDAC API maps the minimum unity is csrows, the driver sequencially 615 As EDAC API maps the minimum unity is csrows, the driver sequentially
646 maps channel/dimm into different csrows. 616 maps channel/dimm into different csrows.
647 617
648 For example, supposing the following layout: 618 For example, supposing the following layout:
@@ -664,7 +634,7 @@ exports one
664 634
665 Each QPI is exported as a different memory controller. 635 Each QPI is exported as a different memory controller.
666 636
6672) Nehalem MC has the hability to generate errors. The driver implements this 6372) Nehalem MC has the ability to generate errors. The driver implements this
668 functionality via some error injection nodes: 638 functionality via some error injection nodes:
669 639
670 For injecting a memory error, there are some sysfs nodes, under 640 For injecting a memory error, there are some sysfs nodes, under
@@ -771,5 +741,22 @@ exports one
771 741
772 The standard error counters are generated when an mcelog error is received 742 The standard error counters are generated when an mcelog error is received
773 by the driver. Since, with udimm, this is counted by software, it is 743 by the driver. Since, with udimm, this is counted by software, it is
774 possible that some errors could be lost. With rdimm's, they displays the 744 possible that some errors could be lost. With rdimm's, they display the
775 contents of the registers 745 contents of the registers
746
747CREDITS:
748========
749
750Written by Doug Thompson <dougthompson@xmission.com>
7517 Dec 2005
75217 Jul 2007 Updated
753
754(c) Mauro Carvalho Chehab
75505 Aug 2009 Nehalem interface
756
757EDAC authors/maintainers:
758
759 Doug Thompson, Dave Jiang, Dave Peterson et al,
760 Mauro Carvalho Chehab
761 Borislav Petkov
762 original author: Thayne Harbaugh
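
The updated edac.txt above describes per-controller sysfs attributes such as ce_count and size_mb under /sys/devices/system/edac/mc/mcX/csrowX/. As a rough illustration only, not part of this patch set, a minimal userspace reader of one such counter could look like the sketch below; the mc0/csrow0 path is just an example, and which mcX/csrowX nodes exist depends on the loaded MC driver and on CONFIG_EDAC_LEGACY_SYSFS:

    /* Illustrative only: dump one EDAC correctable-error counter.
     * The mc0/csrow0 path is an assumption for the example. */
    #include <stdio.h>

    int main(void)
    {
        const char *path = "/sys/devices/system/edac/mc/mc0/csrow0/ce_count";
        FILE *f = fopen(path, "r");
        unsigned long ce;

        if (!f) {
            perror(path);
            return 1;
        }
        if (fscanf(f, "%lu", &ce) == 1)
            printf("csrow0 correctable errors: %lu\n", ce);
        fclose(f);
        return 0;
    }

Build it with any C compiler and run it after the platform's MC driver has been loaded, as described in the LOADING section of the document.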
diff --git a/MAINTAINERS b/MAINTAINERS
index 7ee651eb0b79..d7c53502aa86 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3777,7 +3777,7 @@ S: Maintained
3777F: drivers/edac/ie31200_edac.c 3777F: drivers/edac/ie31200_edac.c
3778 3778
3779EDAC-MPC85XX 3779EDAC-MPC85XX
3780M: Johannes Thumshirn <johannes.thumshirn@men.de> 3780M: Johannes Thumshirn <morbidrsa@gmail.com>
3781L: linux-edac@vger.kernel.org 3781L: linux-edac@vger.kernel.org
3782W: bluesmoke.sourceforge.net 3782W: bluesmoke.sourceforge.net
3783S: Maintained 3783S: Maintained
@@ -3804,6 +3804,13 @@ W: bluesmoke.sourceforge.net
3804S: Maintained 3804S: Maintained
3805F: drivers/edac/sb_edac.c 3805F: drivers/edac/sb_edac.c
3806 3806
3807EDAC-XGENE
3808APPLIED MICRO (APM) X-GENE SOC EDAC
3809M: Loc Ho <lho@apm.com>
3810S: Supported
3811F: drivers/edac/xgene_edac.c
3812F: Documentation/devicetree/bindings/edac/apm-xgene-edac.txt
3813
3807EDIROL UA-101/UA-1000 DRIVER 3814EDIROL UA-101/UA-1000 DRIVER
3808M: Clemens Ladisch <clemens@ladisch.de> 3815M: Clemens Ladisch <clemens@ladisch.de>
3809L: alsa-devel@alsa-project.org (moderated for non-subscribers) 3816L: alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -6488,14 +6495,14 @@ F: include/linux/mtd/
6488F: include/uapi/mtd/ 6495F: include/uapi/mtd/
6489 6496
6490MEN A21 WATCHDOG DRIVER 6497MEN A21 WATCHDOG DRIVER
6491M: Johannes Thumshirn <johannes.thumshirn@men.de> 6498M: Johannes Thumshirn <morbidrsa@gmail.com>
6492L: linux-watchdog@vger.kernel.org 6499L: linux-watchdog@vger.kernel.org
6493S: Supported 6500S: Maintained
6494F: drivers/watchdog/mena21_wdt.c 6501F: drivers/watchdog/mena21_wdt.c
6495 6502
6496MEN CHAMELEON BUS (mcb) 6503MEN CHAMELEON BUS (mcb)
6497M: Johannes Thumshirn <johannes.thumshirn@men.de> 6504M: Johannes Thumshirn <morbidrsa@gmail.com>
6498S: Supported 6505S: Maintained
6499F: drivers/mcb/ 6506F: drivers/mcb/
6500F: include/linux/mcb.h 6507F: include/linux/mcb.h
6501 6508
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 45df48ba0b12..325d6f3a596a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -15,6 +15,8 @@ config ARM
15 select CLONE_BACKWARDS 15 select CLONE_BACKWARDS
16 select CPU_PM if (SUSPEND || CPU_IDLE) 16 select CPU_PM if (SUSPEND || CPU_IDLE)
17 select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS 17 select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
18 select EDAC_SUPPORT
19 select EDAC_ATOMIC_SCRUB
18 select GENERIC_ALLOCATOR 20 select GENERIC_ALLOCATOR
19 select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) 21 select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
20 select GENERIC_CLOCKEVENTS_BROADCAST if SMP 22 select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi
index 8a05c47fd57f..4be75960a603 100644
--- a/arch/arm/boot/dts/socfpga_arria10.dtsi
+++ b/arch/arm/boot/dts/socfpga_arria10.dtsi
@@ -253,6 +253,17 @@
253 status = "disabled"; 253 status = "disabled";
254 }; 254 };
255 255
256 sdr: sdr@ffc25000 {
257 compatible = "syscon";
258 reg = <0xffcfb100 0x80>;
259 };
260
261 sdramedac {
262 compatible = "altr,sdram-edac-a10";
263 altr,sdr-syscon = <&sdr>;
264 interrupts = <0 2 4>, <0 0 4>;
265 };
266
256 L2: l2-cache@fffff000 { 267 L2: l2-cache@fffff000 {
257 compatible = "arm,pl310-cache"; 268 compatible = "arm,pl310-cache";
258 reg = <0xfffff000 0x1000>; 269 reg = <0xfffff000 0x1000>;
diff --git a/arch/arm/include/asm/edac.h b/arch/arm/include/asm/edac.h
index 0df7a2c1fc3d..5189fa819b60 100644
--- a/arch/arm/include/asm/edac.h
+++ b/arch/arm/include/asm/edac.h
@@ -18,11 +18,12 @@
18#define ASM_EDAC_H 18#define ASM_EDAC_H
19/* 19/*
20 * ECC atomic, DMA, SMP and interrupt safe scrub function. 20 * ECC atomic, DMA, SMP and interrupt safe scrub function.
21 * Implements the per arch atomic_scrub() that EDAC use for software 21 * Implements the per arch edac_atomic_scrub() that EDAC use for software
22 * ECC scrubbing. It reads memory and then writes back the original 22 * ECC scrubbing. It reads memory and then writes back the original
23 * value, allowing the hardware to detect and correct memory errors. 23 * value, allowing the hardware to detect and correct memory errors.
24 */ 24 */
25static inline void atomic_scrub(void *va, u32 size) 25
26static inline void edac_atomic_scrub(void *va, u32 size)
26{ 27{
27#if __LINUX_ARM_ARCH__ >= 6 28#if __LINUX_ARM_ARCH__ >= 6
28 unsigned int *virt_addr = va; 29 unsigned int *virt_addr = va;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 802400f2a69e..290ed648aa11 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -23,6 +23,7 @@ config ARM64
23 select BUILDTIME_EXTABLE_SORT 23 select BUILDTIME_EXTABLE_SORT
24 select CLONE_BACKWARDS 24 select CLONE_BACKWARDS
25 select COMMON_CLK 25 select COMMON_CLK
26 select EDAC_SUPPORT
26 select CPU_PM if (SUSPEND || CPU_IDLE) 27 select CPU_PM if (SUSPEND || CPU_IDLE)
27 select DCACHE_WORD_ACCESS 28 select DCACHE_WORD_ACCESS
28 select GENERIC_ALLOCATOR 29 select GENERIC_ALLOCATOR
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index d8f3a1c65ecd..0bb287ca0a98 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -396,6 +396,89 @@
396 0x0 0x1f 0x4>; 396 0x0 0x1f 0x4>;
397 }; 397 };
398 398
399 csw: csw@7e200000 {
400 compatible = "apm,xgene-csw", "syscon";
401 reg = <0x0 0x7e200000 0x0 0x1000>;
402 };
403
404 mcba: mcba@7e700000 {
405 compatible = "apm,xgene-mcb", "syscon";
406 reg = <0x0 0x7e700000 0x0 0x1000>;
407 };
408
409 mcbb: mcbb@7e720000 {
410 compatible = "apm,xgene-mcb", "syscon";
411 reg = <0x0 0x7e720000 0x0 0x1000>;
412 };
413
414 efuse: efuse@1054a000 {
415 compatible = "apm,xgene-efuse", "syscon";
416 reg = <0x0 0x1054a000 0x0 0x20>;
417 };
418
419 edac@78800000 {
420 compatible = "apm,xgene-edac";
421 #address-cells = <2>;
422 #size-cells = <2>;
423 ranges;
424 regmap-csw = <&csw>;
425 regmap-mcba = <&mcba>;
426 regmap-mcbb = <&mcbb>;
427 regmap-efuse = <&efuse>;
428 reg = <0x0 0x78800000 0x0 0x100>;
429 interrupts = <0x0 0x20 0x4>,
430 <0x0 0x21 0x4>,
431 <0x0 0x27 0x4>;
432
433 edacmc@7e800000 {
434 compatible = "apm,xgene-edac-mc";
435 reg = <0x0 0x7e800000 0x0 0x1000>;
436 memory-controller = <0>;
437 };
438
439 edacmc@7e840000 {
440 compatible = "apm,xgene-edac-mc";
441 reg = <0x0 0x7e840000 0x0 0x1000>;
442 memory-controller = <1>;
443 };
444
445 edacmc@7e880000 {
446 compatible = "apm,xgene-edac-mc";
447 reg = <0x0 0x7e880000 0x0 0x1000>;
448 memory-controller = <2>;
449 };
450
451 edacmc@7e8c0000 {
452 compatible = "apm,xgene-edac-mc";
453 reg = <0x0 0x7e8c0000 0x0 0x1000>;
454 memory-controller = <3>;
455 };
456
457 edacpmd@7c000000 {
458 compatible = "apm,xgene-edac-pmd";
459 reg = <0x0 0x7c000000 0x0 0x200000>;
460 pmd-controller = <0>;
461 };
462
463 edacpmd@7c200000 {
464 compatible = "apm,xgene-edac-pmd";
465 reg = <0x0 0x7c200000 0x0 0x200000>;
466 pmd-controller = <1>;
467 };
468
469 edacpmd@7c400000 {
470 compatible = "apm,xgene-edac-pmd";
471 reg = <0x0 0x7c400000 0x0 0x200000>;
472 pmd-controller = <2>;
473 };
474
475 edacpmd@7c600000 {
476 compatible = "apm,xgene-edac-pmd";
477 reg = <0x0 0x7c600000 0x0 0x200000>;
478 pmd-controller = <3>;
479 };
480 };
481
399 pcie0: pcie@1f2b0000 { 482 pcie0: pcie@1f2b0000 {
400 status = "disabled"; 483 status = "disabled";
401 device_type = "pci"; 484 device_type = "pci";
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5016656494f..b65edf514b40 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -819,6 +819,7 @@ config CAVIUM_OCTEON_SOC
819 select SYS_SUPPORTS_64BIT_KERNEL 819 select SYS_SUPPORTS_64BIT_KERNEL
820 select SYS_SUPPORTS_BIG_ENDIAN 820 select SYS_SUPPORTS_BIG_ENDIAN
821 select EDAC_SUPPORT 821 select EDAC_SUPPORT
822 select EDAC_ATOMIC_SCRUB
822 select SYS_SUPPORTS_LITTLE_ENDIAN 823 select SYS_SUPPORTS_LITTLE_ENDIAN
823 select SYS_SUPPORTS_HOTPLUG_CPU if CPU_BIG_ENDIAN 824 select SYS_SUPPORTS_HOTPLUG_CPU if CPU_BIG_ENDIAN
824 select SYS_HAS_EARLY_PRINTK 825 select SYS_HAS_EARLY_PRINTK
diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h
index 94105d3f58f4..980b16527374 100644
--- a/arch/mips/include/asm/edac.h
+++ b/arch/mips/include/asm/edac.h
@@ -5,7 +5,7 @@
5 5
6/* ECC atomic, DMA, SMP and interrupt safe scrub function */ 6/* ECC atomic, DMA, SMP and interrupt safe scrub function */
7 7
8static inline void atomic_scrub(void *va, u32 size) 8static inline void edac_atomic_scrub(void *va, u32 size)
9{ 9{
10 unsigned long *virt_addr = va; 10 unsigned long *virt_addr = va;
11 unsigned long temp; 11 unsigned long temp;
@@ -21,7 +21,7 @@ static inline void atomic_scrub(void *va, u32 size)
21 21
22 __asm__ __volatile__ ( 22 __asm__ __volatile__ (
23 " .set mips2 \n" 23 " .set mips2 \n"
24 "1: ll %0, %1 # atomic_scrub \n" 24 "1: ll %0, %1 # edac_atomic_scrub \n"
25 " addu %0, $0 \n" 25 " addu %0, $0 \n"
26 " sc %0, %1 \n" 26 " sc %0, %1 \n"
27 " beqz %0, 1b \n" 27 " beqz %0, 1b \n"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 190cc48abc0c..5ef27113b898 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -153,6 +153,8 @@ config PPC
153 select NO_BOOTMEM 153 select NO_BOOTMEM
154 select HAVE_GENERIC_RCU_GUP 154 select HAVE_GENERIC_RCU_GUP
155 select HAVE_PERF_EVENTS_NMI if PPC64 155 select HAVE_PERF_EVENTS_NMI if PPC64
156 select EDAC_SUPPORT
157 select EDAC_ATOMIC_SCRUB
156 158
157config GENERIC_CSUM 159config GENERIC_CSUM
158 def_bool CPU_LITTLE_ENDIAN 160 def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/include/asm/edac.h b/arch/powerpc/include/asm/edac.h
index 6ead88bbfbb8..5571e23d253e 100644
--- a/arch/powerpc/include/asm/edac.h
+++ b/arch/powerpc/include/asm/edac.h
@@ -12,11 +12,11 @@
12#define ASM_EDAC_H 12#define ASM_EDAC_H
13/* 13/*
14 * ECC atomic, DMA, SMP and interrupt safe scrub function. 14 * ECC atomic, DMA, SMP and interrupt safe scrub function.
15 * Implements the per arch atomic_scrub() that EDAC use for software 15 * Implements the per arch edac_atomic_scrub() that EDAC use for software
16 * ECC scrubbing. It reads memory and then writes back the original 16 * ECC scrubbing. It reads memory and then writes back the original
17 * value, allowing the hardware to detect and correct memory errors. 17 * value, allowing the hardware to detect and correct memory errors.
18 */ 18 */
19static __inline__ void atomic_scrub(void *va, u32 size) 19static __inline__ void edac_atomic_scrub(void *va, u32 size)
20{ 20{
21 unsigned int *virt_addr = va; 21 unsigned int *virt_addr = va;
22 unsigned int temp; 22 unsigned int temp;
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index a07e31b50d3f..59cf0b911898 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -28,6 +28,7 @@ config TILE
28 select HAVE_DEBUG_STACKOVERFLOW 28 select HAVE_DEBUG_STACKOVERFLOW
29 select ARCH_WANT_FRAME_POINTERS 29 select ARCH_WANT_FRAME_POINTERS
30 select HAVE_CONTEXT_TRACKING 30 select HAVE_CONTEXT_TRACKING
31 select EDAC_SUPPORT
31 32
32# FIXME: investigate whether we need/want these options. 33# FIXME: investigate whether we need/want these options.
33# select HAVE_IOREMAP_PROT 34# select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h
deleted file mode 100644
index 87fc83eeaffd..000000000000
--- a/arch/tile/include/asm/edac.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#ifndef _ASM_TILE_EDAC_H
16#define _ASM_TILE_EDAC_H
17
18/* ECC atomic, DMA, SMP and interrupt safe scrub function */
19
20static inline void atomic_scrub(void *va, u32 size)
21{
22 /*
23 * These is nothing to be done here because CE is
24 * corrected by the mshim.
25 */
26 return;
27}
28
29#endif /* _ASM_TILE_EDAC_H */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7e39f9b22705..8e0b76ad8350 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,6 +50,8 @@ config X86
50 select CLONE_BACKWARDS if X86_32 50 select CLONE_BACKWARDS if X86_32
51 select COMPAT_OLD_SIGACTION if IA32_EMULATION 51 select COMPAT_OLD_SIGACTION if IA32_EMULATION
52 select DCACHE_WORD_ACCESS 52 select DCACHE_WORD_ACCESS
53 select EDAC_ATOMIC_SCRUB
54 select EDAC_SUPPORT
53 select GENERIC_CLOCKEVENTS 55 select GENERIC_CLOCKEVENTS
54 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) 56 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
55 select GENERIC_CLOCKEVENTS_MIN_ADJUST 57 select GENERIC_CLOCKEVENTS_MIN_ADJUST
diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h
index e9b57ecc70c5..cf8fdf83b231 100644
--- a/arch/x86/include/asm/edac.h
+++ b/arch/x86/include/asm/edac.h
@@ -3,7 +3,7 @@
3 3
4/* ECC atomic, DMA, SMP and interrupt safe scrub function */ 4/* ECC atomic, DMA, SMP and interrupt safe scrub function */
5 5
6static inline void atomic_scrub(void *va, u32 size) 6static inline void edac_atomic_scrub(void *va, u32 size)
7{ 7{
8 u32 i, *virt_addr = va; 8 u32 i, *virt_addr = va;
9 9
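
All of the per-arch helpers renamed above (atomic_scrub() -> edac_atomic_scrub()) implement the same idea spelled out in the header comments: atomically read each word of the affected memory and write the original value back, so the hardware gets a chance to detect and correct the stored ECC data. A rough, userspace-flavoured sketch of that pattern follows; it assumes a GCC/Clang toolchain and uses a generic atomic builtin in place of the arch-specific ll/sc or lock-prefixed sequences shown in the diffs above:

    #include <stdint.h>

    /* Illustrative sketch only: atomic read/write-back scrub over a
     * buffer. The kernel's edac_atomic_scrub() variants achieve the
     * same effect with arch-specific atomics. */
    static inline void example_atomic_scrub(void *va, uint32_t size)
    {
        uint32_t *virt_addr = va;
        uint32_t i;

        /* Atomically adding 0 leaves the data unchanged but forces the
         * (possibly corrected) value to be rewritten to memory. */
        for (i = 0; i < size / 4; i++, virt_addr++)
            __sync_fetch_and_add(virt_addr, 0);
    }

The real helpers differ only in how each architecture makes the read-modify-write atomic.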
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index cb59619df23f..8677ead2a8e1 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -2,15 +2,16 @@
2# EDAC Kconfig 2# EDAC Kconfig
3# Copyright (c) 2008 Doug Thompson www.softwarebitmaker.com 3# Copyright (c) 2008 Doug Thompson www.softwarebitmaker.com
4# Licensed and distributed under the GPL 4# Licensed and distributed under the GPL
5# 5
6config EDAC_ATOMIC_SCRUB
7 bool
6 8
7config EDAC_SUPPORT 9config EDAC_SUPPORT
8 bool 10 bool
9 11
10menuconfig EDAC 12menuconfig EDAC
11 bool "EDAC (Error Detection And Correction) reporting" 13 bool "EDAC (Error Detection And Correction) reporting"
12 depends on HAS_IOMEM 14 depends on HAS_IOMEM && EDAC_SUPPORT
13 depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT
14 help 15 help
15 EDAC is designed to report errors in the core system. 16 EDAC is designed to report errors in the core system.
16 These are low-level errors that are reported in the CPU or 17 These are low-level errors that are reported in the CPU or
@@ -262,10 +263,10 @@ config EDAC_SBRIDGE
262 263
263config EDAC_MPC85XX 264config EDAC_MPC85XX
264 tristate "Freescale MPC83xx / MPC85xx" 265 tristate "Freescale MPC83xx / MPC85xx"
265 depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) 266 depends on EDAC_MM_EDAC && FSL_SOC
266 help 267 help
267 Support for error detection and correction on the Freescale 268 Support for error detection and correction on the Freescale
268 MPC8349, MPC8560, MPC8540, MPC8548 269 MPC8349, MPC8560, MPC8540, MPC8548, T4240
269 270
270config EDAC_MV64X60 271config EDAC_MV64X60
271 tristate "Marvell MV64x60" 272 tristate "Marvell MV64x60"
@@ -377,8 +378,8 @@ config EDAC_OCTEON_PCI
377 Cavium Octeon family of SOCs. 378 Cavium Octeon family of SOCs.
378 379
379config EDAC_ALTERA_MC 380config EDAC_ALTERA_MC
380 tristate "Altera SDRAM Memory Controller EDAC" 381 bool "Altera SDRAM Memory Controller EDAC"
381 depends on EDAC_MM_EDAC && ARCH_SOCFPGA 382 depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
382 help 383 help
383 Support for error detection and correction on the 384 Support for error detection and correction on the
384 Altera SDRAM memory controller. Note that the 385 Altera SDRAM memory controller. Note that the
@@ -392,4 +393,11 @@ config EDAC_SYNOPSYS
392 Support for error detection and correction on the Synopsys DDR 393 Support for error detection and correction on the Synopsys DDR
393 memory controller. 394 memory controller.
394 395
396config EDAC_XGENE
397 tristate "APM X-Gene SoC"
398 depends on EDAC_MM_EDAC && (ARM64 || COMPILE_TEST)
399 help
400 Support for error detection and correction on the
401 APM X-Gene family of SOCs.
402
395endif # EDAC 403endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index b255f362b1db..28ef2a519f65 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -68,3 +68,4 @@ obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
68 68
69obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o 69obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o
70obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o 70obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
71obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 3c4929fda9d5..23ef0917483c 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright Altera Corporation (C) 2014. All rights reserved. 2 * Copyright Altera Corporation (C) 2014-2015. All rights reserved.
3 * Copyright 2011-2012 Calxeda, Inc. 3 * Copyright 2011-2012 Calxeda, Inc.
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify it 5 * This program is free software; you can redistribute it and/or modify it
@@ -28,113 +28,92 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/uaccess.h> 29#include <linux/uaccess.h>
30 30
31#include "altera_edac.h"
31#include "edac_core.h" 32#include "edac_core.h"
32#include "edac_module.h" 33#include "edac_module.h"
33 34
34#define EDAC_MOD_STR "altera_edac" 35#define EDAC_MOD_STR "altera_edac"
35#define EDAC_VERSION "1" 36#define EDAC_VERSION "1"
36 37
37/* SDRAM Controller CtrlCfg Register */ 38static const struct altr_sdram_prv_data c5_data = {
38#define CTLCFG_OFST 0x00 39 .ecc_ctrl_offset = CV_CTLCFG_OFST,
39 40 .ecc_ctl_en_mask = CV_CTLCFG_ECC_AUTO_EN,
40/* SDRAM Controller CtrlCfg Register Bit Masks */ 41 .ecc_stat_offset = CV_DRAMSTS_OFST,
41#define CTLCFG_ECC_EN 0x400 42 .ecc_stat_ce_mask = CV_DRAMSTS_SBEERR,
42#define CTLCFG_ECC_CORR_EN 0x800 43 .ecc_stat_ue_mask = CV_DRAMSTS_DBEERR,
43#define CTLCFG_GEN_SB_ERR 0x2000 44 .ecc_saddr_offset = CV_ERRADDR_OFST,
44#define CTLCFG_GEN_DB_ERR 0x4000 45 .ecc_daddr_offset = CV_ERRADDR_OFST,
45 46 .ecc_cecnt_offset = CV_SBECOUNT_OFST,
46#define CTLCFG_ECC_AUTO_EN (CTLCFG_ECC_EN | \ 47 .ecc_uecnt_offset = CV_DBECOUNT_OFST,
47 CTLCFG_ECC_CORR_EN) 48 .ecc_irq_en_offset = CV_DRAMINTR_OFST,
48 49 .ecc_irq_en_mask = CV_DRAMINTR_INTREN,
49/* SDRAM Controller Address Width Register */ 50 .ecc_irq_clr_offset = CV_DRAMINTR_OFST,
50#define DRAMADDRW_OFST 0x2C 51 .ecc_irq_clr_mask = (CV_DRAMINTR_INTRCLR | CV_DRAMINTR_INTREN),
51 52 .ecc_cnt_rst_offset = CV_DRAMINTR_OFST,
52/* SDRAM Controller Address Widths Field Register */ 53 .ecc_cnt_rst_mask = CV_DRAMINTR_INTRCLR,
53#define DRAMADDRW_COLBIT_MASK 0x001F 54#ifdef CONFIG_EDAC_DEBUG
54#define DRAMADDRW_COLBIT_SHIFT 0 55 .ce_ue_trgr_offset = CV_CTLCFG_OFST,
55#define DRAMADDRW_ROWBIT_MASK 0x03E0 56 .ce_set_mask = CV_CTLCFG_GEN_SB_ERR,
56#define DRAMADDRW_ROWBIT_SHIFT 5 57 .ue_set_mask = CV_CTLCFG_GEN_DB_ERR,
57#define DRAMADDRW_BANKBIT_MASK 0x1C00 58#endif
58#define DRAMADDRW_BANKBIT_SHIFT 10 59};
59#define DRAMADDRW_CSBIT_MASK 0xE000
60#define DRAMADDRW_CSBIT_SHIFT 13
61
62/* SDRAM Controller Interface Data Width Register */
63#define DRAMIFWIDTH_OFST 0x30
64
65/* SDRAM Controller Interface Data Width Defines */
66#define DRAMIFWIDTH_16B_ECC 24
67#define DRAMIFWIDTH_32B_ECC 40
68
69/* SDRAM Controller DRAM Status Register */
70#define DRAMSTS_OFST 0x38
71
72/* SDRAM Controller DRAM Status Register Bit Masks */
73#define DRAMSTS_SBEERR 0x04
74#define DRAMSTS_DBEERR 0x08
75#define DRAMSTS_CORR_DROP 0x10
76
77/* SDRAM Controller DRAM IRQ Register */
78#define DRAMINTR_OFST 0x3C
79
80/* SDRAM Controller DRAM IRQ Register Bit Masks */
81#define DRAMINTR_INTREN 0x01
82#define DRAMINTR_SBEMASK 0x02
83#define DRAMINTR_DBEMASK 0x04
84#define DRAMINTR_CORRDROPMASK 0x08
85#define DRAMINTR_INTRCLR 0x10
86
87/* SDRAM Controller Single Bit Error Count Register */
88#define SBECOUNT_OFST 0x40
89
90/* SDRAM Controller Single Bit Error Count Register Bit Masks */
91#define SBECOUNT_MASK 0x0F
92
93/* SDRAM Controller Double Bit Error Count Register */
94#define DBECOUNT_OFST 0x44
95
96/* SDRAM Controller Double Bit Error Count Register Bit Masks */
97#define DBECOUNT_MASK 0x0F
98
99/* SDRAM Controller ECC Error Address Register */
100#define ERRADDR_OFST 0x48
101
102/* SDRAM Controller ECC Error Address Register Bit Masks */
103#define ERRADDR_MASK 0xFFFFFFFF
104 60
105/* Altera SDRAM Memory Controller data */ 61static const struct altr_sdram_prv_data a10_data = {
106struct altr_sdram_mc_data { 62 .ecc_ctrl_offset = A10_ECCCTRL1_OFST,
107 struct regmap *mc_vbase; 63 .ecc_ctl_en_mask = A10_ECCCTRL1_ECC_EN,
64 .ecc_stat_offset = A10_INTSTAT_OFST,
65 .ecc_stat_ce_mask = A10_INTSTAT_SBEERR,
66 .ecc_stat_ue_mask = A10_INTSTAT_DBEERR,
67 .ecc_saddr_offset = A10_SERRADDR_OFST,
68 .ecc_daddr_offset = A10_DERRADDR_OFST,
69 .ecc_irq_en_offset = A10_ERRINTEN_OFST,
70 .ecc_irq_en_mask = A10_ECC_IRQ_EN_MASK,
71 .ecc_irq_clr_offset = A10_INTSTAT_OFST,
72 .ecc_irq_clr_mask = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR),
73 .ecc_cnt_rst_offset = A10_ECCCTRL1_OFST,
74 .ecc_cnt_rst_mask = A10_ECC_CNT_RESET_MASK,
75#ifdef CONFIG_EDAC_DEBUG
76 .ce_ue_trgr_offset = A10_DIAGINTTEST_OFST,
77 .ce_set_mask = A10_DIAGINT_TSERRA_MASK,
78 .ue_set_mask = A10_DIAGINT_TDERRA_MASK,
79#endif
108}; 80};
109 81
110static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) 82static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
111{ 83{
112 struct mem_ctl_info *mci = dev_id; 84 struct mem_ctl_info *mci = dev_id;
113 struct altr_sdram_mc_data *drvdata = mci->pvt_info; 85 struct altr_sdram_mc_data *drvdata = mci->pvt_info;
114 u32 status, err_count, err_addr; 86 const struct altr_sdram_prv_data *priv = drvdata->data;
87 u32 status, err_count = 1, err_addr;
115 88
116 /* Error Address is shared by both SBE & DBE */ 89 regmap_read(drvdata->mc_vbase, priv->ecc_stat_offset, &status);
117 regmap_read(drvdata->mc_vbase, ERRADDR_OFST, &err_addr);
118 90
119 regmap_read(drvdata->mc_vbase, DRAMSTS_OFST, &status); 91 if (status & priv->ecc_stat_ue_mask) {
120 92 regmap_read(drvdata->mc_vbase, priv->ecc_daddr_offset,
121 if (status & DRAMSTS_DBEERR) { 93 &err_addr);
122 regmap_read(drvdata->mc_vbase, DBECOUNT_OFST, &err_count); 94 if (priv->ecc_uecnt_offset)
95 regmap_read(drvdata->mc_vbase, priv->ecc_uecnt_offset,
96 &err_count);
123 panic("\nEDAC: [%d Uncorrectable errors @ 0x%08X]\n", 97 panic("\nEDAC: [%d Uncorrectable errors @ 0x%08X]\n",
124 err_count, err_addr); 98 err_count, err_addr);
125 } 99 }
126 if (status & DRAMSTS_SBEERR) { 100 if (status & priv->ecc_stat_ce_mask) {
127 regmap_read(drvdata->mc_vbase, SBECOUNT_OFST, &err_count); 101 regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset,
102 &err_addr);
103 if (priv->ecc_uecnt_offset)
104 regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset,
105 &err_count);
128 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, 106 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count,
129 err_addr >> PAGE_SHIFT, 107 err_addr >> PAGE_SHIFT,
130 err_addr & ~PAGE_MASK, 0, 108 err_addr & ~PAGE_MASK, 0,
131 0, 0, -1, mci->ctl_name, ""); 109 0, 0, -1, mci->ctl_name, "");
132 } 110 /* Clear IRQ to resume */
133 111 regmap_write(drvdata->mc_vbase, priv->ecc_irq_clr_offset,
134 regmap_write(drvdata->mc_vbase, DRAMINTR_OFST, 112 priv->ecc_irq_clr_mask);
135 (DRAMINTR_INTRCLR | DRAMINTR_INTREN));
136 113
137 return IRQ_HANDLED; 114 return IRQ_HANDLED;
115 }
116 return IRQ_NONE;
138} 117}
139 118
140#ifdef CONFIG_EDAC_DEBUG 119#ifdef CONFIG_EDAC_DEBUG
@@ -144,6 +123,7 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
144{ 123{
145 struct mem_ctl_info *mci = file->private_data; 124 struct mem_ctl_info *mci = file->private_data;
146 struct altr_sdram_mc_data *drvdata = mci->pvt_info; 125 struct altr_sdram_mc_data *drvdata = mci->pvt_info;
126 const struct altr_sdram_prv_data *priv = drvdata->data;
147 u32 *ptemp; 127 u32 *ptemp;
148 dma_addr_t dma_handle; 128 dma_addr_t dma_handle;
149 u32 reg, read_reg; 129 u32 reg, read_reg;
@@ -156,8 +136,9 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
156 return -ENOMEM; 136 return -ENOMEM;
157 } 137 }
158 138
159 regmap_read(drvdata->mc_vbase, CTLCFG_OFST, &read_reg); 139 regmap_read(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
160 read_reg &= ~(CTLCFG_GEN_SB_ERR | CTLCFG_GEN_DB_ERR); 140 &read_reg);
141 read_reg &= ~(priv->ce_set_mask | priv->ue_set_mask);
161 142
162 /* Error are injected by writing a word while the SBE or DBE 143 /* Error are injected by writing a word while the SBE or DBE
163 * bit in the CTLCFG register is set. Reading the word will 144 * bit in the CTLCFG register is set. Reading the word will
@@ -166,20 +147,20 @@ static ssize_t altr_sdr_mc_err_inject_write(struct file *file,
166 if (count == 3) { 147 if (count == 3) {
167 edac_printk(KERN_ALERT, EDAC_MC, 148 edac_printk(KERN_ALERT, EDAC_MC,
168 "Inject Double bit error\n"); 149 "Inject Double bit error\n");
169 regmap_write(drvdata->mc_vbase, CTLCFG_OFST, 150 regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
170 (read_reg | CTLCFG_GEN_DB_ERR)); 151 (read_reg | priv->ue_set_mask));
171 } else { 152 } else {
172 edac_printk(KERN_ALERT, EDAC_MC, 153 edac_printk(KERN_ALERT, EDAC_MC,
173 "Inject Single bit error\n"); 154 "Inject Single bit error\n");
174 regmap_write(drvdata->mc_vbase, CTLCFG_OFST, 155 regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset,
175 (read_reg | CTLCFG_GEN_SB_ERR)); 156 (read_reg | priv->ce_set_mask));
176 } 157 }
177 158
178 ptemp[0] = 0x5A5A5A5A; 159 ptemp[0] = 0x5A5A5A5A;
179 ptemp[1] = 0xA5A5A5A5; 160 ptemp[1] = 0xA5A5A5A5;
180 161
181 /* Clear the error injection bits */ 162 /* Clear the error injection bits */
182 regmap_write(drvdata->mc_vbase, CTLCFG_OFST, read_reg); 163 regmap_write(drvdata->mc_vbase, priv->ce_ue_trgr_offset, read_reg);
183 /* Ensure it has been written out */ 164 /* Ensure it has been written out */
184 wmb(); 165 wmb();
185 166
@@ -219,50 +200,106 @@ static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
219{} 200{}
220#endif 201#endif
221 202
222/* Get total memory size in bytes */ 203/* Get total memory size from Open Firmware DTB */
223static u32 altr_sdram_get_total_mem_size(struct regmap *mc_vbase) 204static unsigned long get_total_mem(void)
224{ 205{
225 u32 size, read_reg, row, bank, col, cs, width; 206 struct device_node *np = NULL;
226 207 const unsigned int *reg, *reg_end;
227 if (regmap_read(mc_vbase, DRAMADDRW_OFST, &read_reg) < 0) 208 int len, sw, aw;
228 return 0; 209 unsigned long start, size, total_mem = 0;
229 210
230 if (regmap_read(mc_vbase, DRAMIFWIDTH_OFST, &width) < 0) 211 for_each_node_by_type(np, "memory") {
231 return 0; 212 aw = of_n_addr_cells(np);
232 213 sw = of_n_size_cells(np);
233 col = (read_reg & DRAMADDRW_COLBIT_MASK) >> 214 reg = (const unsigned int *)of_get_property(np, "reg", &len);
234 DRAMADDRW_COLBIT_SHIFT; 215 reg_end = reg + (len / sizeof(u32));
235 row = (read_reg & DRAMADDRW_ROWBIT_MASK) >> 216
236 DRAMADDRW_ROWBIT_SHIFT; 217 total_mem = 0;
237 bank = (read_reg & DRAMADDRW_BANKBIT_MASK) >> 218 do {
238 DRAMADDRW_BANKBIT_SHIFT; 219 start = of_read_number(reg, aw);
239 cs = (read_reg & DRAMADDRW_CSBIT_MASK) >> 220 reg += aw;
240 DRAMADDRW_CSBIT_SHIFT; 221 size = of_read_number(reg, sw);
241 222 reg += sw;
242 /* Correct for ECC as its not addressible */ 223 total_mem += size;
243 if (width == DRAMIFWIDTH_32B_ECC) 224 } while (reg < reg_end);
244 width = 32; 225 }
245 if (width == DRAMIFWIDTH_16B_ECC) 226 edac_dbg(0, "total_mem 0x%lx\n", total_mem);
246 width = 16; 227 return total_mem;
247 228}
248 /* calculate the SDRAM size base on this info */ 229
249 size = 1 << (row + bank + col); 230static const struct of_device_id altr_sdram_ctrl_of_match[] = {
250 size = size * cs * (width / 8); 231 { .compatible = "altr,sdram-edac", .data = (void *)&c5_data},
251 return size; 232 { .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data},
233 {},
234};
235MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
236
237static int a10_init(struct regmap *mc_vbase)
238{
239 if (regmap_update_bits(mc_vbase, A10_INTMODE_OFST,
240 A10_INTMODE_SB_INT, A10_INTMODE_SB_INT)) {
241 edac_printk(KERN_ERR, EDAC_MC,
242 "Error setting SB IRQ mode\n");
243 return -ENODEV;
244 }
245
246 if (regmap_write(mc_vbase, A10_SERRCNTREG_OFST, 1)) {
247 edac_printk(KERN_ERR, EDAC_MC,
248 "Error setting trigger count\n");
249 return -ENODEV;
250 }
251
252 return 0;
253}
254
255static int a10_unmask_irq(struct platform_device *pdev, u32 mask)
256{
257 void __iomem *sm_base;
258 int ret = 0;
259
260 if (!request_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32),
261 dev_name(&pdev->dev))) {
262 edac_printk(KERN_ERR, EDAC_MC,
263 "Unable to request mem region\n");
264 return -EBUSY;
265 }
266
267 sm_base = ioremap(A10_SYMAN_INTMASK_CLR, sizeof(u32));
268 if (!sm_base) {
269 edac_printk(KERN_ERR, EDAC_MC,
270 "Unable to ioremap device\n");
271
272 ret = -ENOMEM;
273 goto release;
274 }
275
276 iowrite32(mask, sm_base);
277
278 iounmap(sm_base);
279
280release:
281 release_mem_region(A10_SYMAN_INTMASK_CLR, sizeof(u32));
282
283 return ret;
252} 284}
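get_total_mem(), shown above, replaces the old register-decoding arithmetic with a walk over every device-tree node of type "memory", summing the size cells of each "reg" entry. A rough user-space model of that walk for one hypothetical node with a single <address size> pair (one address cell, one size cell; byte order and the of_* helpers are simplified away):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical reg property: <0x00000000 0x40000000> -> 1 GiB at address 0. */
static const uint32_t reg[] = { 0x00000000, 0x40000000 };

/* Simplified stand-in for of_read_number(): combine 'size' 32-bit cells. */
static uint64_t read_number(const uint32_t *cell, int size)
{
        uint64_t r = 0;

        while (size--)
                r = (r << 32) | *cell++;
        return r;
}

int main(void)
{
        int aw = 1, sw = 1;     /* of_n_addr_cells() / of_n_size_cells() */
        const uint32_t *p = reg, *end = reg + sizeof(reg) / sizeof(reg[0]);
        uint64_t total_mem = 0;

        while (p < end) {
                uint64_t start = read_number(p, aw);
                uint64_t size;

                p += aw;
                size = read_number(p, sw);
                p += sw;
                total_mem += size;
                printf("region 0x%llx size 0x%llx\n",
                       (unsigned long long)start, (unsigned long long)size);
        }
        printf("total_mem 0x%llx\n", (unsigned long long)total_mem);
        return 0;
}

Note that, as merged, total_mem is reset at the top of each memory node, so on a system with more than one /memory node only the regions of the last node visited contribute to the reported size.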
253 285
254static int altr_sdram_probe(struct platform_device *pdev) 286static int altr_sdram_probe(struct platform_device *pdev)
255{ 287{
288 const struct of_device_id *id;
256 struct edac_mc_layer layers[2]; 289 struct edac_mc_layer layers[2];
257 struct mem_ctl_info *mci; 290 struct mem_ctl_info *mci;
258 struct altr_sdram_mc_data *drvdata; 291 struct altr_sdram_mc_data *drvdata;
292 const struct altr_sdram_prv_data *priv;
259 struct regmap *mc_vbase; 293 struct regmap *mc_vbase;
260 struct dimm_info *dimm; 294 struct dimm_info *dimm;
261 u32 read_reg, mem_size; 295 u32 read_reg;
262 int irq; 296 int irq, irq2, res = 0;
263 int res = 0; 297 unsigned long mem_size, irqflags = 0;
298
299 id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev);
300 if (!id)
301 return -ENODEV;
264 302
265 /* Validate the SDRAM controller has ECC enabled */
266 /* Grab the register range from the sdr controller in device tree */ 303 /* Grab the register range from the sdr controller in device tree */
267 mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, 304 mc_vbase = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
268 "altr,sdr-syscon"); 305 "altr,sdr-syscon");
@@ -272,25 +309,46 @@ static int altr_sdram_probe(struct platform_device *pdev)
272 return -ENODEV; 309 return -ENODEV;
273 } 310 }
274 311
275 if (regmap_read(mc_vbase, CTLCFG_OFST, &read_reg) || 312 /* Check specific dependencies for the module */
276 ((read_reg & CTLCFG_ECC_AUTO_EN) != CTLCFG_ECC_AUTO_EN)) { 313 priv = of_match_node(altr_sdram_ctrl_of_match,
314 pdev->dev.of_node)->data;
315
316 /* Validate the SDRAM controller has ECC enabled */
317 if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &read_reg) ||
318 ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) {
277 edac_printk(KERN_ERR, EDAC_MC, 319 edac_printk(KERN_ERR, EDAC_MC,
278 "No ECC/ECC disabled [0x%08X]\n", read_reg); 320 "No ECC/ECC disabled [0x%08X]\n", read_reg);
279 return -ENODEV; 321 return -ENODEV;
280 } 322 }
281 323
282 /* Grab memory size from device tree. */ 324 /* Grab memory size from device tree. */
283 mem_size = altr_sdram_get_total_mem_size(mc_vbase); 325 mem_size = get_total_mem();
284 if (!mem_size) { 326 if (!mem_size) {
327 edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n");
328 return -ENODEV;
329 }
330
331 /* Ensure the SDRAM Interrupt is disabled */
332 if (regmap_update_bits(mc_vbase, priv->ecc_irq_en_offset,
333 priv->ecc_irq_en_mask, 0)) {
334 edac_printk(KERN_ERR, EDAC_MC,
335 "Error disabling SDRAM ECC IRQ\n");
336 return -ENODEV;
337 }
338
339 /* Toggle to clear the SDRAM Error count */
340 if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset,
341 priv->ecc_cnt_rst_mask,
342 priv->ecc_cnt_rst_mask)) {
285 edac_printk(KERN_ERR, EDAC_MC, 343 edac_printk(KERN_ERR, EDAC_MC,
286 "Unable to calculate memory size\n"); 344 "Error clearing SDRAM ECC count\n");
287 return -ENODEV; 345 return -ENODEV;
288 } 346 }
289 347
290 /* Ensure the SDRAM Interrupt is disabled and cleared */ 348 if (regmap_update_bits(mc_vbase, priv->ecc_cnt_rst_offset,
291 if (regmap_write(mc_vbase, DRAMINTR_OFST, DRAMINTR_INTRCLR)) { 349 priv->ecc_cnt_rst_mask, 0)) {
292 edac_printk(KERN_ERR, EDAC_MC, 350 edac_printk(KERN_ERR, EDAC_MC,
293 "Error clearing SDRAM ECC IRQ\n"); 351 "Error clearing SDRAM ECC count\n");
294 return -ENODEV; 352 return -ENODEV;
295 } 353 }
296 354
@@ -301,6 +359,9 @@ static int altr_sdram_probe(struct platform_device *pdev)
301 return -ENODEV; 359 return -ENODEV;
302 } 360 }
303 361
362 /* Arria10 has a 2nd IRQ */
363 irq2 = platform_get_irq(pdev, 1);
364
304 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; 365 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
305 layers[0].size = 1; 366 layers[0].size = 1;
306 layers[0].is_virt_csrow = true; 367 layers[0].is_virt_csrow = true;
@@ -315,9 +376,12 @@ static int altr_sdram_probe(struct platform_device *pdev)
315 mci->pdev = &pdev->dev; 376 mci->pdev = &pdev->dev;
316 drvdata = mci->pvt_info; 377 drvdata = mci->pvt_info;
317 drvdata->mc_vbase = mc_vbase; 378 drvdata->mc_vbase = mc_vbase;
379 drvdata->data = priv;
318 platform_set_drvdata(pdev, mci); 380 platform_set_drvdata(pdev, mci);
319 381
320 if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { 382 if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
383 edac_printk(KERN_ERR, EDAC_MC,
384 "Unable to get managed device resource\n");
321 res = -ENOMEM; 385 res = -ENOMEM;
322 goto free; 386 goto free;
323 } 387 }
@@ -342,8 +406,32 @@ static int altr_sdram_probe(struct platform_device *pdev)
342 if (res < 0) 406 if (res < 0)
343 goto err; 407 goto err;
344 408
409 /* Only the Arria10 has separate IRQs */
410 if (irq2 > 0) {
411 /* Arria10 specific initialization */
412 res = a10_init(mc_vbase);
413 if (res < 0)
414 goto err2;
415
416 res = devm_request_irq(&pdev->dev, irq2,
417 altr_sdram_mc_err_handler,
418 IRQF_SHARED, dev_name(&pdev->dev), mci);
419 if (res < 0) {
420 edac_mc_printk(mci, KERN_ERR,
421 "Unable to request irq %d\n", irq2);
422 res = -ENODEV;
423 goto err2;
424 }
425
426 res = a10_unmask_irq(pdev, A10_DDR0_IRQ_MASK);
427 if (res < 0)
428 goto err2;
429
430 irqflags = IRQF_SHARED;
431 }
432
345 res = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler, 433 res = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler,
346 0, dev_name(&pdev->dev), mci); 434 irqflags, dev_name(&pdev->dev), mci);
347 if (res < 0) { 435 if (res < 0) {
348 edac_mc_printk(mci, KERN_ERR, 436 edac_mc_printk(mci, KERN_ERR,
349 "Unable to request irq %d\n", irq); 437 "Unable to request irq %d\n", irq);
@@ -351,8 +439,9 @@ static int altr_sdram_probe(struct platform_device *pdev)
351 goto err2; 439 goto err2;
352 } 440 }
353 441
354 if (regmap_write(drvdata->mc_vbase, DRAMINTR_OFST, 442 /* Infrastructure ready - enable the IRQ */
355 (DRAMINTR_INTRCLR | DRAMINTR_INTREN))) { 443 if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset,
444 priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) {
356 edac_mc_printk(mci, KERN_ERR, 445 edac_mc_printk(mci, KERN_ERR,
357 "Error enabling SDRAM ECC IRQ\n"); 446 "Error enabling SDRAM ECC IRQ\n");
358 res = -ENODEV; 447 res = -ENODEV;
@@ -388,17 +477,31 @@ static int altr_sdram_remove(struct platform_device *pdev)
388 return 0; 477 return 0;
389} 478}
390 479
391static const struct of_device_id altr_sdram_ctrl_of_match[] = { 480/*
392 { .compatible = "altr,sdram-edac", }, 481 * If you want to suspend, need to disable EDAC by removing it
393 {}, 482 * from the device tree or defconfig.
483 */
484#ifdef CONFIG_PM
485static int altr_sdram_prepare(struct device *dev)
486{
487 pr_err("Suspend not allowed when EDAC is enabled.\n");
488
489 return -EPERM;
490}
491
492static const struct dev_pm_ops altr_sdram_pm_ops = {
493 .prepare = altr_sdram_prepare,
394}; 494};
395MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); 495#endif
396 496
397static struct platform_driver altr_sdram_edac_driver = { 497static struct platform_driver altr_sdram_edac_driver = {
398 .probe = altr_sdram_probe, 498 .probe = altr_sdram_probe,
399 .remove = altr_sdram_remove, 499 .remove = altr_sdram_remove,
400 .driver = { 500 .driver = {
401 .name = "altr_sdram_edac", 501 .name = "altr_sdram_edac",
502#ifdef CONFIG_PM
503 .pm = &altr_sdram_pm_ops,
504#endif
402 .of_match_table = altr_sdram_ctrl_of_match, 505 .of_match_table = altr_sdram_ctrl_of_match,
403 }, 506 },
404}; 507};
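The heart of the CycloneV/Arria10 generalization is that every controller-specific offset and mask now lives in a per-variant altr_sdram_prv_data hung off the of_device_id .data pointer, so the probe and IRQ paths only ever dereference priv-> fields. A condensed sketch of that dispatch; the demo_* names are illustrative, not the driver's own:

#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/regmap.h>
#include "altera_edac.h"

/* Illustrative per-variant tables; the real c5_data/a10_data live in the driver. */
static const struct altr_sdram_prv_data demo_c5_data, demo_a10_data;

static const struct of_device_id demo_of_match[] = {
        { .compatible = "altr,sdram-edac",     .data = &demo_c5_data  },
        { .compatible = "altr,sdram-edac-a10", .data = &demo_a10_data },
        { /* sentinel */ },
};

/* Verify ECC is enabled, using whichever register layout the DT selected. */
static int demo_check_ecc(struct regmap *mc_vbase, struct device_node *np)
{
        const struct of_device_id *id = of_match_node(demo_of_match, np);
        const struct altr_sdram_prv_data *priv;
        u32 reg;

        if (!id)
                return -ENODEV;
        priv = id->data;

        if (regmap_read(mc_vbase, priv->ecc_ctrl_offset, &reg) ||
            (reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)
                return -ENODEV;

        return 0;
}

Adding a future controller then reduces to supplying another altr_sdram_prv_data table and another compatible string, without touching the shared handler code.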
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
new file mode 100644
index 000000000000..7b64dc7c4eb7
--- /dev/null
+++ b/drivers/edac/altera_edac.h
@@ -0,0 +1,201 @@
1/*
2 *
3 * Copyright (C) 2015 Altera Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef _ALTERA_EDAC_H
19#define _ALTERA_EDAC_H
20
21#include <linux/edac.h>
22#include <linux/types.h>
23
24/* SDRAM Controller CtrlCfg Register */
25#define CV_CTLCFG_OFST 0x00
26
27/* SDRAM Controller CtrlCfg Register Bit Masks */
28#define CV_CTLCFG_ECC_EN 0x400
29#define CV_CTLCFG_ECC_CORR_EN 0x800
30#define CV_CTLCFG_GEN_SB_ERR 0x2000
31#define CV_CTLCFG_GEN_DB_ERR 0x4000
32
33#define CV_CTLCFG_ECC_AUTO_EN (CV_CTLCFG_ECC_EN | \
34 CV_CTLCFG_ECC_CORR_EN)
35
36/* SDRAM Controller Address Width Register */
37#define CV_DRAMADDRW_OFST 0x2C
38
39/* SDRAM Controller Address Widths Field Register */
40#define DRAMADDRW_COLBIT_MASK 0x001F
41#define DRAMADDRW_COLBIT_SHIFT 0
42#define DRAMADDRW_ROWBIT_MASK 0x03E0
43#define DRAMADDRW_ROWBIT_SHIFT 5
44#define CV_DRAMADDRW_BANKBIT_MASK 0x1C00
45#define CV_DRAMADDRW_BANKBIT_SHIFT 10
46#define CV_DRAMADDRW_CSBIT_MASK 0xE000
47#define CV_DRAMADDRW_CSBIT_SHIFT 13
48
49/* SDRAM Controller Interface Data Width Register */
50#define CV_DRAMIFWIDTH_OFST 0x30
51
52/* SDRAM Controller Interface Data Width Defines */
53#define CV_DRAMIFWIDTH_16B_ECC 24
54#define CV_DRAMIFWIDTH_32B_ECC 40
55
56/* SDRAM Controller DRAM Status Register */
57#define CV_DRAMSTS_OFST 0x38
58
59/* SDRAM Controller DRAM Status Register Bit Masks */
60#define CV_DRAMSTS_SBEERR 0x04
61#define CV_DRAMSTS_DBEERR 0x08
62#define CV_DRAMSTS_CORR_DROP 0x10
63
64/* SDRAM Controller DRAM IRQ Register */
65#define CV_DRAMINTR_OFST 0x3C
66
67/* SDRAM Controller DRAM IRQ Register Bit Masks */
68#define CV_DRAMINTR_INTREN 0x01
69#define CV_DRAMINTR_SBEMASK 0x02
70#define CV_DRAMINTR_DBEMASK 0x04
71#define CV_DRAMINTR_CORRDROPMASK 0x08
72#define CV_DRAMINTR_INTRCLR 0x10
73
74/* SDRAM Controller Single Bit Error Count Register */
75#define CV_SBECOUNT_OFST 0x40
76
77/* SDRAM Controller Double Bit Error Count Register */
78#define CV_DBECOUNT_OFST 0x44
79
80/* SDRAM Controller ECC Error Address Register */
81#define CV_ERRADDR_OFST 0x48
82
83/*-----------------------------------------*/
84
85/* SDRAM Controller EccCtrl Register */
86#define A10_ECCCTRL1_OFST 0x00
87
88/* SDRAM Controller EccCtrl Register Bit Masks */
89#define A10_ECCCTRL1_ECC_EN 0x001
90#define A10_ECCCTRL1_CNT_RST 0x010
91#define A10_ECCCTRL1_AWB_CNT_RST 0x100
92#define A10_ECC_CNT_RESET_MASK (A10_ECCCTRL1_CNT_RST | \
93 A10_ECCCTRL1_AWB_CNT_RST)
94
95/* SDRAM Controller Address Width Register */
96#define CV_DRAMADDRW 0xFFC2502C
97#define A10_DRAMADDRW 0xFFCFA0A8
98
99/* SDRAM Controller Address Widths Field Register */
100#define DRAMADDRW_COLBIT_MASK 0x001F
101#define DRAMADDRW_COLBIT_SHIFT 0
102#define DRAMADDRW_ROWBIT_MASK 0x03E0
103#define DRAMADDRW_ROWBIT_SHIFT 5
104#define CV_DRAMADDRW_BANKBIT_MASK 0x1C00
105#define CV_DRAMADDRW_BANKBIT_SHIFT 10
106#define CV_DRAMADDRW_CSBIT_MASK 0xE000
107#define CV_DRAMADDRW_CSBIT_SHIFT 13
108
109#define A10_DRAMADDRW_BANKBIT_MASK 0x3C00
110#define A10_DRAMADDRW_BANKBIT_SHIFT 10
111#define A10_DRAMADDRW_GRPBIT_MASK 0xC000
112#define A10_DRAMADDRW_GRPBIT_SHIFT 14
113#define A10_DRAMADDRW_CSBIT_MASK 0x70000
114#define A10_DRAMADDRW_CSBIT_SHIFT 16
115
116/* SDRAM Controller Interface Data Width Register */
117#define CV_DRAMIFWIDTH 0xFFC25030
118#define A10_DRAMIFWIDTH 0xFFCFB008
119
120/* SDRAM Controller Interface Data Width Defines */
121#define CV_DRAMIFWIDTH_16B_ECC 24
122#define CV_DRAMIFWIDTH_32B_ECC 40
123
124#define A10_DRAMIFWIDTH_16B 0x0
125#define A10_DRAMIFWIDTH_32B 0x1
126#define A10_DRAMIFWIDTH_64B 0x2
127
128/* SDRAM Controller DRAM IRQ Register */
129#define A10_ERRINTEN_OFST 0x10
130
131/* SDRAM Controller DRAM IRQ Register Bit Masks */
132#define A10_ERRINTEN_SERRINTEN 0x01
133#define A10_ERRINTEN_DERRINTEN 0x02
134#define A10_ECC_IRQ_EN_MASK (A10_ERRINTEN_SERRINTEN | \
135 A10_ERRINTEN_DERRINTEN)
136
137/* SDRAM Interrupt Mode Register */
138#define A10_INTMODE_OFST 0x1C
139#define A10_INTMODE_SB_INT 1
140
141/* SDRAM Controller Error Status Register */
142#define A10_INTSTAT_OFST 0x20
143
144/* SDRAM Controller Error Status Register Bit Masks */
145#define A10_INTSTAT_SBEERR 0x01
146#define A10_INTSTAT_DBEERR 0x02
147
148/* SDRAM Controller ECC Error Address Register */
149#define A10_DERRADDR_OFST 0x2C
150#define A10_SERRADDR_OFST 0x30
151
152/* SDRAM Controller ECC Diagnostic Register */
153#define A10_DIAGINTTEST_OFST 0x24
154
155#define A10_DIAGINT_TSERRA_MASK 0x0001
156#define A10_DIAGINT_TDERRA_MASK 0x0100
157
158#define A10_SBERR_IRQ 34
159#define A10_DBERR_IRQ 32
160
161/* SDRAM Single Bit Error Count Compare Set Register */
162#define A10_SERRCNTREG_OFST 0x3C
163
164#define A10_SYMAN_INTMASK_CLR 0xFFD06098
165#define A10_INTMASK_CLR_OFST 0x10
166#define A10_DDR0_IRQ_MASK BIT(17)
167
168struct altr_sdram_prv_data {
169 int ecc_ctrl_offset;
170 int ecc_ctl_en_mask;
171 int ecc_cecnt_offset;
172 int ecc_uecnt_offset;
173 int ecc_stat_offset;
174 int ecc_stat_ce_mask;
175 int ecc_stat_ue_mask;
176 int ecc_saddr_offset;
177 int ecc_daddr_offset;
178 int ecc_irq_en_offset;
179 int ecc_irq_en_mask;
180 int ecc_irq_clr_offset;
181 int ecc_irq_clr_mask;
182 int ecc_cnt_rst_offset;
183 int ecc_cnt_rst_mask;
184#ifdef CONFIG_EDAC_DEBUG
185 struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr;
186 int ecc_enable_mask;
187 int ce_set_mask;
188 int ue_set_mask;
189 int ce_ue_trgr_offset;
190#endif
191};
192
193/* Altera SDRAM Memory Controller data */
194struct altr_sdram_mc_data {
195 struct regmap *mc_vbase;
196 int sb_irq;
197 int db_irq;
198 const struct altr_sdram_prv_data *data;
199};
200
201#endif /* #ifndef _ALTERA_EDAC_H */
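The actual c5_data/a10_data initializers are outside this hunk; purely to show how the generic fields are meant to line up with the CycloneV macros above, a CycloneV-style table could be filled in roughly like this (the field assignments are an assumption for illustration, not the driver's verbatim initializer):

/* Hypothetical sketch -- the real c5_data lives in altera_edac.c. */
static const struct altr_sdram_prv_data c5_data_sketch = {
        .ecc_ctrl_offset    = CV_CTLCFG_OFST,
        .ecc_ctl_en_mask    = CV_CTLCFG_ECC_AUTO_EN,
        .ecc_stat_offset    = CV_DRAMSTS_OFST,
        .ecc_stat_ce_mask   = CV_DRAMSTS_SBEERR,
        .ecc_stat_ue_mask   = CV_DRAMSTS_DBEERR,
        .ecc_cecnt_offset   = CV_SBECOUNT_OFST,
        .ecc_uecnt_offset   = CV_DBECOUNT_OFST,
        .ecc_saddr_offset   = CV_ERRADDR_OFST,
        .ecc_daddr_offset   = CV_ERRADDR_OFST,  /* one address reg shared by SBE and DBE */
        .ecc_irq_en_offset  = CV_DRAMINTR_OFST,
        .ecc_irq_en_mask    = CV_DRAMINTR_INTREN,
        .ecc_irq_clr_offset = CV_DRAMINTR_OFST,
        .ecc_irq_clr_mask   = (CV_DRAMINTR_INTRCLR | CV_DRAMINTR_INTREN),
        .ecc_cnt_rst_offset = CV_DRAMINTR_OFST,
        .ecc_cnt_rst_mask   = CV_DRAMINTR_INTRCLR,
};

An Arria10 table would do the same with the A10_* macros (A10_ECCCTRL1_OFST, A10_INTSTAT_OFST, A10_SERRADDR_OFST/A10_DERRADDR_OFST and so on), which is what lets the shared handler stay register-layout agnostic.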
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index af3be1914dbb..943ed8cf71b9 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -30,11 +30,16 @@
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31#include <asm/uaccess.h> 31#include <asm/uaccess.h>
32#include <asm/page.h> 32#include <asm/page.h>
33#include <asm/edac.h>
34#include "edac_core.h" 33#include "edac_core.h"
35#include "edac_module.h" 34#include "edac_module.h"
36#include <ras/ras_event.h> 35#include <ras/ras_event.h>
37 36
37#ifdef CONFIG_EDAC_ATOMIC_SCRUB
38#include <asm/edac.h>
39#else
40#define edac_atomic_scrub(va, size) do { } while (0)
41#endif
42
38/* lock to memory controller's control array */ 43/* lock to memory controller's control array */
39static DEFINE_MUTEX(mem_ctls_mutex); 44static DEFINE_MUTEX(mem_ctls_mutex);
40static LIST_HEAD(mc_devices); 45static LIST_HEAD(mc_devices);
@@ -874,7 +879,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
874 virt_addr = kmap_atomic(pg); 879 virt_addr = kmap_atomic(pg);
875 880
876 /* Perform architecture specific atomic scrub operation */ 881 /* Perform architecture specific atomic scrub operation */
877 atomic_scrub(virt_addr + offset, size); 882 edac_atomic_scrub(virt_addr + offset, size);
878 883
879 /* Unmap and complete */ 884 /* Unmap and complete */
880 kunmap_atomic(virt_addr); 885 kunmap_atomic(virt_addr);
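With this change the EDAC core no longer needs an <asm/edac.h> on every architecture: only arches that select CONFIG_EDAC_ATOMIC_SCRUB provide edac_atomic_scrub(), everyone else silently gets the empty fallback above. As a rough sketch of what such a helper does, loosely modeled on the x86 idea of touching each word with a locked read-modify-write so the corrected data is written back; the real per-architecture implementations differ:

/* Illustrative only -- not copied from any particular architecture's <asm/edac.h>. */
static inline void edac_atomic_scrub(void *va, u32 size)
{
        u32 i, *p = va;

        /*
         * An atomic "add 0" forces a read-modify-write of each word, which
         * makes the memory controller rewrite the location and thereby
         * scrub a correctable ECC error in place.
         */
        for (i = 0; i < size / 4; i++, p++)
                asm volatile("lock; addl $0, %0" : "+m" (*p));
}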
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 9d9e18aefaaa..ff07aae5b7fb 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -16,7 +16,6 @@
16#include <linux/edac.h> 16#include <linux/edac.h>
17#include <linux/atomic.h> 17#include <linux/atomic.h>
18#include <linux/device.h> 18#include <linux/device.h>
19#include <asm/edac.h>
20 19
21int edac_op_state = EDAC_OPSTATE_INVAL; 20int edac_op_state = EDAC_OPSTATE_INVAL;
22EXPORT_SYMBOL_GPL(edac_op_state); 21EXPORT_SYMBOL_GPL(edac_op_state);
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index f7681b553fd5..4c73e4d03d46 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -15,6 +15,8 @@
15#include <linux/device.h> 15#include <linux/device.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/cpu.h> 17#include <linux/cpu.h>
18#include <linux/string.h>
19#include <linux/uaccess.h>
18#include <asm/mce.h> 20#include <asm/mce.h>
19 21
20#include "mce_amd.h" 22#include "mce_amd.h"
@@ -25,6 +27,25 @@
25static struct mce i_mce; 27static struct mce i_mce;
26static struct dentry *dfs_inj; 28static struct dentry *dfs_inj;
27 29
30static u8 n_banks;
31
32#define MAX_FLAG_OPT_SIZE 3
33
34enum injection_type {
35 SW_INJ = 0, /* SW injection, simply decode the error */
36 HW_INJ, /* Trigger a #MC */
37 N_INJ_TYPES,
38};
39
40static const char * const flags_options[] = {
41 [SW_INJ] = "sw",
42 [HW_INJ] = "hw",
43 NULL
44};
45
46/* Set default injection to SW_INJ */
47static enum injection_type inj_type = SW_INJ;
48
28#define MCE_INJECT_SET(reg) \ 49#define MCE_INJECT_SET(reg) \
29static int inj_##reg##_set(void *data, u64 val) \ 50static int inj_##reg##_set(void *data, u64 val) \
30{ \ 51{ \
@@ -79,24 +100,66 @@ static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
79 return err; 100 return err;
80} 101}
81 102
82static int flags_get(void *data, u64 *val) 103static int __set_inj(const char *buf)
83{ 104{
84 struct mce *m = (struct mce *)data; 105 int i;
85 106
86 *val = m->inject_flags; 107 for (i = 0; i < N_INJ_TYPES; i++) {
108 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
109 inj_type = i;
110 return 0;
111 }
112 }
113 return -EINVAL;
114}
87 115
88 return 0; 116static ssize_t flags_read(struct file *filp, char __user *ubuf,
117 size_t cnt, loff_t *ppos)
118{
119 char buf[MAX_FLAG_OPT_SIZE];
120 int n;
121
122 n = sprintf(buf, "%s\n", flags_options[inj_type]);
123
124 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
89} 125}
90 126
91static int flags_set(void *data, u64 val) 127static ssize_t flags_write(struct file *filp, const char __user *ubuf,
128 size_t cnt, loff_t *ppos)
92{ 129{
93 struct mce *m = (struct mce *)data; 130 char buf[MAX_FLAG_OPT_SIZE], *__buf;
131 int err;
132 size_t ret;
94 133
95 m->inject_flags = (u8)val; 134 if (cnt > MAX_FLAG_OPT_SIZE)
96 return 0; 135 cnt = MAX_FLAG_OPT_SIZE;
136
137 ret = cnt;
138
139 if (copy_from_user(&buf, ubuf, cnt))
140 return -EFAULT;
141
142 buf[cnt - 1] = 0;
143
144 /* strip whitespace */
145 __buf = strstrip(buf);
146
147 err = __set_inj(__buf);
148 if (err) {
149 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
150 return err;
151 }
152
153 *ppos += ret;
154
155 return ret;
97} 156}
98 157
99DEFINE_SIMPLE_ATTRIBUTE(flags_fops, flags_get, flags_set, "%llu\n"); 158static const struct file_operations flags_fops = {
159 .read = flags_read,
160 .write = flags_write,
161 .llseek = generic_file_llseek,
162};
100 163
101/* 164/*
102 * On which CPU to inject? 165 * On which CPU to inject?
@@ -128,21 +191,24 @@ static void do_inject(void)
128 unsigned int cpu = i_mce.extcpu; 191 unsigned int cpu = i_mce.extcpu;
129 u8 b = i_mce.bank; 192 u8 b = i_mce.bank;
130 193
131 if (!(i_mce.inject_flags & MCJ_EXCEPTION)) { 194 if (i_mce.misc)
195 i_mce.status |= MCI_STATUS_MISCV;
196
197 if (inj_type == SW_INJ) {
132 amd_decode_mce(NULL, 0, &i_mce); 198 amd_decode_mce(NULL, 0, &i_mce);
133 return; 199 return;
134 } 200 }
135 201
136 get_online_cpus();
137 if (!cpu_online(cpu))
138 goto err;
139
140 /* prep MCE global settings for the injection */ 202 /* prep MCE global settings for the injection */
141 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV; 203 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
142 204
143 if (!(i_mce.status & MCI_STATUS_PCC)) 205 if (!(i_mce.status & MCI_STATUS_PCC))
144 mcg_status |= MCG_STATUS_RIPV; 206 mcg_status |= MCG_STATUS_RIPV;
145 207
208 get_online_cpus();
209 if (!cpu_online(cpu))
210 goto err;
211
146 toggle_hw_mce_inject(cpu, true); 212 toggle_hw_mce_inject(cpu, true);
147 213
148 wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, 214 wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
@@ -174,11 +240,9 @@ static int inj_bank_set(void *data, u64 val)
174{ 240{
175 struct mce *m = (struct mce *)data; 241 struct mce *m = (struct mce *)data;
176 242
177 if (val > 5) { 243 if (val >= n_banks) {
178 if (boot_cpu_data.x86 != 0x15 || val > 6) { 244 pr_err("Non-existent MCE bank: %llu\n", val);
179 pr_err("Non-existent MCE bank: %llu\n", val); 245 return -EINVAL;
180 return -EINVAL;
181 }
182 } 246 }
183 247
184 m->bank = val; 248 m->bank = val;
@@ -187,32 +251,81 @@ static int inj_bank_set(void *data, u64 val)
187 return 0; 251 return 0;
188} 252}
189 253
190static int inj_bank_get(void *data, u64 *val) 254MCE_INJECT_GET(bank);
191{
192 struct mce *m = (struct mce *)data;
193 255
194 *val = m->bank; 256DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
195 return 0; 257
258static const char readme_msg[] =
259"Description of the files and their usages:\n"
260"\n"
261"Note1: i refers to the bank number below.\n"
262"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
263"as they mirror the hardware registers.\n"
264"\n"
265"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
266"\t attributes of the error which caused the MCE.\n"
267"\n"
268"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
269"\t used for error thresholding purposes and its validity is indicated by\n"
270"\t MCi_STATUS[MiscV].\n"
271"\n"
272"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
273"\t associated with the error.\n"
274"\n"
275"cpu:\t The CPU to inject the error on.\n"
276"\n"
277"bank:\t Specify the bank you want to inject the error into: the number of\n"
278"\t banks in a processor varies and is family/model-specific, therefore, the\n"
279"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
280"\t injection.\n"
281"\n"
282"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
283"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
284"\t for AMD processors.\n"
285"\n"
286"\t Allowed error injection types:\n"
287"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
288"\t format only. Safe to use.\n"
289"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
290"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
291"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
292"\t before injecting.\n"
293"\n";
294
295static ssize_t
296inj_readme_read(struct file *filp, char __user *ubuf,
297 size_t cnt, loff_t *ppos)
298{
299 return simple_read_from_buffer(ubuf, cnt, ppos,
300 readme_msg, strlen(readme_msg));
196} 301}
197 302
198DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n"); 303static const struct file_operations readme_fops = {
304 .read = inj_readme_read,
305};
199 306
200static struct dfs_node { 307static struct dfs_node {
201 char *name; 308 char *name;
202 struct dentry *d; 309 struct dentry *d;
203 const struct file_operations *fops; 310 const struct file_operations *fops;
311 umode_t perm;
204} dfs_fls[] = { 312} dfs_fls[] = {
205 { .name = "status", .fops = &status_fops }, 313 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
206 { .name = "misc", .fops = &misc_fops }, 314 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
207 { .name = "addr", .fops = &addr_fops }, 315 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
208 { .name = "bank", .fops = &bank_fops }, 316 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
209 { .name = "flags", .fops = &flags_fops }, 317 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
210 { .name = "cpu", .fops = &extcpu_fops }, 318 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
319 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
211}; 320};
212 321
213static int __init init_mce_inject(void) 322static int __init init_mce_inject(void)
214{ 323{
215 int i; 324 int i;
325 u64 cap;
326
327 rdmsrl(MSR_IA32_MCG_CAP, cap);
328 n_banks = cap & MCG_BANKCNT_MASK;
216 329
217 dfs_inj = debugfs_create_dir("mce-inject", NULL); 330 dfs_inj = debugfs_create_dir("mce-inject", NULL);
218 if (!dfs_inj) 331 if (!dfs_inj)
@@ -220,7 +333,7 @@ static int __init init_mce_inject(void)
220 333
221 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { 334 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
222 dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name, 335 dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
223 S_IRUSR | S_IWUSR, 336 dfs_fls[i].perm,
224 dfs_inj, 337 dfs_inj,
225 &i_mce, 338 &i_mce,
226 dfs_fls[i].fops); 339 dfs_fls[i].fops);
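Putting the mce_amd_inj changes together: "flags" now takes the strings "sw" or "hw", "bank" is validated against the bank count read from MCG_CAP at module init, and writing "bank" is what actually fires the injection, all of which the new README node documents. A minimal user-space sketch of a software-only injection; the debugfs mount point and the status/addr values are assumptions for the example:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define INJ_DIR "/sys/kernel/debug/mce-inject/"

/* Write a string to one of the injector's debugfs nodes. */
static void inj_write(const char *node, const char *val)
{
        char path[128];
        int fd;

        snprintf(path, sizeof(path), INJ_DIR "%s", node);
        fd = open(path, O_WRONLY);
        if (fd < 0) {
                perror(path);
                return;
        }
        if (write(fd, val, strlen(val)) < 0)
                perror(path);
        close(fd);
}

int main(void)
{
        /* The trailing newline matters for "flags": its write handler
         * NUL-terminates the buffer at count - 1 before parsing. */
        inj_write("flags", "sw\n");             /* decode only, no #MC */
        inj_write("cpu", "0\n");
        inj_write("status", "0x9c00410000010015\n");    /* illustrative MCi_STATUS */
        inj_write("addr", "0x1000\n");
        inj_write("bank", "4\n");               /* writing bank triggers the injection */
        return 0;
}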
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 68bf234bdfe6..23ef8e9f2c9a 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -811,6 +811,8 @@ static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc,
811 } 811 }
812} 812}
813 813
814#define make64(high, low) (((u64)(high) << 32) | (low))
815
814static void mpc85xx_mc_check(struct mem_ctl_info *mci) 816static void mpc85xx_mc_check(struct mem_ctl_info *mci)
815{ 817{
816 struct mpc85xx_mc_pdata *pdata = mci->pvt_info; 818 struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
@@ -818,7 +820,7 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
818 u32 bus_width; 820 u32 bus_width;
819 u32 err_detect; 821 u32 err_detect;
820 u32 syndrome; 822 u32 syndrome;
821 u32 err_addr; 823 u64 err_addr;
822 u32 pfn; 824 u32 pfn;
823 int row_index; 825 int row_index;
824 u32 cap_high; 826 u32 cap_high;
@@ -849,7 +851,9 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
849 else 851 else
850 syndrome &= 0xffff; 852 syndrome &= 0xffff;
851 853
852 err_addr = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS); 854 err_addr = make64(
855 in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_EXT_ADDRESS),
856 in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS));
853 pfn = err_addr >> PAGE_SHIFT; 857 pfn = err_addr >> PAGE_SHIFT;
854 858
855 for (row_index = 0; row_index < mci->nr_csrows; row_index++) { 859 for (row_index = 0; row_index < mci->nr_csrows; row_index++) {
@@ -886,7 +890,7 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
886 mpc85xx_mc_printk(mci, KERN_ERR, 890 mpc85xx_mc_printk(mci, KERN_ERR,
887 "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n", 891 "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
888 cap_high, cap_low, syndrome); 892 cap_high, cap_low, syndrome);
889 mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8x\n", err_addr); 893 mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8llx\n", err_addr);
890 mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); 894 mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn);
891 895
892 /* we are out of range */ 896 /* we are out of range */
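make64() widens the captured error address by stacking the new extended-address register on top of the original 32-bit capture register, so error addresses above 4 GiB are reported correctly. A tiny worked example with made-up register values:

#include <stdint.h>
#include <stdio.h>

#define make64(high, low) (((uint64_t)(high) << 32) | (low))

int main(void)
{
        uint32_t ext  = 0x00000001;     /* hypothetical MPC85XX_MC_CAPTURE_EXT_ADDRESS */
        uint32_t addr = 0x02345678;     /* hypothetical MPC85XX_MC_CAPTURE_ADDRESS */

        /* Prints err_addr = 0x102345678: an address just past the 4 GiB mark. */
        printf("err_addr = 0x%llx\n", (unsigned long long)make64(ext, addr));
        return 0;
}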
diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h
index 4498baf9ce05..9352e88d53e5 100644
--- a/drivers/edac/mpc85xx_edac.h
+++ b/drivers/edac/mpc85xx_edac.h
@@ -43,6 +43,7 @@
43#define MPC85XX_MC_ERR_INT_EN 0x0e48 43#define MPC85XX_MC_ERR_INT_EN 0x0e48
44#define MPC85XX_MC_CAPTURE_ATRIBUTES 0x0e4c 44#define MPC85XX_MC_CAPTURE_ATRIBUTES 0x0e4c
45#define MPC85XX_MC_CAPTURE_ADDRESS 0x0e50 45#define MPC85XX_MC_CAPTURE_ADDRESS 0x0e50
46#define MPC85XX_MC_CAPTURE_EXT_ADDRESS 0x0e54
46#define MPC85XX_MC_ERR_SBE 0x0e58 47#define MPC85XX_MC_ERR_SBE 0x0e58
47 48
48#define DSC_MEM_EN 0x80000000 49#define DSC_MEM_EN 0x80000000
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
new file mode 100644
index 000000000000..14636e4b6a08
--- /dev/null
+++ b/drivers/edac/xgene_edac.c
@@ -0,0 +1,1215 @@
1/*
2 * APM X-Gene SoC EDAC (error detection and correction)
3 *
4 * Copyright (c) 2015, Applied Micro Circuits Corporation
5 * Author: Feng Kan <fkan@apm.com>
6 * Loc Ho <lho@apm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <linux/ctype.h>
23#include <linux/edac.h>
24#include <linux/interrupt.h>
25#include <linux/mfd/syscon.h>
26#include <linux/module.h>
27#include <linux/of.h>
28#include <linux/of_address.h>
29#include <linux/regmap.h>
30
31#include "edac_core.h"
32
33#define EDAC_MOD_STR "xgene_edac"
34
35/* Global error configuration status registers (CSR) */
36#define PCPHPERRINTSTS 0x0000
37#define PCPHPERRINTMSK 0x0004
38#define MCU_CTL_ERR_MASK BIT(12)
39#define IOB_PA_ERR_MASK BIT(11)
40#define IOB_BA_ERR_MASK BIT(10)
41#define IOB_XGIC_ERR_MASK BIT(9)
42#define IOB_RB_ERR_MASK BIT(8)
43#define L3C_UNCORR_ERR_MASK BIT(5)
44#define MCU_UNCORR_ERR_MASK BIT(4)
45#define PMD3_MERR_MASK BIT(3)
46#define PMD2_MERR_MASK BIT(2)
47#define PMD1_MERR_MASK BIT(1)
48#define PMD0_MERR_MASK BIT(0)
49#define PCPLPERRINTSTS 0x0008
50#define PCPLPERRINTMSK 0x000C
51#define CSW_SWITCH_TRACE_ERR_MASK BIT(2)
52#define L3C_CORR_ERR_MASK BIT(1)
53#define MCU_CORR_ERR_MASK BIT(0)
54#define MEMERRINTSTS 0x0010
55#define MEMERRINTMSK 0x0014
56
57struct xgene_edac {
58 struct device *dev;
59 struct regmap *csw_map;
60 struct regmap *mcba_map;
61 struct regmap *mcbb_map;
62 struct regmap *efuse_map;
63 void __iomem *pcp_csr;
64 spinlock_t lock;
65 struct dentry *dfs;
66
67 struct list_head mcus;
68 struct list_head pmds;
69
70 struct mutex mc_lock;
71 int mc_active_mask;
72 int mc_registered_mask;
73};
74
75static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
76{
77 *val = readl(edac->pcp_csr + reg);
78}
79
80static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
81 u32 bits_mask)
82{
83 u32 val;
84
85 spin_lock(&edac->lock);
86 val = readl(edac->pcp_csr + reg);
87 val &= ~bits_mask;
88 writel(val, edac->pcp_csr + reg);
89 spin_unlock(&edac->lock);
90}
91
92static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
93 u32 bits_mask)
94{
95 u32 val;
96
97 spin_lock(&edac->lock);
98 val = readl(edac->pcp_csr + reg);
99 val |= bits_mask;
100 writel(val, edac->pcp_csr + reg);
101 spin_unlock(&edac->lock);
102}
103
104/* Memory controller error CSR */
105#define MCU_MAX_RANK 8
106#define MCU_RANK_STRIDE 0x40
107
108#define MCUGECR 0x0110
109#define MCU_GECR_DEMANDUCINTREN_MASK BIT(0)
110#define MCU_GECR_BACKUCINTREN_MASK BIT(1)
111#define MCU_GECR_CINTREN_MASK BIT(2)
112#define MUC_GECR_MCUADDRERREN_MASK BIT(9)
113#define MCUGESR 0x0114
114#define MCU_GESR_ADDRNOMATCH_ERR_MASK BIT(7)
115#define MCU_GESR_ADDRMULTIMATCH_ERR_MASK BIT(6)
116#define MCU_GESR_PHYP_ERR_MASK BIT(3)
117#define MCUESRR0 0x0314
118#define MCU_ESRR_MULTUCERR_MASK BIT(3)
119#define MCU_ESRR_BACKUCERR_MASK BIT(2)
120#define MCU_ESRR_DEMANDUCERR_MASK BIT(1)
121#define MCU_ESRR_CERR_MASK BIT(0)
122#define MCUESRRA0 0x0318
123#define MCUEBLRR0 0x031c
124#define MCU_EBLRR_ERRBANK_RD(src) (((src) & 0x00000007) >> 0)
125#define MCUERCRR0 0x0320
126#define MCU_ERCRR_ERRROW_RD(src) (((src) & 0xFFFF0000) >> 16)
127#define MCU_ERCRR_ERRCOL_RD(src) ((src) & 0x00000FFF)
128#define MCUSBECNT0 0x0324
129#define MCU_SBECNT_COUNT(src) ((src) & 0xFFFF)
130
131#define CSW_CSWCR 0x0000
132#define CSW_CSWCR_DUALMCB_MASK BIT(0)
133
134#define MCBADDRMR 0x0000
135#define MCBADDRMR_MCU_INTLV_MODE_MASK BIT(3)
136#define MCBADDRMR_DUALMCU_MODE_MASK BIT(2)
137#define MCBADDRMR_MCB_INTLV_MODE_MASK BIT(1)
138#define MCBADDRMR_ADDRESS_MODE_MASK BIT(0)
139
140struct xgene_edac_mc_ctx {
141 struct list_head next;
142 char *name;
143 struct mem_ctl_info *mci;
144 struct xgene_edac *edac;
145 void __iomem *mcu_csr;
146 u32 mcu_id;
147};
148
149static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
150 const char __user *data,
151 size_t count, loff_t *ppos)
152{
153 struct mem_ctl_info *mci = file->private_data;
154 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
155 int i;
156
157 for (i = 0; i < MCU_MAX_RANK; i++) {
158 writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
159 MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
160 ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
161 }
162 return count;
163}
164
165static const struct file_operations xgene_edac_mc_debug_inject_fops = {
166 .open = simple_open,
167 .write = xgene_edac_mc_err_inject_write,
168 .llseek = generic_file_llseek,
169};
170
171static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
172{
173 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
174 return;
175#ifdef CONFIG_EDAC_DEBUG
176 if (!mci->debugfs)
177 return;
178 debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
179 &xgene_edac_mc_debug_inject_fops);
180#endif
181}
182
183static void xgene_edac_mc_check(struct mem_ctl_info *mci)
184{
185 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
186 unsigned int pcp_hp_stat;
187 unsigned int pcp_lp_stat;
188 u32 reg;
189 u32 rank;
190 u32 bank;
191 u32 count;
192 u32 col_row;
193
194 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
195 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
196 if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
197 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
198 (MCU_CORR_ERR_MASK & pcp_lp_stat)))
199 return;
200
201 for (rank = 0; rank < MCU_MAX_RANK; rank++) {
202 reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
203
204 /* Detect uncorrectable memory error */
205 if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
206 MCU_ESRR_BACKUCERR_MASK)) {
207 /* Detected uncorrectable memory error */
208 edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
209 "MCU uncorrectable error at rank %d\n", rank);
210
211 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
212 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
213 }
214
215 /* Detect correctable memory error */
216 if (reg & MCU_ESRR_CERR_MASK) {
217 bank = readl(ctx->mcu_csr + MCUEBLRR0 +
218 rank * MCU_RANK_STRIDE);
219 col_row = readl(ctx->mcu_csr + MCUERCRR0 +
220 rank * MCU_RANK_STRIDE);
221 count = readl(ctx->mcu_csr + MCUSBECNT0 +
222 rank * MCU_RANK_STRIDE);
223 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
224 "MCU correctable error at rank %d bank %d column %d row %d count %d\n",
225 rank, MCU_EBLRR_ERRBANK_RD(bank),
226 MCU_ERCRR_ERRCOL_RD(col_row),
227 MCU_ERCRR_ERRROW_RD(col_row),
228 MCU_SBECNT_COUNT(count));
229
230 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
231 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
232 }
233
234 /* Clear all error registers */
235 writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
236 writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
237 writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
238 rank * MCU_RANK_STRIDE);
239 writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
240 }
241
242 /* Detect memory controller error */
243 reg = readl(ctx->mcu_csr + MCUGESR);
244 if (reg) {
245 if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
246 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
247 "MCU address miss-match error\n");
248 if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
249 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
250 "MCU address multi-match error\n");
251
252 writel(reg, ctx->mcu_csr + MCUGESR);
253 }
254}
255
256static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
257{
258 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
259 unsigned int val;
260
261 if (edac_op_state != EDAC_OPSTATE_INT)
262 return;
263
264 mutex_lock(&ctx->edac->mc_lock);
265
266 /*
267 * As there is only single bit for enable error and interrupt mask,
268 * we must only enable top level interrupt after all MCUs are
269 * registered. Otherwise, if there is an error and the corresponding
270 * MCU has not registered, the interrupt will never get cleared. To
271 * determine all MCU have registered, we will keep track of active
272 * MCUs and registered MCUs.
273 */
274 if (enable) {
275 /* Set registered MCU bit */
276 ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;
277
278 /* Enable interrupt after all active MCU registered */
279 if (ctx->edac->mc_registered_mask ==
280 ctx->edac->mc_active_mask) {
281 /* Enable memory controller top level interrupt */
282 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
283 MCU_UNCORR_ERR_MASK |
284 MCU_CTL_ERR_MASK);
285 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
286 MCU_CORR_ERR_MASK);
287 }
288
289 /* Enable MCU interrupt and error reporting */
290 val = readl(ctx->mcu_csr + MCUGECR);
291 val |= MCU_GECR_DEMANDUCINTREN_MASK |
292 MCU_GECR_BACKUCINTREN_MASK |
293 MCU_GECR_CINTREN_MASK |
294 MUC_GECR_MCUADDRERREN_MASK;
295 writel(val, ctx->mcu_csr + MCUGECR);
296 } else {
297 /* Disable MCU interrupt */
298 val = readl(ctx->mcu_csr + MCUGECR);
299 val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
300 MCU_GECR_BACKUCINTREN_MASK |
301 MCU_GECR_CINTREN_MASK |
302 MUC_GECR_MCUADDRERREN_MASK);
303 writel(val, ctx->mcu_csr + MCUGECR);
304
305 /* Disable memory controller top level interrupt */
306 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
307 MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
308 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
309 MCU_CORR_ERR_MASK);
310
311 /* Clear registered MCU bit */
312 ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
313 }
314
315 mutex_unlock(&ctx->edac->mc_lock);
316}
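/*
 * Illustration (not part of the driver): on a part where only MCU0 and
 * MCU2 are active, mc_active_mask is 0x5.  As the two MCUs register,
 * mc_registered_mask goes 0x0 -> 0x1 -> 0x5, and only once it equals
 * mc_active_mask is the top-level PCP interrupt unmasked.  An error on a
 * not-yet-registered MCU therefore can never leave the shared interrupt
 * permanently asserted.
 */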
317
318static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
319{
320 unsigned int reg;
321 u32 mcu_mask;
322
323 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
324 return 0;
325
326 if (reg & CSW_CSWCR_DUALMCB_MASK) {
327 /*
328 * Dual MCB active - Determine if all 4 active or just MCU0
329 * and MCU2 active
330 */
331 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
332 return 0;
333 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
334 } else {
335 /*
336 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
337 * active
338 */
339 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
340 return 0;
341 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
342 }
343
344 /* Save active MC mask if hasn't set already */
345 if (!ctx->edac->mc_active_mask)
346 ctx->edac->mc_active_mask = mcu_mask;
347
348 return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
349}
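/*
 * Worked example (illustrative): CSW_CSWCR.DUALMCB = 1 with
 * MCBADDRMR.DUALMCU_MODE = 1 means two MCBs each driving two MCUs, so
 * mcu_mask = 0xF and MCU0..MCU3 are all considered active; a single MCB
 * with DUALMCU_MODE = 0 leaves only MCU0 active (mcu_mask = 0x1).
 */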
350
351static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
352{
353 struct mem_ctl_info *mci;
354 struct edac_mc_layer layers[2];
355 struct xgene_edac_mc_ctx tmp_ctx;
356 struct xgene_edac_mc_ctx *ctx;
357 struct resource res;
358 int rc;
359
360 memset(&tmp_ctx, 0, sizeof(tmp_ctx));
361 tmp_ctx.edac = edac;
362
363 if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
364 return -ENOMEM;
365
366 rc = of_address_to_resource(np, 0, &res);
367 if (rc < 0) {
368 dev_err(edac->dev, "no MCU resource address\n");
369 goto err_group;
370 }
371 tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
372 if (IS_ERR(tmp_ctx.mcu_csr)) {
373 dev_err(edac->dev, "unable to map MCU resource\n");
374 rc = PTR_ERR(tmp_ctx.mcu_csr);
375 goto err_group;
376 }
377
378 /* Ignore non-active MCU */
379 if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
380 dev_err(edac->dev, "no memory-controller property\n");
381 rc = -ENODEV;
382 goto err_group;
383 }
384 if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
385 rc = -ENODEV;
386 goto err_group;
387 }
388
389 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
390 layers[0].size = 4;
391 layers[0].is_virt_csrow = true;
392 layers[1].type = EDAC_MC_LAYER_CHANNEL;
393 layers[1].size = 2;
394 layers[1].is_virt_csrow = false;
395 mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
396 sizeof(*ctx));
397 if (!mci) {
398 rc = -ENOMEM;
399 goto err_group;
400 }
401
402 ctx = mci->pvt_info;
403 *ctx = tmp_ctx; /* Copy over resource value */
404 ctx->name = "xgene_edac_mc_err";
405 ctx->mci = mci;
406 mci->pdev = &mci->dev;
407 mci->ctl_name = ctx->name;
408 mci->dev_name = ctx->name;
409
410 mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
411 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
412 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
413 mci->edac_cap = EDAC_FLAG_SECDED;
414 mci->mod_name = EDAC_MOD_STR;
415 mci->mod_ver = "0.1";
416 mci->ctl_page_to_phys = NULL;
417 mci->scrub_cap = SCRUB_FLAG_HW_SRC;
418 mci->scrub_mode = SCRUB_HW_SRC;
419
420 if (edac_op_state == EDAC_OPSTATE_POLL)
421 mci->edac_check = xgene_edac_mc_check;
422
423 if (edac_mc_add_mc(mci)) {
424 dev_err(edac->dev, "edac_mc_add_mc failed\n");
425 rc = -EINVAL;
426 goto err_free;
427 }
428
429 xgene_edac_mc_create_debugfs_node(mci);
430
431 list_add(&ctx->next, &edac->mcus);
432
433 xgene_edac_mc_irq_ctl(mci, true);
434
435 devres_remove_group(edac->dev, xgene_edac_mc_add);
436
437 dev_info(edac->dev, "X-Gene EDAC MC registered\n");
438 return 0;
439
440err_free:
441 edac_mc_free(mci);
442err_group:
443 devres_release_group(edac->dev, xgene_edac_mc_add);
444 return rc;
445}
446
447static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
448{
449 xgene_edac_mc_irq_ctl(mcu->mci, false);
450 edac_mc_del_mc(&mcu->mci->dev);
451 edac_mc_free(mcu->mci);
452 return 0;
453}
454
455/* CPU L1/L2 error CSR */
456#define MAX_CPU_PER_PMD 2
457#define CPU_CSR_STRIDE 0x00100000
458#define CPU_L2C_PAGE 0x000D0000
459#define CPU_MEMERR_L2C_PAGE 0x000E0000
460#define CPU_MEMERR_CPU_PAGE 0x000F0000
461
462#define MEMERR_CPU_ICFECR_PAGE_OFFSET 0x0000
463#define MEMERR_CPU_ICFESR_PAGE_OFFSET 0x0004
464#define MEMERR_CPU_ICFESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
465#define MEMERR_CPU_ICFESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16)
466#define MEMERR_CPU_ICFESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
467#define MEMERR_CPU_ICFESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
468#define MEMERR_CPU_ICFESR_MULTCERR_MASK BIT(2)
469#define MEMERR_CPU_ICFESR_CERR_MASK BIT(0)
470#define MEMERR_CPU_LSUESR_PAGE_OFFSET 0x000c
471#define MEMERR_CPU_LSUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
472#define MEMERR_CPU_LSUESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16)
473#define MEMERR_CPU_LSUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
474#define MEMERR_CPU_LSUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
475#define MEMERR_CPU_LSUESR_MULTCERR_MASK BIT(2)
476#define MEMERR_CPU_LSUESR_CERR_MASK BIT(0)
477#define MEMERR_CPU_LSUECR_PAGE_OFFSET 0x0008
478#define MEMERR_CPU_MMUECR_PAGE_OFFSET 0x0010
479#define MEMERR_CPU_MMUESR_PAGE_OFFSET 0x0014
480#define MEMERR_CPU_MMUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24)
481#define MEMERR_CPU_MMUESR_ERRINDEX_RD(src) (((src) & 0x007F0000) >> 16)
482#define MEMERR_CPU_MMUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8)
483#define MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK BIT(7)
484#define MEMERR_CPU_MMUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4)
485#define MEMERR_CPU_MMUESR_MULTCERR_MASK BIT(2)
486#define MEMERR_CPU_MMUESR_CERR_MASK BIT(0)
487#define MEMERR_CPU_ICFESRA_PAGE_OFFSET 0x0804
488#define MEMERR_CPU_LSUESRA_PAGE_OFFSET 0x080c
489#define MEMERR_CPU_MMUESRA_PAGE_OFFSET 0x0814
490
491#define MEMERR_L2C_L2ECR_PAGE_OFFSET 0x0000
492#define MEMERR_L2C_L2ESR_PAGE_OFFSET 0x0004
493#define MEMERR_L2C_L2ESR_ERRSYN_RD(src) (((src) & 0xFF000000) >> 24)
494#define MEMERR_L2C_L2ESR_ERRWAY_RD(src) (((src) & 0x00FC0000) >> 18)
495#define MEMERR_L2C_L2ESR_ERRCPU_RD(src) (((src) & 0x00020000) >> 17)
496#define MEMERR_L2C_L2ESR_ERRGROUP_RD(src) (((src) & 0x0000E000) >> 13)
497#define MEMERR_L2C_L2ESR_ERRACTION_RD(src) (((src) & 0x00001C00) >> 10)
498#define MEMERR_L2C_L2ESR_ERRTYPE_RD(src) (((src) & 0x00000300) >> 8)
499#define MEMERR_L2C_L2ESR_MULTUCERR_MASK BIT(3)
500#define MEMERR_L2C_L2ESR_MULTICERR_MASK BIT(2)
501#define MEMERR_L2C_L2ESR_UCERR_MASK BIT(1)
502#define MEMERR_L2C_L2ESR_ERR_MASK BIT(0)
503#define MEMERR_L2C_L2EALR_PAGE_OFFSET 0x0008
504#define CPUX_L2C_L2RTOCR_PAGE_OFFSET 0x0010
505#define MEMERR_L2C_L2EAHR_PAGE_OFFSET 0x000c
506#define CPUX_L2C_L2RTOSR_PAGE_OFFSET 0x0014
507#define MEMERR_L2C_L2RTOSR_MULTERR_MASK BIT(1)
508#define MEMERR_L2C_L2RTOSR_ERR_MASK BIT(0)
509#define CPUX_L2C_L2RTOALR_PAGE_OFFSET 0x0018
510#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET 0x001c
511#define MEMERR_L2C_L2ESRA_PAGE_OFFSET 0x0804
512
513/*
514 * Processor Module Domain (PMD) context - Context for a pair of processors.
515 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
516 * its own L1 cache.
517 */
518struct xgene_edac_pmd_ctx {
519 struct list_head next;
520 struct device ddev;
521 char *name;
522 struct xgene_edac *edac;
523 struct edac_device_ctl_info *edac_dev;
524 void __iomem *pmd_csr;
525 u32 pmd;
526 int version;
527};
528
529static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
530 int cpu_idx)
531{
532 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
533 void __iomem *pg_f;
534 u32 val;
535
536 pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;
537
538 val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
539 if (val) {
540 dev_err(edac_dev->dev,
541 "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
542 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
543 MEMERR_CPU_ICFESR_ERRWAY_RD(val),
544 MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
545 MEMERR_CPU_ICFESR_ERRINFO_RD(val));
546 if (val & MEMERR_CPU_ICFESR_CERR_MASK)
547 dev_err(edac_dev->dev,
548 "One or more correctable error\n");
549 if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
550 dev_err(edac_dev->dev, "Multiple correctable error\n");
551 switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
552 case 1:
553 dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
554 break;
555 case 2:
556 dev_err(edac_dev->dev, "Way select multiple hit\n");
557 break;
558 case 3:
559 dev_err(edac_dev->dev, "Physical tag parity error\n");
560 break;
561 case 4:
562 case 5:
563 dev_err(edac_dev->dev, "L1 data parity error\n");
564 break;
565 case 6:
566 dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
567 break;
568 }
569
570 /* Clear any HW errors */
571 writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
572
573 if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
574 MEMERR_CPU_ICFESR_MULTCERR_MASK))
575 edac_device_handle_ce(edac_dev, 0, 0,
576 edac_dev->ctl_name);
577 }
578
579 val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
580 if (val) {
581 dev_err(edac_dev->dev,
582 "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
583 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
584 MEMERR_CPU_LSUESR_ERRWAY_RD(val),
585 MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
586 MEMERR_CPU_LSUESR_ERRINFO_RD(val));
587 if (val & MEMERR_CPU_LSUESR_CERR_MASK)
588 dev_err(edac_dev->dev,
589 "One or more correctable error\n");
590 if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
591 dev_err(edac_dev->dev, "Multiple correctable error\n");
592 switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
593 case 0:
594 dev_err(edac_dev->dev, "Load tag error\n");
595 break;
596 case 1:
597 dev_err(edac_dev->dev, "Load data error\n");
598 break;
599 case 2:
600 dev_err(edac_dev->dev, "WSL multihit error\n");
601 break;
602 case 3:
603 dev_err(edac_dev->dev, "Store tag error\n");
604 break;
605 case 4:
606 dev_err(edac_dev->dev,
607 "DTB multihit from load pipeline error\n");
608 break;
609 case 5:
610 dev_err(edac_dev->dev,
611 "DTB multihit from store pipeline error\n");
612 break;
613 }
614
615 /* Clear any HW errors */
616 writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
617
618 if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
619 MEMERR_CPU_LSUESR_MULTCERR_MASK))
620 edac_device_handle_ce(edac_dev, 0, 0,
621 edac_dev->ctl_name);
622 }
623
624 val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
625 if (val) {
626 dev_err(edac_dev->dev,
627 "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
628 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
629 MEMERR_CPU_MMUESR_ERRWAY_RD(val),
630 MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
631 MEMERR_CPU_MMUESR_ERRINFO_RD(val),
632 val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" :
633 "ICF");
634 if (val & MEMERR_CPU_MMUESR_CERR_MASK)
635 dev_err(edac_dev->dev,
636 "One or more correctable error\n");
637 if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
638 dev_err(edac_dev->dev, "Multiple correctable error\n");
639 switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
640 case 0:
641 dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
642 break;
643 case 1:
644 dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
645 break;
646 case 2:
647 dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
648 break;
649 case 3:
650 dev_err(edac_dev->dev,
651 "TMO operation single bank error\n");
652 break;
653 case 4:
654 dev_err(edac_dev->dev, "Stage 2 UTB error\n");
655 break;
656 case 5:
657 dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
658 break;
659 case 6:
660 dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
661 break;
662 case 7:
663 dev_err(edac_dev->dev,
664 "TMO operation multiple bank error\n");
665 break;
666 }
667
668 /* Clear any HW errors */
669 writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
670
671 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
672 }
673}
674
675static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
676{
677 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
678 void __iomem *pg_d;
679 void __iomem *pg_e;
680 u32 val_hi;
681 u32 val_lo;
682 u32 val;
683
684 /* Check L2 */
685 pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
686 val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
687 if (val) {
688 val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
689 val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
690 dev_err(edac_dev->dev,
691 "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
692 ctx->pmd, val, val_hi, val_lo);
693 dev_err(edac_dev->dev,
694 "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
695 MEMERR_L2C_L2ESR_ERRSYN_RD(val),
696 MEMERR_L2C_L2ESR_ERRWAY_RD(val),
697 MEMERR_L2C_L2ESR_ERRCPU_RD(val),
698 MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
699 MEMERR_L2C_L2ESR_ERRACTION_RD(val));
700
701 if (val & MEMERR_L2C_L2ESR_ERR_MASK)
702 dev_err(edac_dev->dev,
703 "One or more correctable error\n");
704 if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
705 dev_err(edac_dev->dev, "Multiple correctable error\n");
706 if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
707 dev_err(edac_dev->dev,
708 "One or more uncorrectable error\n");
709 if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
710 dev_err(edac_dev->dev,
711 "Multiple uncorrectable error\n");
712
713 switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
714 case 0:
715 dev_err(edac_dev->dev, "Outbound SDB parity error\n");
716 break;
717 case 1:
718 dev_err(edac_dev->dev, "Inbound SDB parity error\n");
719 break;
720 case 2:
721 dev_err(edac_dev->dev, "Tag ECC error\n");
722 break;
723 case 3:
724 dev_err(edac_dev->dev, "Data ECC error\n");
725 break;
726 }
727
728 /* Clear any HW errors */
729 writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
730
731 if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
732 MEMERR_L2C_L2ESR_MULTICERR_MASK))
733 edac_device_handle_ce(edac_dev, 0, 0,
734 edac_dev->ctl_name);
735 if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
736 MEMERR_L2C_L2ESR_MULTUCERR_MASK))
737 edac_device_handle_ue(edac_dev, 0, 0,
738 edac_dev->ctl_name);
739 }
740
741 /* Check if any memory request timed out on L2 cache */
742 pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
743 val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
744 if (val) {
745 val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
746 val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
747 dev_err(edac_dev->dev,
748 "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
749 ctx->pmd, val, val_hi, val_lo);
750 writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
751 }
752}
753
754static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
755{
756 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
757 unsigned int pcp_hp_stat;
758 int i;
759
760 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
761 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
762 return;
763
764 /* Check CPU L1 error */
765 for (i = 0; i < MAX_CPU_PER_PMD; i++)
766 xgene_edac_pmd_l1_check(edac_dev, i);
767
768 /* Check CPU L2 error */
769 xgene_edac_pmd_l2_check(edac_dev);
770}
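/*
 * Example (illustrative): PMD0_MERR_MASK is BIT(0), so for ctx->pmd == 2
 * the check above tests BIT(2) of PCPHPERRINTSTS -- the PMD2 error bit --
 * before touching the per-CPU L1 and shared L2 status pages.
 */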
771
772static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
773 int cpu)
774{
775 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
776 void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
777 CPU_MEMERR_CPU_PAGE;
778
779 /*
780 * Enable CPU memory error:
781 * MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
782 */
783 writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
784 writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
785 writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
786}
787
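/*
 * Enable L2 cache error capturing for the PMD, plus the L2C hardware request
 * time-out feature on controllers that support it (version > 1).
 */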
788static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
789{
790 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
791 void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
792 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
793
794 /* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
795 writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
796 /* Configure L2C HW request time out feature if supported */
797 if (ctx->version > 1)
798 writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
799}
800
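/*
 * Enable or disable error reporting for the PMD: unmask/mask its bit in
 * PCPHPERRINTMSK when running in interrupt mode and, on enable, program the
 * L2C and per-CPU error capture registers.
 */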
801static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
802 bool enable)
803{
804 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
805 int i;
806
807 /* Enable PMD error interrupt */
808 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
809 if (enable)
810 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
811 PMD0_MERR_MASK << ctx->pmd);
812 else
813 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
814 PMD0_MERR_MASK << ctx->pmd);
815 }
816
817 if (enable) {
818 xgene_edac_pmd_hw_cfg(edac_dev);
819
820 /* Two CPUs per PMD */
821 for (i = 0; i < MAX_CPU_PER_PMD; i++)
822 xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
823 }
824}
825
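/*
 * debugfs write handler for l1_inject_ctrl: writing anything pokes the CERR
 * and MULTCERR bits into each CPU's MEMERR_CPU_*ESRA registers to inject
 * correctable L1 error status for testing.
 */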
826static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
827 const char __user *data,
828 size_t count, loff_t *ppos)
829{
830 struct edac_device_ctl_info *edac_dev = file->private_data;
831 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
832 void __iomem *cpux_pg_f;
833 int i;
834
835 for (i = 0; i < MAX_CPU_PER_PMD; i++) {
836 cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
837 CPU_MEMERR_CPU_PAGE;
838
839 writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
840 MEMERR_CPU_ICFESR_CERR_MASK,
841 cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
842 writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
843 MEMERR_CPU_LSUESR_CERR_MASK,
844 cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
845 writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
846 MEMERR_CPU_MMUESR_CERR_MASK,
847 cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
848 }
849 return count;
850}
851
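/*
 * debugfs write handler for l2_inject_ctrl: writing anything pokes the CE and
 * UE status bits into MEMERR_L2C_L2ESRA to inject L2 cache error status for
 * testing.
 */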
852static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
853 const char __user *data,
854 size_t count, loff_t *ppos)
855{
856 struct edac_device_ctl_info *edac_dev = file->private_data;
857 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
858 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
859
860 writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
861 MEMERR_L2C_L2ESR_MULTICERR_MASK |
862 MEMERR_L2C_L2ESR_UCERR_MASK |
863 MEMERR_L2C_L2ESR_ERR_MASK,
864 pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
865 return count;
866}
867
868static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
869 {
870 .open = simple_open,
871 .write = xgene_edac_pmd_l1_inject_ctrl_write,
872 .llseek = generic_file_llseek, },
873 {
874 .open = simple_open,
875 .write = xgene_edac_pmd_l2_inject_ctrl_write,
876 .llseek = generic_file_llseek, },
877 { }
878};
879
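/*
 * Create the PMD<n>/{l1,l2}_inject_ctrl error-injection nodes under a debugfs
 * directory named after the EDAC device's kobject. Only active when
 * CONFIG_EDAC_DEBUG is enabled. With debugfs mounted in the usual place,
 * injection would typically be triggered by writing to a path of the form
 * /sys/kernel/debug/<device>/PMD0/l1_inject_ctrl (exact path depends on the
 * device name).
 */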
880static void xgene_edac_pmd_create_debugfs_nodes(
881 struct edac_device_ctl_info *edac_dev)
882{
883 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
884 struct dentry *edac_debugfs;
885 char name[30];
886
887 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
888 return;
889
890 /*
891 * TODO: Switch to the common EDAC debug file system for EDAC devices
892 * when available.
893 */
894 if (!ctx->edac->dfs) {
895 ctx->edac->dfs = debugfs_create_dir(edac_dev->dev->kobj.name,
896 NULL);
897 if (!ctx->edac->dfs)
898 return;
899 }
900 sprintf(name, "PMD%d", ctx->pmd);
901 edac_debugfs = debugfs_create_dir(name, ctx->edac->dfs);
902 if (!edac_debugfs)
903 return;
904
905 debugfs_create_file("l1_inject_ctrl", S_IWUSR, edac_debugfs, edac_dev,
906 &xgene_edac_pmd_debug_inject_fops[0]);
907 debugfs_create_file("l2_inject_ctrl", S_IWUSR, edac_debugfs, edac_dev,
908 &xgene_edac_pmd_debug_inject_fops[1]);
909}
910
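/* A PMD is usable only when its disable bit in the efuse register is clear. */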
911static int xgene_edac_pmd_available(u32 efuse, int pmd)
912{
913 return (efuse & (1 << pmd)) ? 0 : 1;
914}
915
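/*
 * Register one EDAC device ("l2cN") per enabled PMD: map its CSR region from
 * the device tree node, hook the poll callback when polling, create the
 * debugfs injection nodes and enable hardware error reporting.
 */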
916static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
917 int version)
918{
919 struct edac_device_ctl_info *edac_dev;
920 struct xgene_edac_pmd_ctx *ctx;
921 struct resource res;
922 char edac_name[10];
923 u32 pmd;
924 int rc;
925 u32 val;
926
927 if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
928 return -ENOMEM;
929
930 /* Determine if this PMD is disabled */
931 if (of_property_read_u32(np, "pmd-controller", &pmd)) {
932 dev_err(edac->dev, "no pmd-controller property\n");
933 rc = -ENODEV;
934 goto err_group;
935 }
936 rc = regmap_read(edac->efuse_map, 0, &val);
937 if (rc)
938 goto err_group;
939 if (!xgene_edac_pmd_available(val, pmd)) {
940 rc = -ENODEV;
941 goto err_group;
942 }
943
944 sprintf(edac_name, "l2c%d", pmd);
945 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
946 edac_name, 1, "l2c", 1, 2, NULL,
947 0, edac_device_alloc_index());
948 if (!edac_dev) {
949 rc = -ENOMEM;
950 goto err_group;
951 }
952
953 ctx = edac_dev->pvt_info;
954 ctx->name = "xgene_pmd_err";
955 ctx->pmd = pmd;
956 ctx->edac = edac;
957 ctx->edac_dev = edac_dev;
958 ctx->ddev = *edac->dev;
959 ctx->version = version;
960 edac_dev->dev = &ctx->ddev;
961 edac_dev->ctl_name = ctx->name;
962 edac_dev->dev_name = ctx->name;
963 edac_dev->mod_name = EDAC_MOD_STR;
964
965 rc = of_address_to_resource(np, 0, &res);
966 if (rc < 0) {
967 dev_err(edac->dev, "no PMD resource address\n");
968 goto err_free;
969 }
970 ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
971 if (IS_ERR(ctx->pmd_csr)) {
972 dev_err(edac->dev,
973 "devm_ioremap_resource failed for PMD resource address\n");
974 rc = PTR_ERR(ctx->pmd_csr);
975 goto err_free;
976 }
977
978 if (edac_op_state == EDAC_OPSTATE_POLL)
979 edac_dev->edac_check = xgene_edac_pmd_check;
980
981 xgene_edac_pmd_create_debugfs_nodes(edac_dev);
982
983 rc = edac_device_add_device(edac_dev);
984 if (rc > 0) {
985 dev_err(edac->dev, "edac_device_add_device failed\n");
986 rc = -ENOMEM;
987 goto err_free;
988 }
989
990 if (edac_op_state == EDAC_OPSTATE_INT)
991 edac_dev->op_state = OP_RUNNING_INTERRUPT;
992
993 list_add(&ctx->next, &edac->pmds);
994
995 xgene_edac_pmd_hw_ctl(edac_dev, 1);
996
997 devres_remove_group(edac->dev, xgene_edac_pmd_add);
998
999 dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
1000 return 0;
1001
1002err_free:
1003 edac_device_free_ctl_info(edac_dev);
1004err_group:
1005 devres_release_group(edac->dev, xgene_edac_pmd_add);
1006 return rc;
1007}
1008
1009static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
1010{
1011 struct edac_device_ctl_info *edac_dev = pmd->edac_dev;
1012
1013 xgene_edac_pmd_hw_ctl(edac_dev, 0);
1014 edac_device_del_device(edac_dev->dev);
1015 edac_device_free_ctl_info(edac_dev);
1016 return 0;
1017}
1018
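/*
 * Shared handler for the EDAC interrupt lines: read the PCP high- and
 * low-priority error status, check every registered memory controller when an
 * MCU error is flagged, and check each PMD whose error bit is set.
 */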
1019static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
1020{
1021 struct xgene_edac *ctx = dev_id;
1022 struct xgene_edac_pmd_ctx *pmd;
1023 unsigned int pcp_hp_stat;
1024 unsigned int pcp_lp_stat;
1025
1026 xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
1027 xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
1028 if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
1029 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
1030 (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
1031 struct xgene_edac_mc_ctx *mcu;
1032
1033 list_for_each_entry(mcu, &ctx->mcus, next) {
1034 xgene_edac_mc_check(mcu->mci);
1035 }
1036 }
1037
1038 list_for_each_entry(pmd, &ctx->pmds, next) {
1039 if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
1040 xgene_edac_pmd_check(pmd->edac_dev);
1041 }
1042
1043 return IRQ_HANDLED;
1044}
1045
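/*
 * Probe: look up the CSW, MCB-A, MCB-B and efuse syscon regmaps, map the PCP
 * CSR region, request the three error interrupts when running in interrupt
 * mode, then create a memory-controller or PMD sub-device for each available
 * child node.
 */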
1046static int xgene_edac_probe(struct platform_device *pdev)
1047{
1048 struct xgene_edac *edac;
1049 struct device_node *child;
1050 struct resource *res;
1051 int rc;
1052
1053 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
1054 if (!edac)
1055 return -ENOMEM;
1056
1057 edac->dev = &pdev->dev;
1058 platform_set_drvdata(pdev, edac);
1059 INIT_LIST_HEAD(&edac->mcus);
1060 INIT_LIST_HEAD(&edac->pmds);
1061 spin_lock_init(&edac->lock);
1062 mutex_init(&edac->mc_lock);
1063
1064 edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1065 "regmap-csw");
1066 if (IS_ERR(edac->csw_map)) {
1067 dev_err(edac->dev, "unable to get syscon regmap csw\n");
1068 rc = PTR_ERR(edac->csw_map);
1069 goto out_err;
1070 }
1071
1072 edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1073 "regmap-mcba");
1074 if (IS_ERR(edac->mcba_map)) {
1075 dev_err(edac->dev, "unable to get syscon regmap mcba\n");
1076 rc = PTR_ERR(edac->mcba_map);
1077 goto out_err;
1078 }
1079
1080 edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1081 "regmap-mcbb");
1082 if (IS_ERR(edac->mcbb_map)) {
1083 dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
1084 rc = PTR_ERR(edac->mcbb_map);
1085 goto out_err;
1086 }
1087 edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1088 "regmap-efuse");
1089 if (IS_ERR(edac->efuse_map)) {
1090 dev_err(edac->dev, "unable to get syscon regmap efuse\n");
1091 rc = PTR_ERR(edac->efuse_map);
1092 goto out_err;
1093 }
1094
1095 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1096 edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
1097 if (IS_ERR(edac->pcp_csr)) {
1098 dev_err(&pdev->dev, "no PCP resource address\n");
1099 rc = PTR_ERR(edac->pcp_csr);
1100 goto out_err;
1101 }
1102
1103 if (edac_op_state == EDAC_OPSTATE_INT) {
1104 int irq;
1105 int i;
1106
1107 for (i = 0; i < 3; i++) {
1108 irq = platform_get_irq(pdev, i);
1109 if (irq < 0) {
1110 dev_err(&pdev->dev, "No IRQ resource\n");
1111 rc = -EINVAL;
1112 goto out_err;
1113 }
1114 rc = devm_request_irq(&pdev->dev, irq,
1115 xgene_edac_isr, IRQF_SHARED,
1116 dev_name(&pdev->dev), edac);
1117 if (rc) {
1118 dev_err(&pdev->dev,
1119 "Could not request IRQ %d\n", irq);
1120 goto out_err;
1121 }
1122 }
1123 }
1124
1125 for_each_child_of_node(pdev->dev.of_node, child) {
1126 if (!of_device_is_available(child))
1127 continue;
1128 if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
1129 xgene_edac_mc_add(edac, child);
1130 if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
1131 xgene_edac_pmd_add(edac, child, 1);
1132 if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
1133 xgene_edac_pmd_add(edac, child, 2);
1134 }
1135
1136 return 0;
1137
1138out_err:
1139 return rc;
1140}
1141
1142static int xgene_edac_remove(struct platform_device *pdev)
1143{
1144 struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1145 struct xgene_edac_mc_ctx *mcu;
1146 struct xgene_edac_mc_ctx *temp_mcu;
1147 struct xgene_edac_pmd_ctx *pmd;
1148 struct xgene_edac_pmd_ctx *temp_pmd;
1149
1150 list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next) {
1151 xgene_edac_mc_remove(mcu);
1152 }
1153
1154 list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next) {
1155 xgene_edac_pmd_remove(pmd);
1156 }
1157 return 0;
1158}
1159
1160static const struct of_device_id xgene_edac_of_match[] = {
1161 { .compatible = "apm,xgene-edac" },
1162 {},
1163};
1164MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
1165
1166static struct platform_driver xgene_edac_driver = {
1167 .probe = xgene_edac_probe,
1168 .remove = xgene_edac_remove,
1169 .driver = {
1170 .name = "xgene-edac",
1171 .owner = THIS_MODULE,
1172 .of_match_table = xgene_edac_of_match,
1173 },
1174};
1175
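/* Fall back to interrupt mode unless a valid edac_op_state was given. */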
1176static int __init xgene_edac_init(void)
1177{
1178 int rc;
1179
1180 /* Make sure error reporting method is sane */
1181 switch (edac_op_state) {
1182 case EDAC_OPSTATE_POLL:
1183 case EDAC_OPSTATE_INT:
1184 break;
1185 default:
1186 edac_op_state = EDAC_OPSTATE_INT;
1187 break;
1188 }
1189
1190 rc = platform_driver_register(&xgene_edac_driver);
1191 if (rc) {
1192 edac_printk(KERN_ERR, EDAC_MOD_STR,
1193 "EDAC failed to register\n");
1194 goto reg_failed;
1195 }
1196
1197 return 0;
1198
1199reg_failed:
1200 return rc;
1201}
1202module_init(xgene_edac_init);
1203
1204static void __exit xgene_edac_exit(void)
1205{
1206 platform_driver_unregister(&xgene_edac_driver);
1207}
1208module_exit(xgene_edac_exit);
1209
1210MODULE_LICENSE("GPL");
1211MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
1212MODULE_DESCRIPTION("APM X-Gene EDAC driver");
1213module_param(edac_op_state, int, 0444);
1214MODULE_PARM_DESC(edac_op_state,
1215 "EDAC error reporting state: 0=Poll, 2=Interrupt");