aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChannagoud Kadabi <ckadabi@codeaurora.org>2018-09-12 14:06:34 -0400
committerAndy Gross <andy.gross@linaro.org>2018-09-13 16:54:05 -0400
commit27450653f1db0b9d5b5048a246c850c52ee4aa61 (patch)
tree19e4535d7999ee456f897a33cf2bb293a1c92b54
parentc081f3060fab316fcf103967a24e502d58488849 (diff)
drivers: edac: Add EDAC driver support for QCOM SoCs
Add error reporting driver for Single Bit Errors (SBEs) and Double Bit Errors (DBEs). As of now, this driver supports error reporting for the Tag RAM and Data RAM of the Last Level Cache Controller (LLCC). Interrupts are triggered when errors happen in the cache; the driver handles those interrupts and dumps the syndrome registers. Signed-off-by: Channagoud Kadabi <ckadabi@codeaurora.org> Signed-off-by: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org> Co-developed-by: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org> Acked-by: Borislav Petkov <bp@suse.de> Signed-off-by: Andy Gross <andy.gross@linaro.org>
-rw-r--r--MAINTAINERS8
-rw-r--r--drivers/edac/Kconfig14
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/qcom_edac.c414
-rw-r--r--include/linux/soc/qcom/llcc-qcom.h24
5 files changed, 461 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index a5b256b25905..f7d7213ca293 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5346,6 +5346,14 @@ L: linux-edac@vger.kernel.org
5346S: Maintained 5346S: Maintained
5347F: drivers/edac/ti_edac.c 5347F: drivers/edac/ti_edac.c
5348 5348
5349EDAC-QCOM
5350M: Channagoud Kadabi <ckadabi@codeaurora.org>
5351M: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
5352L: linux-arm-msm@vger.kernel.org
5353L: linux-edac@vger.kernel.org
5354S: Maintained
5355F: drivers/edac/qcom_edac.c
5356
5349EDIROL UA-101/UA-1000 DRIVER 5357EDIROL UA-101/UA-1000 DRIVER
5350M: Clemens Ladisch <clemens@ladisch.de> 5358M: Clemens Ladisch <clemens@ladisch.de>
5351L: alsa-devel@alsa-project.org (moderated for non-subscribers) 5359L: alsa-devel@alsa-project.org (moderated for non-subscribers)
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 57304b2e989f..df9467eef32a 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -460,4 +460,18 @@ config EDAC_TI
460 Support for error detection and correction on the 460 Support for error detection and correction on the
461 TI SoCs. 461 TI SoCs.
462 462
463config EDAC_QCOM
464 tristate "QCOM EDAC Controller"
465 depends on ARCH_QCOM && QCOM_LLCC
466 help
467 Support for error detection and correction on the
468 Qualcomm Technologies, Inc. SoCs.
469
470 This driver reports Single Bit Errors (SBEs) and Double Bit Errors (DBEs).
471 As of now, it supports error reporting for the Tag RAM and Data RAM of the
472 Last Level Cache Controller (LLCC).
473
474 For debugging issues having to do with stability and overall system
475 health, you should probably say 'Y' here.
476
463endif # EDAC 477endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 02b43a7d8c3e..716096d08ea0 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -77,3 +77,4 @@ obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o
77obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o 77obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
78obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o 78obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
79obj-$(CONFIG_EDAC_TI) += ti_edac.o 79obj-$(CONFIG_EDAC_TI) += ti_edac.o
80obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c
new file mode 100644
index 000000000000..82bd775124f2
--- /dev/null
+++ b/drivers/edac/qcom_edac.c
@@ -0,0 +1,414 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2018, The Linux Foundation. All rights reserved.
4 */
5
6#include <linux/edac.h>
7#include <linux/interrupt.h>
8#include <linux/kernel.h>
9#include <linux/of.h>
10#include <linux/platform_device.h>
11#include <linux/regmap.h>
12#include <linux/soc/qcom/llcc-qcom.h>
13
14#include "edac_mc.h"
15#include "edac_device.h"
16
/* Module name passed to edac_printk() for every message of this driver */
#define EDAC_LLCC                       "qcom_llcc"

/* Value stored in edev_ctl->panic_on_ue at probe time */
#define LLCC_ERP_PANIC_ON_UE            1

/* Number of consecutive 32-bit syndrome registers dumped per error */
#define TRP_SYN_REG_CNT                 6
#define DRP_SYN_REG_CNT                 8

#define LLCC_COMMON_STATUS0             0x0003000c
#define LLCC_LB_CNT_MASK                GENMASK(31, 28)
#define LLCC_LB_CNT_SHIFT               28

/* Single & double bit syndrome register offsets */
#define TRP_ECC_SB_ERR_SYN0             0x0002304c
#define TRP_ECC_DB_ERR_SYN0             0x00020370
#define DRP_ECC_SB_ERR_SYN0             0x0004204c
#define DRP_ECC_DB_ERR_SYN0             0x00042070

/* Error register offsets */
#define TRP_ECC_ERROR_STATUS1           0x00020348
#define TRP_ECC_ERROR_STATUS0           0x00020344
#define DRP_ECC_ERROR_STATUS1           0x00042048
#define DRP_ECC_ERROR_STATUS0           0x00042044

/* TRP, DRP interrupt register offsets */
#define DRP_INTERRUPT_STATUS            0x00041000
#define TRP_INTERRUPT_0_STATUS          0x00020480
#define DRP_INTERRUPT_CLEAR             0x00041008
#define DRP_ECC_ERROR_CNTR_CLEAR        0x00040004
#define TRP_INTERRUPT_0_CLEAR           0x00020484
#define TRP_ECC_ERROR_CNTR_CLEAR        0x00020440

/*
 * Mask and shift macros.
 *
 * The *_SHIFT values are shift counts (the position of the lowest bit of
 * the matching mask), so they are written as plain numbers rather than
 * with BIT(), which builds a bit mask.
 */
#define ECC_DB_ERR_COUNT_MASK           GENMASK(4, 0)
#define ECC_DB_ERR_WAYS_MASK            GENMASK(31, 16)
#define ECC_DB_ERR_WAYS_SHIFT           16

#define ECC_SB_ERR_COUNT_MASK           GENMASK(23, 16)
#define ECC_SB_ERR_COUNT_SHIFT          16
#define ECC_SB_ERR_WAYS_MASK            GENMASK(15, 0)

/* Bits tested in the DRP/TRP interrupt status registers */
#define SB_ECC_ERROR                    BIT(0)
#define DB_ECC_ERROR                    BIT(1)

/* Values written to the interrupt-clear and counter-clear registers */
#define DRP_TRP_INT_CLEAR               GENMASK(1, 0)
#define DRP_TRP_CNT_CLEAR               GENMASK(1, 0)

/* Config register offsets */
#define DRP_ECC_ERROR_CFG               0x00040000

/* Tag RAM, Data RAM interrupt register offsets */
#define CMN_INTERRUPT_0_ENABLE          0x0003001c
#define CMN_INTERRUPT_2_ENABLE          0x0003003c
#define TRP_INTERRUPT_0_ENABLE          0x00020488
#define DRP_INTERRUPT_ENABLE            0x0004100c

/* Values programmed by qcom_llcc_core_setup() */
#define SB_ERROR_THRESHOLD              0x1
#define SB_ERROR_THRESHOLD_SHIFT        24
#define SB_DB_TRP_INTERRUPT_ENABLE      0x3
#define TRP0_INTERRUPT_ENABLE           0x1
#define DRP0_INTERRUPT_ENABLE           BIT(6)
#define SB_DB_DRP_INTERRUPT_ENABLE      0x3
78
/*
 * Error classes reported by this driver.  The values are consecutive
 * starting at zero and are used as indices into edac_reg_data[].
 */
enum {
	LLCC_DRAM_CE,	/* Data RAM, single-bit error */
	LLCC_DRAM_UE,	/* Data RAM, double-bit error */
	LLCC_TRAM_CE,	/* Tag RAM, single-bit error */
	LLCC_TRAM_UE,	/* Tag RAM, double-bit error */
};
85
86static const struct llcc_edac_reg_data edac_reg_data[] = {
87 [LLCC_DRAM_CE] = {
88 .name = "DRAM Single-bit",
89 .synd_reg = DRP_ECC_SB_ERR_SYN0,
90 .count_status_reg = DRP_ECC_ERROR_STATUS1,
91 .ways_status_reg = DRP_ECC_ERROR_STATUS0,
92 .reg_cnt = DRP_SYN_REG_CNT,
93 .count_mask = ECC_SB_ERR_COUNT_MASK,
94 .ways_mask = ECC_SB_ERR_WAYS_MASK,
95 .count_shift = ECC_SB_ERR_COUNT_SHIFT,
96 },
97 [LLCC_DRAM_UE] = {
98 .name = "DRAM Double-bit",
99 .synd_reg = DRP_ECC_DB_ERR_SYN0,
100 .count_status_reg = DRP_ECC_ERROR_STATUS1,
101 .ways_status_reg = DRP_ECC_ERROR_STATUS0,
102 .reg_cnt = DRP_SYN_REG_CNT,
103 .count_mask = ECC_DB_ERR_COUNT_MASK,
104 .ways_mask = ECC_DB_ERR_WAYS_MASK,
105 .ways_shift = ECC_DB_ERR_WAYS_SHIFT,
106 },
107 [LLCC_TRAM_CE] = {
108 .name = "TRAM Single-bit",
109 .synd_reg = TRP_ECC_SB_ERR_SYN0,
110 .count_status_reg = TRP_ECC_ERROR_STATUS1,
111 .ways_status_reg = TRP_ECC_ERROR_STATUS0,
112 .reg_cnt = TRP_SYN_REG_CNT,
113 .count_mask = ECC_SB_ERR_COUNT_MASK,
114 .ways_mask = ECC_SB_ERR_WAYS_MASK,
115 .count_shift = ECC_SB_ERR_COUNT_SHIFT,
116 },
117 [LLCC_TRAM_UE] = {
118 .name = "TRAM Double-bit",
119 .synd_reg = TRP_ECC_DB_ERR_SYN0,
120 .count_status_reg = TRP_ECC_ERROR_STATUS1,
121 .ways_status_reg = TRP_ECC_ERROR_STATUS0,
122 .reg_cnt = TRP_SYN_REG_CNT,
123 .count_mask = ECC_DB_ERR_COUNT_MASK,
124 .ways_mask = ECC_DB_ERR_WAYS_MASK,
125 .ways_shift = ECC_DB_ERR_WAYS_SHIFT,
126 },
127};
128
129static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
130{
131 u32 sb_err_threshold;
132 int ret;
133
134 /*
135 * Configure interrupt enable registers such that Tag, Data RAM related
136 * interrupts are propagated to interrupt controller for servicing
137 */
138 ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
139 TRP0_INTERRUPT_ENABLE,
140 TRP0_INTERRUPT_ENABLE);
141 if (ret)
142 return ret;
143
144 ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE,
145 SB_DB_TRP_INTERRUPT_ENABLE,
146 SB_DB_TRP_INTERRUPT_ENABLE);
147 if (ret)
148 return ret;
149
150 sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT);
151 ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG,
152 sb_err_threshold);
153 if (ret)
154 return ret;
155
156 ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
157 DRP0_INTERRUPT_ENABLE,
158 DRP0_INTERRUPT_ENABLE);
159 if (ret)
160 return ret;
161
162 ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE,
163 SB_DB_DRP_INTERRUPT_ENABLE);
164 return ret;
165}
166
167/* Clear the error interrupt and counter registers */
168static int
169qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
170{
171 int ret = 0;
172
173 switch (err_type) {
174 case LLCC_DRAM_CE:
175 case LLCC_DRAM_UE:
176 ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR,
177 DRP_TRP_INT_CLEAR);
178 if (ret)
179 return ret;
180
181 ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR,
182 DRP_TRP_CNT_CLEAR);
183 if (ret)
184 return ret;
185 break;
186 case LLCC_TRAM_CE:
187 case LLCC_TRAM_UE:
188 ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR,
189 DRP_TRP_INT_CLEAR);
190 if (ret)
191 return ret;
192
193 ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR,
194 DRP_TRP_CNT_CLEAR);
195 if (ret)
196 return ret;
197 break;
198 default:
199 ret = -EINVAL;
200 edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
201 err_type);
202 }
203 return ret;
204}
205
206/* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/
207static int
208dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
209{
210 struct llcc_edac_reg_data reg_data = edac_reg_data[err_type];
211 int err_cnt, err_ways, ret, i;
212 u32 synd_reg, synd_val;
213
214 for (i = 0; i < reg_data.reg_cnt; i++) {
215 synd_reg = reg_data.synd_reg + (i * 4);
216 ret = regmap_read(drv->regmap, drv->offsets[bank] + synd_reg,
217 &synd_val);
218 if (ret)
219 goto clear;
220
221 edac_printk(KERN_CRIT, EDAC_LLCC, "%s: ECC_SYN%d: 0x%8x\n",
222 reg_data.name, i, synd_val);
223 }
224
225 ret = regmap_read(drv->regmap,
226 drv->offsets[bank] + reg_data.count_status_reg,
227 &err_cnt);
228 if (ret)
229 goto clear;
230
231 err_cnt &= reg_data.count_mask;
232 err_cnt >>= reg_data.count_shift;
233 edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n",
234 reg_data.name, err_cnt);
235
236 ret = regmap_read(drv->regmap,
237 drv->offsets[bank] + reg_data.ways_status_reg,
238 &err_ways);
239 if (ret)
240 goto clear;
241
242 err_ways &= reg_data.ways_mask;
243 err_ways >>= reg_data.ways_shift;
244
245 edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error ways: 0x%4x\n",
246 reg_data.name, err_ways);
247
248clear:
249 return qcom_llcc_clear_error_status(err_type, drv);
250}
251
252static int
253dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
254{
255 struct llcc_drv_data *drv = edev_ctl->pvt_info;
256 int ret;
257
258 ret = dump_syn_reg_values(drv, bank, err_type);
259 if (ret)
260 return ret;
261
262 switch (err_type) {
263 case LLCC_DRAM_CE:
264 edac_device_handle_ce(edev_ctl, 0, bank,
265 "LLCC Data RAM correctable Error");
266 break;
267 case LLCC_DRAM_UE:
268 edac_device_handle_ue(edev_ctl, 0, bank,
269 "LLCC Data RAM uncorrectable Error");
270 break;
271 case LLCC_TRAM_CE:
272 edac_device_handle_ce(edev_ctl, 0, bank,
273 "LLCC Tag RAM correctable Error");
274 break;
275 case LLCC_TRAM_UE:
276 edac_device_handle_ue(edev_ctl, 0, bank,
277 "LLCC Tag RAM uncorrectable Error");
278 break;
279 default:
280 ret = -EINVAL;
281 edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
282 err_type);
283 }
284
285 return ret;
286}
287
288static irqreturn_t
289llcc_ecc_irq_handler(int irq, void *edev_ctl)
290{
291 struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
292 struct llcc_drv_data *drv = edac_dev_ctl->pvt_info;
293 irqreturn_t irq_rc = IRQ_NONE;
294 u32 drp_error, trp_error, i;
295 bool irq_handled;
296 int ret;
297
298 /* Iterate over the banks and look for Tag RAM or Data RAM errors */
299 for (i = 0; i < drv->num_banks; i++) {
300 ret = regmap_read(drv->regmap,
301 drv->offsets[i] + DRP_INTERRUPT_STATUS,
302 &drp_error);
303
304 if (!ret && (drp_error & SB_ECC_ERROR)) {
305 edac_printk(KERN_CRIT, EDAC_LLCC,
306 "Single Bit Error detected in Data RAM\n");
307 ret = dump_syn_reg(edev_ctl, LLCC_DRAM_CE, i);
308 } else if (!ret && (drp_error & DB_ECC_ERROR)) {
309 edac_printk(KERN_CRIT, EDAC_LLCC,
310 "Double Bit Error detected in Data RAM\n");
311 ret = dump_syn_reg(edev_ctl, LLCC_DRAM_UE, i);
312 }
313 if (!ret)
314 irq_handled = true;
315
316 ret = regmap_read(drv->regmap,
317 drv->offsets[i] + TRP_INTERRUPT_0_STATUS,
318 &trp_error);
319
320 if (!ret && (trp_error & SB_ECC_ERROR)) {
321 edac_printk(KERN_CRIT, EDAC_LLCC,
322 "Single Bit Error detected in Tag RAM\n");
323 ret = dump_syn_reg(edev_ctl, LLCC_TRAM_CE, i);
324 } else if (!ret && (trp_error & DB_ECC_ERROR)) {
325 edac_printk(KERN_CRIT, EDAC_LLCC,
326 "Double Bit Error detected in Tag RAM\n");
327 ret = dump_syn_reg(edev_ctl, LLCC_TRAM_UE, i);
328 }
329 if (!ret)
330 irq_handled = true;
331 }
332
333 if (irq_handled)
334 irq_rc = IRQ_HANDLED;
335
336 return irq_rc;
337}
338
339static int qcom_llcc_edac_probe(struct platform_device *pdev)
340{
341 struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
342 struct edac_device_ctl_info *edev_ctl;
343 struct device *dev = &pdev->dev;
344 int ecc_irq;
345 int rc;
346
347 rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
348 if (rc)
349 return rc;
350
351 /* Allocate edac control info */
352 edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
353 llcc_driv_data->num_banks, 1,
354 NULL, 0,
355 edac_device_alloc_index());
356
357 if (!edev_ctl)
358 return -ENOMEM;
359
360 edev_ctl->dev = dev;
361 edev_ctl->mod_name = dev_name(dev);
362 edev_ctl->dev_name = dev_name(dev);
363 edev_ctl->ctl_name = "llcc";
364 edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
365 edev_ctl->pvt_info = llcc_driv_data;
366
367 rc = edac_device_add_device(edev_ctl);
368 if (rc)
369 goto out_mem;
370
371 platform_set_drvdata(pdev, edev_ctl);
372
373 /* Request for ecc irq */
374 ecc_irq = llcc_driv_data->ecc_irq;
375 if (ecc_irq < 0) {
376 rc = -ENODEV;
377 goto out_dev;
378 }
379 rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
380 IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
381 if (rc)
382 goto out_dev;
383
384 return rc;
385
386out_dev:
387 edac_device_del_device(edev_ctl->dev);
388out_mem:
389 edac_device_free_ctl_info(edev_ctl);
390
391 return rc;
392}
393
394static int qcom_llcc_edac_remove(struct platform_device *pdev)
395{
396 struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev);
397
398 edac_device_del_device(edev_ctl->dev);
399 edac_device_free_ctl_info(edev_ctl);
400
401 return 0;
402}
403
404static struct platform_driver qcom_llcc_edac_driver = {
405 .probe = qcom_llcc_edac_probe,
406 .remove = qcom_llcc_edac_remove,
407 .driver = {
408 .name = "qcom_llcc_edac",
409 },
410};
411module_platform_driver(qcom_llcc_edac_driver);
412
413MODULE_DESCRIPTION("QCOM EDAC driver");
414MODULE_LICENSE("GPL v2");
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 2e4b34d2617e..69c285b1c990 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -93,6 +93,30 @@ struct llcc_drv_data {
93 int ecc_irq; 93 int ecc_irq;
94}; 94};
95 95
96/**
97 * llcc_edac_reg_data - llcc edac registers data for each error type
98 * @name: Name of the error
99 * @synd_reg: Syndrome register address
100 * @count_status_reg: Status register address to read the error count
101 * @ways_status_reg: Status register address to read the error ways
102 * @reg_cnt: Number of registers
103 * @count_mask: Mask value to get the error count
104 * @ways_mask: Mask value to get the error ways
105 * @count_shift: Shift value to get the error count
106 * @ways_shift: Shift value to get the error ways
107 */
108struct llcc_edac_reg_data {
109 char *name;
110 u64 synd_reg;
111 u64 count_status_reg;
112 u64 ways_status_reg;
113 u32 reg_cnt;
114 u32 count_mask;
115 u32 ways_mask;
116 u8 count_shift;
117 u8 ways_shift;
118};
119
96#if IS_ENABLED(CONFIG_QCOM_LLCC) 120#if IS_ENABLED(CONFIG_QCOM_LLCC)
97/** 121/**
98 * llcc_slice_getd - get llcc slice descriptor 122 * llcc_slice_getd - get llcc slice descriptor