aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThor Thayer <thor.thayer@linux.intel.com>2018-05-11 19:00:10 -0400
committerBorislav Petkov <bp@suse.de>2018-05-12 06:29:41 -0400
commite9918d7fafae2ec5365b193654960f6158751f8c (patch)
treeb17e0df16f20521df19240b02facab73dac300fa
parentf8eb0edeb8c19aba667a087b80706bf4f61f8256 (diff)
EDAC, altera: Handle SDRAM Uncorrectable Errors on Stratix10
On Stratix10, uncorrectable errors (UE) are routed to the SError exception instead of the IRQ exceptions. On Stratix10, uncorrectable SErrors must be treated as fatal and will cause a panic. Older Altera/Intel parts printed out a message for a UE, so do that here using the notifier framework. Record the UE in sticky registers that retain their state through a reset. Check these registers on probe and print out the error on startup. Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com> Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac <linux-edac@vger.kernel.org> Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1526079610-5527-1-git-send-email-thor.thayer@linux.intel.com [ Remove unused var in s10_edac_dberr_handler(), reorder args. ] Signed-off-by: Borislav Petkov <bp@suse.de>
-rw-r--r--drivers/edac/altera_edac.c67
-rw-r--r--drivers/edac/altera_edac.h8
2 files changed, 64 insertions, 11 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 0ee6d5969ef2..1f12fdf2ed00 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -14,6 +14,7 @@
14#include <linux/irqchip/chained_irq.h> 14#include <linux/irqchip/chained_irq.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/mfd/syscon.h> 16#include <linux/mfd/syscon.h>
17#include <linux/notifier.h>
17#include <linux/of_address.h> 18#include <linux/of_address.h>
18#include <linux/of_irq.h> 19#include <linux/of_irq.h>
19#include <linux/of_platform.h> 20#include <linux/of_platform.h>
@@ -725,6 +726,13 @@ static int altr_s10_sdram_probe(struct platform_device *pdev)
725 goto err2; 726 goto err2;
726 } 727 }
727 728
729 if (regmap_write(regmap, S10_SYSMGR_ECC_INTMASK_CLR_OFST,
730 S10_DDR0_IRQ_MASK)) {
731 edac_printk(KERN_ERR, EDAC_MC,
732 "Error clearing SDRAM ECC count\n");
733 return -ENODEV;
734 }
735
728 if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset, 736 if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset,
729 priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) { 737 priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) {
730 edac_mc_printk(mci, KERN_ERR, 738 edac_mc_printk(mci, KERN_ERR,
@@ -2228,23 +2236,50 @@ module_platform_driver(altr_edac_a10_driver);
2228 2236
2229/************** Stratix 10 EDAC Device Controller Functions> ************/ 2237/************** Stratix 10 EDAC Device Controller Functions> ************/
2230 2238
2239#define to_s10edac(p, m) container_of(p, struct altr_stratix10_edac, m)
2240
2241/*
2242 * The double bit error is handled through SError which is fatal. This is
2243 * called as a panic notifier to printout ECC error info as part of the panic.
2244 */
2245static int s10_edac_dberr_handler(struct notifier_block *this,
2246 unsigned long event, void *ptr)
2247{
2248 struct altr_stratix10_edac *edac = to_s10edac(this, panic_notifier);
2249 int err_addr, dberror;
2250
2251 s10_protected_reg_read(edac, S10_SYSMGR_ECC_INTSTAT_DERR_OFST,
2252 &dberror);
2253 /* Remember the UE Errors for a reboot */
2254 s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, dberror);
2255 if (dberror & S10_DDR0_IRQ_MASK) {
2256 s10_protected_reg_read(edac, S10_DERRADDR_OFST, &err_addr);
2257 /* Remember the UE Error address */
2258 s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST,
2259 err_addr);
2260 edac_printk(KERN_ERR, EDAC_MC,
2261 "EDAC: [Uncorrectable errors @ 0x%08X]\n\n",
2262 err_addr);
2263 }
2264
2265 return NOTIFY_DONE;
2266}
2267
2231static void altr_edac_s10_irq_handler(struct irq_desc *desc) 2268static void altr_edac_s10_irq_handler(struct irq_desc *desc)
2232{ 2269{
2233 int dberr, bit, sm_offset, irq_status;
2234 struct altr_stratix10_edac *edac = irq_desc_get_handler_data(desc); 2270 struct altr_stratix10_edac *edac = irq_desc_get_handler_data(desc);
2235 struct irq_chip *chip = irq_desc_get_chip(desc); 2271 struct irq_chip *chip = irq_desc_get_chip(desc);
2236 int irq = irq_desc_get_irq(desc); 2272 int irq = irq_desc_get_irq(desc);
2273 int bit, sm_offset, irq_status;
2237 2274
2238 dberr = (irq == edac->db_irq) ? 1 : 0; 2275 sm_offset = S10_SYSMGR_ECC_INTSTAT_SERR_OFST;
2239 sm_offset = dberr ? S10_SYSMGR_ECC_INTSTAT_DERR_OFST :
2240 S10_SYSMGR_ECC_INTSTAT_SERR_OFST;
2241 2276
2242 chained_irq_enter(chip, desc); 2277 chained_irq_enter(chip, desc);
2243 2278
2244 s10_protected_reg_read(NULL, sm_offset, &irq_status); 2279 s10_protected_reg_read(NULL, sm_offset, &irq_status);
2245 2280
2246 for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { 2281 for_each_set_bit(bit, (unsigned long *)&irq_status, 32) {
2247 irq = irq_linear_revmap(edac->domain, dberr * 32 + bit); 2282 irq = irq_linear_revmap(edac->domain, bit);
2248 if (irq) 2283 if (irq)
2249 generic_handle_irq(irq); 2284 generic_handle_irq(irq);
2250 } 2285 }
@@ -2289,6 +2324,7 @@ static int altr_edac_s10_probe(struct platform_device *pdev)
2289{ 2324{
2290 struct altr_stratix10_edac *edac; 2325 struct altr_stratix10_edac *edac;
2291 struct device_node *child; 2326 struct device_node *child;
2327 int dberror, err_addr;
2292 2328
2293 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); 2329 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
2294 if (!edac) 2330 if (!edac)
@@ -2318,11 +2354,22 @@ static int altr_edac_s10_probe(struct platform_device *pdev)
2318 altr_edac_s10_irq_handler, 2354 altr_edac_s10_irq_handler,
2319 edac); 2355 edac);
2320 2356
2321 edac->db_irq = platform_get_irq(pdev, 1); 2357 edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
2322 if (edac->db_irq >= 0) 2358 atomic_notifier_chain_register(&panic_notifier_list,
2323 irq_set_chained_handler_and_data(edac->db_irq, 2359 &edac->panic_notifier);
2324 altr_edac_s10_irq_handler, 2360
2325 edac); 2361 /* Printout a message if uncorrectable error previously. */
2362 s10_protected_reg_read(edac, S10_SYSMGR_UE_VAL_OFST, &dberror);
2363 if (dberror) {
2364 s10_protected_reg_read(edac, S10_SYSMGR_UE_ADDR_OFST,
2365 &err_addr);
2366 edac_printk(KERN_ERR, EDAC_DEVICE,
2367 "Previous Boot UE detected[0x%X] @ 0x%X\n",
2368 dberror, err_addr);
2369 /* Reset the sticky registers */
2370 s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, 0);
2371 s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, 0);
2372 }
2326 2373
2327 for_each_child_of_node(pdev->dev.of_node, child) { 2374 for_each_child_of_node(pdev->dev.of_node, child) {
2328 if (!of_device_is_available(child)) 2375 if (!of_device_is_available(child))
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 747481081072..81f0554e09de 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -180,6 +180,10 @@
180/* SDRAM Single Bit Error Count Compare Set Register */ 180/* SDRAM Single Bit Error Count Compare Set Register */
181#define S10_SERRCNTREG_OFST 0xF801113C 181#define S10_SERRCNTREG_OFST 0xF801113C
182 182
183/* Sticky registers for Uncorrected Errors */
184#define S10_SYSMGR_UE_VAL_OFST 0xFFD12220
185#define S10_SYSMGR_UE_ADDR_OFST 0xFFD12224
186
183struct altr_sdram_prv_data { 187struct altr_sdram_prv_data {
184 int ecc_ctrl_offset; 188 int ecc_ctrl_offset;
185 int ecc_ctl_en_mask; 189 int ecc_ctl_en_mask;
@@ -322,6 +326,8 @@ struct altr_sdram_mc_data {
322#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST 0xFFD1209C 326#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST 0xFFD1209C
323#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xFFD120A0 327#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xFFD120A0
324 328
329#define S10_DDR0_IRQ_MASK BIT(16)
330
325struct altr_edac_device_dev; 331struct altr_edac_device_dev;
326 332
327struct edac_device_prv_data { 333struct edac_device_prv_data {
@@ -434,10 +440,10 @@ struct altr_arria10_edac {
434struct altr_stratix10_edac { 440struct altr_stratix10_edac {
435 struct device *dev; 441 struct device *dev;
436 int sb_irq; 442 int sb_irq;
437 int db_irq;
438 struct irq_domain *domain; 443 struct irq_domain *domain;
439 struct irq_chip irq_chip; 444 struct irq_chip irq_chip;
440 struct list_head s10_ecc_devices; 445 struct list_head s10_ecc_devices;
446 struct notifier_block panic_notifier;
441}; 447};
442 448
443#endif /* #ifndef _ALTERA_EDAC_H */ 449#endif /* #ifndef _ALTERA_EDAC_H */