From 258ae4471296bcee03987778e3b7c79d3a027e53 Mon Sep 17 00:00:00 2001 From: David Nieto Date: Tue, 5 Dec 2017 15:20:18 -0800 Subject: gpu: nvgpu: gv11b: PMU parity HWW ECC support Adding support for ISR handling of ECC parity errors for PMU unit and setting the initial IRQDST mask to deliver ECC interrupts to host in the non-stall PMU irq path JIRA: GPUT19X-83 Change-Id: I8efae6777811893ecce79d0e32ba81b62c27b1ef Signed-off-by: David Nieto Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1611625 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/ecc_gv11b.h | 2 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 2 + drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | 117 ++++++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gv11b/pmu_gv11b.h | 2 + 4 files changed, 123 insertions(+) (limited to 'drivers/gpu/nvgpu/gv11b') diff --git a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h index 94b25c02..ebce46ce 100644 --- a/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/ecc_gv11b.h @@ -59,6 +59,8 @@ struct ecc_eng_t19x { struct gk20a_ecc_stat mmu_fillunit_corrected_err_count; struct gk20a_ecc_stat mmu_fillunit_uncorrected_err_count; /* PMU */ + struct gk20a_ecc_stat pmu_corrected_err_count; + struct gk20a_ecc_stat pmu_uncorrected_err_count; }; #endif diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 6a21eb2d..f6bdf6e5 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -611,6 +611,8 @@ static const struct gpu_ops gv11b_ops = { .pmu_nsbootstrap = gv11b_pmu_bootstrap, .pmu_pg_set_sub_feature_mask = gv11b_pg_set_subfeature_mask, .is_pmu_supported = gv11b_is_pmu_supported, + .get_irqdest = gv11b_pmu_get_irqdest, + .handle_ext_irq = gv11b_pmu_handle_ext_irq, }, .regops = { .get_global_whitelist_ranges = diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c index 4b244f5a..a972510f 
100644 --- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c @@ -287,6 +287,123 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu) return 0; } +/* PMU external-interrupt handler. When intr0 has the ext ECC parity bit set and the PMU ECC interrupt status flags a corrected or uncorrected error, this reads the falcon ECC status, address and error-count registers, zeroes the affected count registers, writes the reset_task value to the ECC status register, adds the deltas to g->ecc.eng.t19x.pmu_corrected_err_count.counters[0] and pmu_uncorrected_err_count.counters[0], and logs the details. Returns nothing. */ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0) +{ + u32 intr1; + u32 ecc_status, ecc_addr, corrected_cnt, uncorrected_cnt; + u32 corrected_delta, uncorrected_delta; + u32 corrected_overflow, uncorrected_overflow; + + /* + * handle the ECC interrupt + */ + if (intr0 & pwr_falcon_irqstat_ext_ecc_parity_true_f()) { + intr1 = gk20a_readl(g, pwr_pmu_ecc_intr_status_r()); + if (intr1 & (pwr_pmu_ecc_intr_status_corrected_m() | + pwr_pmu_ecc_intr_status_uncorrected_m())) { + + ecc_status = gk20a_readl(g, + pwr_pmu_falcon_ecc_status_r()); + ecc_addr = gk20a_readl(g, + pwr_pmu_falcon_ecc_address_r()); + corrected_cnt = gk20a_readl(g, + pwr_pmu_falcon_ecc_corrected_err_count_r()); + uncorrected_cnt = gk20a_readl(g, + pwr_pmu_falcon_ecc_uncorrected_err_count_r()); + + corrected_delta = + pwr_pmu_falcon_ecc_corrected_err_count_total_v(corrected_cnt); + uncorrected_delta = + pwr_pmu_falcon_ecc_uncorrected_err_count_total_v(uncorrected_cnt); + corrected_overflow = ecc_status & + pwr_pmu_falcon_ecc_status_corrected_err_total_counter_overflow_m(); + + uncorrected_overflow = ecc_status & + pwr_pmu_falcon_ecc_status_uncorrected_err_total_counter_overflow_m(); + /* NOTE(review): repeats the corrected_overflow assignment made just above with the same operands */ corrected_overflow = ecc_status & + pwr_pmu_falcon_ecc_status_corrected_err_total_counter_overflow_m(); + + /* clear the interrupt: a count register is zeroed when its intr1 bit or its overflow flag is set */ + if ((intr1 & pwr_pmu_ecc_intr_status_corrected_m()) || + corrected_overflow) { + gk20a_writel(g, pwr_pmu_falcon_ecc_corrected_err_count_r(), 0); + } + if ((intr1 & pwr_pmu_ecc_intr_status_uncorrected_m()) || + uncorrected_overflow) { + gk20a_writel(g, + pwr_pmu_falcon_ecc_uncorrected_err_count_r(), 0); + } + + gk20a_writel(g, pwr_pmu_falcon_ecc_status_r(), + pwr_pmu_falcon_ecc_status_reset_task_f()); + + /* update counters per slice (only counters[0] is written below); on overflow the delta gains 1 << the count field's _s() value - presumably the field width in bits, TODO confirm */ + if (corrected_overflow) + corrected_delta += (0x1UL << 
pwr_pmu_falcon_ecc_corrected_err_count_total_s()); + if (uncorrected_overflow) + uncorrected_delta += (0x1UL << pwr_pmu_falcon_ecc_uncorrected_err_count_total_s()); + + g->ecc.eng.t19x.pmu_corrected_err_count.counters[0] += corrected_delta; + g->ecc.eng.t19x.pmu_uncorrected_err_count.counters[0] += uncorrected_delta; + + nvgpu_log(g, gpu_dbg_intr, + "pmu ecc interrupt intr1: 0x%x", intr1); + + if (ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_imem_m()) + nvgpu_log(g, gpu_dbg_intr, + "imem ecc error corrected"); + if (ecc_status & pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) + nvgpu_log(g, gpu_dbg_intr, + "imem ecc error uncorrected"); + if (ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_dmem_m()) + nvgpu_log(g, gpu_dbg_intr, + "dmem ecc error corrected"); + if (ecc_status & pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) + nvgpu_log(g, gpu_dbg_intr, + "dmem ecc error uncorrected"); + + if (corrected_overflow || uncorrected_overflow) + nvgpu_info(g, "ecc counter overflow!"); + + nvgpu_log(g, gpu_dbg_intr, + "ecc error row address: 0x%x", + pwr_pmu_falcon_ecc_address_row_address_v(ecc_addr)); + + /* NOTE(review): the accumulated counters are printed with %d; confirm this matches the counters[] element type */ nvgpu_log(g, gpu_dbg_intr, + "ecc error count corrected: %d, uncorrected %d", + g->ecc.eng.t19x.pmu_corrected_err_count.counters[0], + g->ecc.eng.t19x.pmu_uncorrected_err_count.counters[0]); + } + } +} + +/* Builds and returns the PMU falcon IRQDEST value by OR-ing one host_* and one target_* field per interrupt source. ext_ecc_parity is set to host=1, target=0. The g argument is not used in the body. */ u32 gv11b_pmu_get_irqdest(struct gk20a *g) +{ + u32 intr_dest; + + /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 */ + intr_dest = pwr_falcon_irqdest_host_gptmr_f(0) | + pwr_falcon_irqdest_host_wdtmr_f(1) | + pwr_falcon_irqdest_host_mthd_f(0) | + pwr_falcon_irqdest_host_ctxsw_f(0) | + pwr_falcon_irqdest_host_halt_f(1) | + pwr_falcon_irqdest_host_exterr_f(0) | + pwr_falcon_irqdest_host_swgen0_f(1) | + pwr_falcon_irqdest_host_swgen1_f(0) | + pwr_falcon_irqdest_host_ext_ecc_parity_f(1) | + pwr_falcon_irqdest_target_gptmr_f(1) | + pwr_falcon_irqdest_target_wdtmr_f(0) | + pwr_falcon_irqdest_target_mthd_f(0) | + pwr_falcon_irqdest_target_ctxsw_f(0) | + 
pwr_falcon_irqdest_target_halt_f(0) | + pwr_falcon_irqdest_target_exterr_f(0) | + pwr_falcon_irqdest_target_swgen0_f(0) | + pwr_falcon_irqdest_target_swgen1_f(0) | + pwr_falcon_irqdest_target_ext_ecc_parity_f(0); + + return intr_dest; +} + static void pmu_handle_pg_sub_feature_msg(struct gk20a *g, struct pmu_msg *msg, void *param, u32 handle, u32 status) { diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.h b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.h index e917188d..dd6db10c 100644 --- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.h @@ -35,4 +35,6 @@ bool gv11b_is_lazy_bootstrap(u32 falcon_id); bool gv11b_is_priv_load(u32 falcon_id); int gv11b_pmu_setup_elpg(struct gk20a *g); +u32 gv11b_pmu_get_irqdest(struct gk20a *g); +void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0); #endif /*__PMU_GV11B_H_*/ -- cgit v1.2.2