From fd97ed15d6a5285f59ca37dba8086f7a2780d3af Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Tue, 13 Oct 2015 13:09:07 -0700 Subject: gpu: nvgpu: Trigger recovery on HWW errors Trigger recovery on DS and MEMFMT HWW errors, and write an error line to UART for each HWW error. Also capture the channel id before clearing the exception. Bug 1683059 Change-Id: Ia00d88a76371a4bd7e047915dde0bf0d4b84bc10 Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/816983 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index ea06bd20..8c667ae7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -5414,12 +5414,14 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event) int gk20a_gr_isr(struct gk20a *g) { + struct device *dev = dev_from_gk20a(g); struct gr_isr_data isr_data; u32 grfifo_ctl; u32 obj_table; int need_reset = 0; u32 gr_intr = gk20a_readl(g, gr_intr_r()); struct channel_gk20a *ch = NULL; + int tsgid = NVGPU_INVALID_TSG_ID; gk20a_dbg_fn(""); gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); @@ -5445,7 +5447,7 @@ int gk20a_gr_isr(struct gk20a *g) gr_fe_object_table_r(isr_data.sub_chan)) : 0; isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); - ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL); + ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, &tsgid); if (!ch) { gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", isr_data.curr_ctx); @@ -5538,16 +5540,16 @@ int gk20a_gr_isr(struct gk20a *g) if (exception & gr_exception_fe_m()) { u32 fe = gk20a_readl(g, gr_fe_hww_esr_r()); - gk20a_dbg(gpu_dbg_intr, "fe warning %08x\n", fe); + gk20a_err(dev, "fe warning %08x", fe); gk20a_writel(g, gr_fe_hww_esr_r(), fe); need_reset |= -EFAULT; } if (exception & gr_exception_memfmt_m()) { u32 memfmt = gk20a_readl(g, gr_memfmt_hww_esr_r()); - gk20a_dbg(gpu_dbg_intr, "memfmt exception %08x\n", - memfmt); + gk20a_err(dev, "memfmt exception %08x", memfmt); gk20a_writel(g, gr_memfmt_hww_esr_r(), memfmt); + need_reset |= -EFAULT; } /* check if a gpc exception has occurred */ @@ -5582,17 +5584,23 @@ int gk20a_gr_isr(struct gk20a *g) if (exception & gr_exception_ds_m()) { u32 ds = gk20a_readl(g, gr_ds_hww_esr_r()); - gk20a_dbg(gpu_dbg_intr, "ds exception %08x\n", ds); + gk20a_err(dev, "ds exception %08x", ds); gk20a_writel(g, gr_ds_hww_esr_r(), ds); + need_reset |= -EFAULT; } gk20a_writel(g, gr_intr_r(), gr_intr_exception_reset_f()); gr_intr &= ~gr_intr_exception_pending_f(); } - if (need_reset) - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), - ~(u32)0, false, false, true); + if (need_reset) { + if (tsgid != NVGPU_INVALID_TSG_ID) + gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + tsgid, true, true, true); + else + gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + ch->hw_chid, false, true, true); + } clean_up: if (gr_intr && !ch) { -- cgit v1.2.2