From 5f8cfaa250f08499f587da0097f6accaa5eedf15 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 27 Sep 2017 11:15:38 -0700 Subject: gpu: nvgpu: gp10b: enhance priv ring error reporting -Add start_conn, disconnect and overflow fault type priv error detection -For busy looping in interrupt context, use nvgpu_udelay() instead of nvgpu_usleep_range() Bug 200350539 Change-Id: I0d0da86d5688bca36817d445151818632c5ea4f1 Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1569589 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: svc-mobile-coverity Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c | 58 +++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c b/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c index 1a282a10..b780457f 100644 --- a/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/priv_ring_gp10b.c @@ -39,10 +39,12 @@ void gp10b_priv_ring_isr(struct gk20a *g) u32 cmd; s32 retry = 100; u32 gpc; - u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + u32 gpc_stride, offset; - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + nvgpu_info(g, "unhandled priv ring intr"); return; + } status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r()); status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r()); @@ -50,6 +52,16 @@ void gp10b_priv_ring_isr(struct gk20a *g) nvgpu_err(g, "ringmaster intr status0: 0x%08x," "status1: 0x%08x", status0, status1); + if (pri_ringmaster_intr_status0_ring_start_conn_fault_v(status0) != 0) + nvgpu_err(g, + "BUG: connectivity problem on the startup sequence"); + + if (pri_ringmaster_intr_status0_disconnect_fault_v(status0) != 0) + nvgpu_err(g, "ring disconnected"); + + if (pri_ringmaster_intr_status0_overflow_fault_v(status0) != 0) + nvgpu_err(g, "ring overflowed"); + if (pri_ringmaster_intr_status0_gbl_write_error_sys_v(status0) != 0) { nvgpu_err(g, "SYS write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x", gk20a_readl(g, pri_ringstation_sys_priv_error_adr_r()), @@ -58,27 +70,43 @@ void gp10b_priv_ring_isr(struct gk20a *g) gk20a_readl(g, pri_ringstation_sys_priv_error_code_r())); } - for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { - if (status1 & BIT(gpc)) { - nvgpu_err(g, "GPC%u write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x", gpc, - gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_adr_r() + gpc * gpc_stride), - gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_wrdat_r() + gpc * gpc_stride), - gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_info_r() + gpc * gpc_stride), - gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_code_r() + gpc * gpc_stride)); + if (status1) { + gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); + for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { + offset = gpc * gpc_stride; + if (status1 & BIT(gpc)) { + nvgpu_err(g, "GPC%u write error. ADR %08x " + "WRDAT %08x INFO %08x, CODE %08x", gpc, + gk20a_readl(g, + pri_ringstation_gpc_gpc0_priv_error_adr_r() + offset), + gk20a_readl(g, + pri_ringstation_gpc_gpc0_priv_error_wrdat_r() + offset), + gk20a_readl(g, + pri_ringstation_gpc_gpc0_priv_error_info_r() + offset), + gk20a_readl(g, + pri_ringstation_gpc_gpc0_priv_error_code_r() + offset)); + status1 = status1 & (~(BIT(gpc))); + if (!status1) + break; + } } } - + /* clear interrupt */ cmd = gk20a_readl(g, pri_ringmaster_command_r()); cmd = set_field(cmd, pri_ringmaster_command_cmd_m(), pri_ringmaster_command_cmd_ack_interrupt_f()); gk20a_writel(g, pri_ringmaster_command_r(), cmd); - do { + /* poll for clear interrupt done */ + cmd = pri_ringmaster_command_cmd_v( + gk20a_readl(g, pri_ringmaster_command_r())); + while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && retry) { + nvgpu_udelay(20); cmd = pri_ringmaster_command_cmd_v( gk20a_readl(g, pri_ringmaster_command_r())); - nvgpu_usleep_range(20, 40); - } while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry); + retry--; + } - if (retry <= 0) - nvgpu_warn(g, "priv ringmaster cmd ack too many retries"); + if (retry == 0) + nvgpu_err(g, "priv ringmaster intr ack failed"); } -- cgit v1.2.2