From 89e0745fa024891b988508c3baa20c453230a80b Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 2 Apr 2018 19:10:42 +0530 Subject: gpu: nvgpu: handle misaligned_addr SM exception We currently do not handle the misaligned_addr SM exception explicitly, and hence we incorrectly initiate CILP on this exception. Handle this exception explicitly in this sequence: - set error notifier first - clear the interrupt - return error from gr_gv11b_handle_warp_esr_error_misaligned_addr() so that RC recovery is triggered by gk20a_gr_isr(). Ensure that the error value is propagated back to gk20a_gr_isr() correctly. Use nvgpu_set_error_notifier_if_empty() to set the error notifier, since this prevents overwriting of the error notifier value in case gk20a_gr_isr() also tries to write some error notifier value. Bug 200388475 Jira NVGPU-554 Change-Id: I84c4d202a8068e738567ccd344e05d9d5f6ad2f0 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1686781 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 40 +++++++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 2 +- 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 7f6d1906..c43c6e83 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" @@ -2090,6 +2091,41 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g, return 0; } +static int gr_gv11b_handle_warp_esr_error_misaligned_addr(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, + u32 warp_esr, + struct channel_gk20a *fault_ch) +{ + struct tsg_gk20a *tsg; + u32 offset; + struct channel_gk20a *ch_tsg; + + if (fault_ch) { + tsg = &g->fifo.tsg[fault_ch->tsgid]; + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); +
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list, + channel_gk20a, ch_entry) { + if (gk20a_channel_get(ch_tsg)) { + g->ops.fifo.set_error_notifier(ch_tsg, + NVGPU_ERR_NOTIFIER_GR_EXCEPTION); + gk20a_channel_put(ch_tsg); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + } + + /* clear interrupt */ + offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + nvgpu_writel(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0); + + /* return error so that recovery is triggered by gk20a_gr_isr() */ + return -EFAULT; +} + /* @brief pre-process work on the SM exceptions to determine if we clear them or not. * * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing @@ -2118,6 +2154,10 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, warp_esr, fault_ch); + if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f()) + return gr_gv11b_handle_warp_esr_error_misaligned_addr(g, gpc, tpc, sm, + warp_esr, fault_ch); + if (fault_ch) { tsg = tsg_gk20a_from_ch(fault_ch); if (!tsg) diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 2d6dc9b0..dd4bd55a 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -534,7 +534,7 @@ static const struct gpu_ops gv11b_ops = { .check_ch_ctxsw_timeout = gk20a_fifo_check_ch_ctxsw_timeout, .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, - .set_error_notifier = nvgpu_set_error_notifier, + .set_error_notifier = nvgpu_set_error_notifier_if_empty, .setup_sw = gk20a_init_fifo_setup_sw, #ifdef CONFIG_TEGRA_GK20A_NVHOST .alloc_syncpt_buf = gv11b_fifo_alloc_syncpt_buf, -- cgit v1.2.2