summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
diff options
context:
space:
mode:
author Aparna Das <aparnad@nvidia.com> 2018-06-04 22:40:56 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com> 2018-06-15 20:47:06 -0400
commit 98d996f4ffb0137d119b5849cae46d7b7e5693e1 (patch)
tree 4e3243b91e8e559dd14b8bc30fc2468713182027 /drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent 1f51620fda57443c77506c354af837a60883d78b (diff)
gpu: nvgpu: recover on first interrupt reported for mmu nack
In case of an mmu nack error, the interrupt is received twice, through the SM-reported mmu nack interrupt and the mmu fault, in undetermined order. Recover on the first received interrupt to avoid semaphore release, and skip doing a second recovery. Also fix a NULL pointer dereference in function gv11b_fifo_reset_pbdma_and_eng_faulted when the channel reference is invalid in the teardown path. Bug 200382235 Change-Id: I361a5725d7b6355ebf02b2870727f647fbd7a37e Signed-off-by: Aparna Das <aparnad@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1739804 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--drivers/gpu/nvgpu/gv11b/gr_gv11b.c35
1 files changed, 21 insertions, 14 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 694ff8ad..aed45ceb 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2093,23 +2093,30 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
2093 u32 warp_esr_error, 2093 u32 warp_esr_error,
2094 struct channel_gk20a *fault_ch) 2094 struct channel_gk20a *fault_ch)
2095{ 2095{
2096 struct tsg_gk20a *tsg;
2097 u32 offset; 2096 u32 offset;
2097 int err = 0;
2098 2098
2099 fault_ch = gk20a_channel_get(fault_ch);
2099 if (fault_ch) { 2100 if (fault_ch) {
2100 tsg = &g->fifo.tsg[fault_ch->tsgid]; 2101 if (!fault_ch->mmu_nack_handled) {
2101 2102 /* recovery is not done for the channel implying mmu
2102 /* 2103 * nack interrupt is serviced before mmu fault. Force
2103 * Upon receiving MMU_FAULT error, MMU will forward MMU_NACK 2104 * recovery by returning an error. Also indicate we
2104 * to SM. So MMU_FAULT handling path will take care of 2105 * should skip a second recovery.
2105 * triggering RC recovery 2106 */
2106 * 2107 fault_ch->mmu_nack_handled = true;
2107 * In MMU_NACK handling path, we just set the error notifier 2108 err = -EFAULT;
2108 * and clear the interrupt so that the User Space sees the error 2109 }
2109 * as soon as semaphores are released by SM
2110 */
2111 gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
2112 } 2110 }
2111 /* else mmu fault is serviced first and channel is closed */
2112
2113 /* do not release reference to ch as we do not want userspace to close
2114 * this channel on recovery. Otherwise mmu fault handler will enter
2115 * recovery path even if channel is invalid. We want to explicitly check
2116 * for teardown value in mmu fault handler.
2117 */
2118 if (!err)
2119 gk20a_channel_put(fault_ch);
2113 2120
2114 /* clear interrupt */ 2121 /* clear interrupt */
2115 offset = gk20a_gr_gpc_offset(g, gpc) + 2122 offset = gk20a_gr_gpc_offset(g, gpc) +
@@ -2122,7 +2129,7 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
2122 "ESR %s(0x%x)", 2129 "ESR %s(0x%x)",
2123 "MMU NACK ERROR", 2130 "MMU NACK ERROR",
2124 warp_esr_error); 2131 warp_esr_error);
2125 return 0; 2132 return err;
2126} 2133}
2127 2134
2128static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error) 2135static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)