From 5b10690479d016dd27d4592d94e7bd12a9277a34 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Thu, 4 Jan 2018 05:02:46 -0800 Subject: gpu: nvgpu: handle SM reported MMU_NACK exception Upon receiving MMU_FAULT error, MMU will forward MMU_NACK to SM. If MMU_NACK is masked out, SM will simply release the semaphores. And if semaphores are released before MMU fault is handled, user space could incorrectly see that operation as successful. Fix this by handling SM reported MMU_NACK exception. Enable MMU_NACK reporting in gv11b_gr_set_hww_esr_report_mask. In MMU_NACK handling path, we just set the error notifier and clear the interrupt so that the User Space sees the error as soon as semaphores are released by SM. And MMU_FAULT handling path will take care of triggering RC recovery anyway. Also add necessary h/w accessors for mmu_nack. Bug 2040594 Jira NVGPU-473 Change-Id: Ic925c2d3f3069016c57d177713066c29ab39dc3d Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1631708 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 44 +++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c') diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index baad5e47..d5924169 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1994,6 +1994,39 @@ void gr_gv11b_get_access_map(struct gk20a *g, *num_entries = ARRAY_SIZE(wl_addr_gv11b); } +static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g, + u32 gpc, u32 tpc, u32 sm, + u32 warp_esr, + struct channel_gk20a *fault_ch) +{ + struct tsg_gk20a *tsg; + u32 offset; + + if (fault_ch) { + tsg = &g->fifo.tsg[fault_ch->tsgid]; + + /* + * Upon receiving MMU_FAULT error, MMU will forward MMU_NACK + * to SM. 
So MMU_FAULT handling path will take care of + * triggering RC recovery + * + * In MMU_NACK handling path, we just set the error notifier + * and clear the interrupt so that the User Space sees the error + * as soon as semaphores are released by SM + */ + gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); + } + + /* clear interrupt */ + offset = gk20a_gr_gpc_offset(g, gpc) + + gk20a_gr_tpc_offset(g, tpc) + + gv11b_gr_sm_offset(g, sm); + nvgpu_writel(g, + gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0); + + return 0; +} + /* @brief pre-process work on the SM exceptions to determine if we clear them or not. * * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing @@ -2013,6 +2046,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, *early_exit = false; *ignore_debugger = false; + /* + * We don't need to trigger CILP in case of MMU_NACK + * So just handle MMU_NACK and return + */ + if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f()) + return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, + warp_esr, fault_ch); + if (fault_ch) cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP); @@ -2992,7 +3033,8 @@ void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g) gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f() | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f() | - gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f()); + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f() | + gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f()); /* setup sm global esr report mask. vat_alarm_report is not enabled */ gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(), -- cgit v1.2.2