author		Deepak Nibade <dnibade@nvidia.com>	2018-01-04 08:02:46 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-01-12 14:05:31 -0500
commit		5b10690479d016dd27d4592d94e7bd12a9277a34 (patch)
tree		663501e778ad0089c1930c7beff8a62dfac34ea6 /drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent		6170f1eed53b4e6770a6948474ab8a8a5e2621bc (diff)
gpu: nvgpu: handle SM reported MMU_NACK exception
Upon receiving an MMU_FAULT error, the MMU forwards MMU_NACK to the SM.
If MMU_NACK is masked out, the SM simply releases its semaphores; and if
the semaphores are released before the MMU fault is handled, user space
could incorrectly see the operation as successful.

Fix this by handling the SM-reported MMU_NACK exception. Enable MMU_NACK
reporting in gv11b_gr_set_hww_esr_report_mask. In the MMU_NACK handling
path, we just set the error notifier and clear the interrupt so that
user space sees the error as soon as the semaphores are released by the
SM. The MMU_FAULT handling path will take care of triggering RC recovery
anyway.

Also add the necessary h/w accessors for mmu_nack.

Bug 2040594
Jira NVGPU-473

Change-Id: Ic925c2d3f3069016c57d177713066c29ab39dc3d
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1631708
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
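In outline, the new handling path reduces to the following condensed sketch. This is a simplification of the patch below, not a drop-in replacement: the function name is ours, the unused warp_esr parameter is dropped, and everything else reuses helpers that appear in the patch itself.

static int handle_mmu_nack_sketch(struct gk20a *g, u32 gpc, u32 tpc,
				  u32 sm, struct channel_gk20a *fault_ch)
{
	/* Set the error notifier first, so user space observes the
	 * failure as soon as the SM releases its semaphores. */
	if (fault_ch)
		gk20a_fifo_set_ctx_mmu_error_tsg(g,
				&g->fifo.tsg[fault_ch->tsgid]);

	/* Clear the warp ESR of the reporting SM; RC recovery is left
	 * to the MMU_FAULT handling path. */
	nvgpu_writel(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() +
			gk20a_gr_gpc_offset(g, gpc) +
			gk20a_gr_tpc_offset(g, tpc) +
			gv11b_gr_sm_offset(g, sm), 0);

	return 0;
}

The ordering matters: the notifier is written before the interrupt is cleared, so by the time the SM resumes and releases its semaphores the error is already visible to user space.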
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gr_gv11b.c  44
1 file changed, 43 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index baad5e47..d5924169 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1994,6 +1994,39 @@ void gr_gv11b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gv11b);
 }
 
+static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
+			u32 gpc, u32 tpc, u32 sm,
+			u32 warp_esr,
+			struct channel_gk20a *fault_ch)
+{
+	struct tsg_gk20a *tsg;
+	u32 offset;
+
+	if (fault_ch) {
+		tsg = &g->fifo.tsg[fault_ch->tsgid];
+
+		/*
+		 * Upon receiving MMU_FAULT error, MMU will forward MMU_NACK
+		 * to SM. So MMU_FAULT handling path will take care of
+		 * triggering RC recovery
+		 *
+		 * In MMU_NACK handling path, we just set the error notifier
+		 * and clear the interrupt so that the User Space sees the error
+		 * as soon as semaphores are released by SM
+		 */
+		gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+	}
+
+	/* clear interrupt */
+	offset = gk20a_gr_gpc_offset(g, gpc) +
+			gk20a_gr_tpc_offset(g, tpc) +
+			gv11b_gr_sm_offset(g, sm);
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
+
+	return 0;
+}
+
 /* @brief pre-process work on the SM exceptions to determine if we clear them or not.
  *
  * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
@@ -2013,6 +2046,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 	*early_exit = false;
 	*ignore_debugger = false;
 
+	/*
+	 * We don't need to trigger CILP in case of MMU_NACK
+	 * So just handle MMU_NACK and return
+	 */
+	if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f())
+		return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
+				warp_esr, fault_ch);
+
 	if (fault_ch)
 		cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
 				NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
@@ -2992,7 +3033,8 @@ void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g)
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f() |
-		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f());
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f() |
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f());
 
 	/* setup sm global esr report mask. vat_alarm_report is not enabled */
 	gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(),
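
A note on the gr_gpc0_tpc0_sm0_* register naming used throughout the patch: a specific SM is addressed by adding per-unit offsets to the GPC0/TPC0/SM0 base address, which is what the gk20a_gr_gpc_offset/gk20a_gr_tpc_offset/gv11b_gr_sm_offset calls compute. Below is a minimal, self-contained sketch of that arithmetic; the stride values are illustrative assumptions, the real ones come from nvgpu's generated gv11b hardware headers.

#include <stdint.h>

/* Assumed example strides, for illustration only; the real values are
 * chip-specific and come from the generated hardware headers. */
#define GPC_PRIV_STRIDE    0x8000u /* bytes between consecutive GPCs */
#define TPC_IN_GPC_STRIDE  0x0800u /* bytes between TPCs within a GPC */
#define SM_PRIV_STRIDE     0x0080u /* bytes between SMs within a TPC */

/* Mirrors "offset = gpc_offset + tpc_offset + sm_offset" in the patch:
 * adding the result to a gr_gpc0_tpc0_sm0_* register address yields
 * the same register on an arbitrary (gpc, tpc, sm) triple. */
static uint32_t sm_reg_offset(uint32_t gpc, uint32_t tpc, uint32_t sm)
{
	return gpc * GPC_PRIV_STRIDE +
	       tpc * TPC_IN_GPC_STRIDE +
	       sm * SM_PRIV_STRIDE;
}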