author		Deepak Nibade <dnibade@nvidia.com>	2018-01-04 08:02:46 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-01-12 14:05:31 -0500
commit		5b10690479d016dd27d4592d94e7bd12a9277a34 (patch)
tree		663501e778ad0089c1930c7beff8a62dfac34ea6 /drivers/gpu/nvgpu/gv11b/gr_gv11b.c
parent		6170f1eed53b4e6770a6948474ab8a8a5e2621bc (diff)
gpu: nvgpu: handle SM reported MMU_NACK exception
Upon receiving an MMU_FAULT error, the MMU forwards an MMU_NACK to the
SM. If MMU_NACK is masked out, the SM simply releases its semaphores,
and if the semaphores are released before the MMU fault is handled,
user space could incorrectly see the operation as successful.

Fix this by handling the SM-reported MMU_NACK exception.

Enable MMU_NACK reporting in gv11b_gr_set_hww_esr_report_mask.

In the MMU_NACK handling path, we just set the error notifier and
clear the interrupt so that user space sees the error as soon as the
semaphores are released by the SM. The MMU_FAULT handling path will
take care of triggering RC recovery anyway.

Also add the necessary h/w accessors for mmu_nack.
Bug 2040594
Jira NVGPU-473
Change-Id: Ic925c2d3f3069016c57d177713066c29ab39dc3d
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1631708
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
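In outline, the handling path added by this change reduces to the
condensed sketch below. It is simplified from the hunks that follow,
assumes the usual nvgpu internal headers, and the function name
handle_mmu_nack_sketch is hypothetical; the helpers it calls are the
real ones used in the diff.

/*
 * Condensed sketch (not the verbatim driver code) of the MMU_NACK
 * handling flow; helper names match the nvgpu functions in the diff.
 */
static int handle_mmu_nack_sketch(struct gk20a *g, u32 gpc, u32 tpc,
				  u32 sm, struct channel_gk20a *fault_ch)
{
	/* Set the error notifier on the faulting TSG so user space sees
	 * the failure as soon as the SM releases its semaphores. */
	if (fault_ch)
		gk20a_fifo_set_ctx_mmu_error_tsg(g,
				&g->fifo.tsg[fault_ch->tsgid]);

	/* Clear the per-SM warp ESR interrupt; triggering RC recovery
	 * is left to the MMU_FAULT handling path. */
	nvgpu_writel(g, gr_gpc0_tpc0_sm0_hww_warp_esr_r() +
			gk20a_gr_gpc_offset(g, gpc) +
			gk20a_gr_tpc_offset(g, tpc) +
			gv11b_gr_sm_offset(g, sm), 0);

	return 0;
}

Keeping RC recovery in the MMU_FAULT path avoids duplicating teardown
logic here: this handler only has to surface the error and ack the
interrupt.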
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b/gr_gv11b.c')
-rw-r--r--	drivers/gpu/nvgpu/gv11b/gr_gv11b.c	44
1 file changed, 43 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index baad5e47..d5924169 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1994,6 +1994,39 @@ void gr_gv11b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gv11b);
 }
 
+static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
+			u32 gpc, u32 tpc, u32 sm,
+			u32 warp_esr,
+			struct channel_gk20a *fault_ch)
+{
+	struct tsg_gk20a *tsg;
+	u32 offset;
+
+	if (fault_ch) {
+		tsg = &g->fifo.tsg[fault_ch->tsgid];
+
+		/*
+		 * Upon receiving MMU_FAULT error, MMU will forward MMU_NACK
+		 * to SM. So MMU_FAULT handling path will take care of
+		 * triggering RC recovery
+		 *
+		 * In MMU_NACK handling path, we just set the error notifier
+		 * and clear the interrupt so that the User Space sees the
+		 * error as soon as semaphores are released by SM
+		 */
+		gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+	}
+
+	/* clear interrupt */
+	offset = gk20a_gr_gpc_offset(g, gpc) +
+			gk20a_gr_tpc_offset(g, tpc) +
+			gv11b_gr_sm_offset(g, sm);
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
+
+	return 0;
+}
+
 /* @brief pre-process work on the SM exceptions to determine if we clear them or not.
  *
  * On Pascal, if we are in CILP preemtion mode, preempt the channel and handle errors with special processing
@@ -2013,6 +2046,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 	*early_exit = false;
 	*ignore_debugger = false;
 
+	/*
+	 * We don't need to trigger CILP in case of MMU_NACK
+	 * So just handle MMU_NACK and return
+	 */
+	if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f())
+		return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
+				warp_esr, fault_ch);
+
 	if (fault_ch)
 		cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
 				NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
@@ -2992,7 +3033,8 @@ void gv11b_gr_set_hww_esr_report_mask(struct gk20a *g)
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f() |
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() |
 		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f() |
-		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f());
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f() |
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f());
 
 	/* setup sm global esr report mask. vat_alarm_report is not enabled */
 	gk20a_writel(g, gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r(),
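The new h/w accessors for mmu_nack mentioned in the commit message land
in the generated hw headers rather than in this file, so they do not
appear in this diffstat-limited view. As a rough illustration of the
nvgpu generated-accessor style they follow -- the return value below is
a placeholder, not the real register encoding:

/* Hypothetical illustration only; the real definition and field
 * value live in the generated hw_gr_gv11b.h header. */
static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
{
	return 0x20U; /* placeholder field value, for illustration only */
}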