summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vinod G <vinodg@nvidia.com> 2018-05-25 18:44:34 -0400
committer: Tejal Kudav <tkudav@nvidia.com> 2018-06-14 09:44:07 -0400
commit: 7aded206bc3eb0f36422e9f6f3dab3e065e7e7e4 (patch)
tree: be963b37e3ea18151e41c8d83e237255d25c7849
parent: c8c686f8554352fc209fda592ec3b490811532aa (diff)
gpu: nvgpu: gv11b: Handle all SM errors
Add the missing register bits to identify the SM errors. Except for mmu_nack error, all other errors are handled using a single function. That function sets the error notifier with GR_EXCEPTION, clears interrupt and triggers recovery process.

bug 200402677
JIRA NVGPU-573

Change-Id: Icfaff1f20f1f35adb4cd35ce288ce694845aed3c
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1730963
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 105
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h 76
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h 68
3 files changed, 234 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 84699db7..378bdc13 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2089,7 +2089,7 @@ void gr_gv11b_get_access_map(struct gk20a *g,
2089 2089
2090static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g, 2090static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
2091 u32 gpc, u32 tpc, u32 sm, 2091 u32 gpc, u32 tpc, u32 sm,
2092 u32 warp_esr, 2092 u32 warp_esr_error,
2093 struct channel_gk20a *fault_ch) 2093 struct channel_gk20a *fault_ch)
2094{ 2094{
2095 struct tsg_gk20a *tsg; 2095 struct tsg_gk20a *tsg;
@@ -2117,17 +2117,92 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g,
2117 nvgpu_writel(g, 2117 nvgpu_writel(g,
2118 gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0); 2118 gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0);
2119 2119
2120 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2121 "ESR %s(0x%x)",
2122 "MMU NACK ERROR",
2123 warp_esr_error);
2120 return 0; 2124 return 0;
2121} 2125}
2122 2126
2123static int gr_gv11b_handle_warp_esr_error_misaligned_addr(struct gk20a *g, 2127static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)
2124 u32 gpc, u32 tpc, u32 sm, 2128{
2125 u32 warp_esr, 2129 u32 index = 0U;
2126 struct channel_gk20a *fault_ch) 2130 u32 esr_err = gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f();
2131
2132 struct warp_esr_error_table_s {
2133 u32 error_value;
2134 const char *error_name;
2135 };
2136
2137 struct warp_esr_error_table_s warp_esr_error_table[] = {
2138 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_error_f(),
2139 "STACK ERROR"},
2140 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(),
2141 "API STACK ERROR"},
2142 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_wrap_f(),
2143 "PC WRAP ERROR"},
2144 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(),
2145 "MISALIGNED PC ERROR"},
2146 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(),
2147 "PC OVERFLOW ERROR"},
2148 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(),
2149 "MISALIGNED REG ERROR"},
2150 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(),
2151 "ILLEGAL INSTRUCTION ENCODING ERROR"},
2152 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(),
2153 "ILLEGAL INSTRUCTION PARAM ERROR"},
2154 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(),
2155 "OOR REG ERROR"},
2156 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(),
2157 "OOR ADDR ERROR"},
2158 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(),
2159 "MISALIGNED ADDR ERROR"},
2160 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(),
2161 "INVALID ADDR SPACE ERROR"},
2162 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(),
2163 "INVALID ADDR LDC ERROR"},
2164 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_overflow_f(),
2165 "STACK OVERFLOW ERROR"},
2166 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(),
2167 "MMU FAULT ERROR"},
2168 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(),
2169 "TEX FORMAT ERROR"},
2170 { gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(),
2171 "TEX LAYOUT ERROR"},
2172 };
2173
2174 for (index = 0; index < ARRAY_SIZE(warp_esr_error_table); index++) {
2175 if (warp_esr_error_table[index].error_value == warp_esr_error) {
2176 esr_err = warp_esr_error_table[index].error_value;
2177 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2178 "ESR %s(0x%x)",
2179 warp_esr_error_table[index].error_name,
2180 esr_err);
2181 break;
2182 }
2183 }
2184
2185 return (esr_err == 0U) ? false : true;
2186}
2187static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g,
2188 u32 gpc, u32 tpc, u32 sm,
2189 u32 warp_esr_error,
2190 struct channel_gk20a *fault_ch)
2127{ 2191{
2128 struct tsg_gk20a *tsg; 2192 struct tsg_gk20a *tsg;
2129 u32 offset;
2130 struct channel_gk20a *ch_tsg; 2193 struct channel_gk20a *ch_tsg;
2194 u32 offset = 0U;
2195 bool is_esr_error = false;
2196
2197 /*
2198 * Check for an esr error
2199 */
2200 is_esr_error = gr_gv11b_check_warp_esr_error(g, warp_esr_error);
2201 if (!is_esr_error) {
2202 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
2203 "No ESR error, Skip RC recovery and Trigeer CILP");
2204 return 0;
2205 }
2131 2206
2132 if (fault_ch) { 2207 if (fault_ch) {
2133 tsg = &g->fifo.tsg[fault_ch->tsgid]; 2208 tsg = &g->fifo.tsg[fault_ch->tsgid];
@@ -2170,8 +2245,10 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
2170 u32 offset = gk20a_gr_gpc_offset(g, gpc) + 2245 u32 offset = gk20a_gr_gpc_offset(g, gpc) +
2171 gk20a_gr_tpc_offset(g, tpc) + 2246 gk20a_gr_tpc_offset(g, tpc) +
2172 gv11b_gr_sm_offset(g, sm); 2247 gv11b_gr_sm_offset(g, sm);
2248 u32 warp_esr_error = gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr);
2173 struct tsg_gk20a *tsg; 2249 struct tsg_gk20a *tsg;
2174 2250
2251
2175 *early_exit = false; 2252 *early_exit = false;
2176 *ignore_debugger = false; 2253 *ignore_debugger = false;
2177 2254
@@ -2179,13 +2256,19 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
2179 * We don't need to trigger CILP in case of MMU_NACK 2256 * We don't need to trigger CILP in case of MMU_NACK
2180 * So just handle MMU_NACK and return 2257 * So just handle MMU_NACK and return
2181 */ 2258 */
2182 if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f()) 2259 if (warp_esr_error == gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f())
2183 return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, 2260 return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
2184 warp_esr, fault_ch); 2261 warp_esr_error, fault_ch);
2185 2262
2186 if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f()) 2263 /*
2187 return gr_gv11b_handle_warp_esr_error_misaligned_addr(g, gpc, tpc, sm, 2264 * Proceed to trigger CILP preemption if the return value
2188 warp_esr, fault_ch); 2265 * from this function is zero, else proceed to recovery
2266 */
2267 ret = gr_gv11b_handle_all_warp_esr_errors(g, gpc, tpc, sm,
2268 warp_esr_error, fault_ch);
2269 if (ret) {
2270 return ret;
2271 }
2189 2272
2190 if (fault_ch) { 2273 if (fault_ch) {
2191 tsg = tsg_gk20a_from_ch(fault_ch); 2274 tsg = tsg_gk20a_from_ch(fault_ch);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
index 29fd9a6f..0f83d6ba 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
@@ -3632,17 +3632,81 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_v(void)
3632{ 3632{
3633 return 0x00000000U; 3633 return 0x00000000U;
3634} 3634}
3635static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
3636{
3637 return 0x0U;
3638}
3639static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_error_f(void)
3640{
3641 return 0x1U;
3642}
3643static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(void)
3644{
3645 return 0x2U;
3646}
3647static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_wrap_f(void)
3648{
3649 return 0x4U;
3650}
3651static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(void)
3652{
3653 return 0x5U;
3654}
3655static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(void)
3656{
3657 return 0x6U;
3658}
3659static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(void)
3660{
3661 return 0x8U;
3662}
3663static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(void)
3664{
3665 return 0x9U;
3666}
3667static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(void)
3668{
3669 return 0xbU;
3670}
3671static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(void)
3672{
3673 return 0xdU;
3674}
3675static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(void)
3676{
3677 return 0xeU;
3678}
3635static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void) 3679static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void)
3636{ 3680{
3637 return 0xfU; 3681 return 0xfU;
3638} 3682}
3639static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void) 3683static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(void)
3640{ 3684{
3641 return 0x20U; 3685 return 0x10U;
3642} 3686}
3643static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void) 3687static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(void)
3644{ 3688{
3645 return 0x0U; 3689 return 0x12U;
3690}
3691static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_overflow_f(void)
3692{
3693 return 0x16U;
3694}
3695static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(void)
3696{
3697 return 0x17U;
3698}
3699static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(void)
3700{
3701 return 0x18U;
3702}
3703static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(void)
3704{
3705 return 0x19U;
3706}
3707static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
3708{
3709 return 0x20U;
3646} 3710}
3647static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_wrap_id_m(void) 3711static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_wrap_id_m(void)
3648{ 3712{
@@ -3672,6 +3736,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r(void)
3672{ 3736{
3673 return 0x00504738U; 3737 return 0x00504738U;
3674} 3738}
3739static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_hi_r(void)
3740{
3741 return 0x0050473cU;
3742}
3675static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void) 3743static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
3676{ 3744{
3677 return 0x005043a0U; 3745 return 0x005043a0U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 17c7e77d..5de691a2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -4392,10 +4392,74 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(void)
4392{ 4392{
4393 return 0x0U; 4393 return 0x0U;
4394} 4394}
4395static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_error_f(void)
4396{
4397 return 0x1U;
4398}
4399static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(void)
4400{
4401 return 0x2U;
4402}
4403static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_wrap_f(void)
4404{
4405 return 0x4U;
4406}
4407static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(void)
4408{
4409 return 0x5U;
4410}
4411static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(void)
4412{
4413 return 0x6U;
4414}
4415static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(void)
4416{
4417 return 0x8U;
4418}
4419static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(void)
4420{
4421 return 0x9U;
4422}
4423static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(void)
4424{
4425 return 0xbU;
4426}
4427static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(void)
4428{
4429 return 0xdU;
4430}
4431static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(void)
4432{
4433 return 0xeU;
4434}
4395static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void) 4435static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(void)
4396{ 4436{
4397 return 0xfU; 4437 return 0xfU;
4398} 4438}
4439static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(void)
4440{
4441 return 0x10U;
4442}
4443static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(void)
4444{
4445 return 0x12U;
4446}
4447static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_overflow_f(void)
4448{
4449 return 0x16U;
4450}
4451static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(void)
4452{
4453 return 0x17U;
4454}
4455static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(void)
4456{
4457 return 0x18U;
4458}
4459static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(void)
4460{
4461 return 0x19U;
4462}
4399static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void) 4463static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(void)
4400{ 4464{
4401 return 0x20U; 4465 return 0x20U;
@@ -4428,6 +4492,10 @@ static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r(void)
4428{ 4492{
4429 return 0x00504738U; 4493 return 0x00504738U;
4430} 4494}
4495static inline u32 gr_gpc0_tpc0_sm0_hww_warp_esr_pc_hi_r(void)
4496{
4497 return 0x0050473cU;
4498}
4431static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void) 4499static inline u32 gr_gpc0_tpc0_sm_halfctl_ctrl_r(void)
4432{ 4500{
4433 return 0x005043a0U; 4501 return 0x005043a0U;