diff options
author | Vinod G <vinodg@nvidia.com> | 2018-05-25 18:44:34 -0400 |
---|---|---|
committer | Tejal Kudav <tkudav@nvidia.com> | 2018-06-14 09:44:07 -0400 |
commit | 7aded206bc3eb0f36422e9f6f3dab3e065e7e7e4 (patch) | |
tree | be963b37e3ea18151e41c8d83e237255d25c7849 /drivers/gpu/nvgpu/gv11b | |
parent | c8c686f8554352fc209fda592ec3b490811532aa (diff) |
gpu: nvgpu: gv11b: Handle all SM errors
Add the missing register bits to identify the
SM errors.
Except for the mmu_nack error, all other errors are
handled by a single function.
That function sets the error notifier with GR_EXCEPTION,
clears the interrupt, and triggers the recovery process.
bug 200402677
JIRA NVGPU-573
Change-Id: Icfaff1f20f1f35adb4cd35ce288ce694845aed3c
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1730963
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 105 |
1 files changed, 94 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 84699db7..378bdc13 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -2089,7 +2089,7 @@ void gr_gv11b_get_access_map(struct gk20a *g, | |||
2089 | 2089 | ||
2090 | static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g, | 2090 | static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g, |
2091 | u32 gpc, u32 tpc, u32 sm, | 2091 | u32 gpc, u32 tpc, u32 sm, |
2092 | u32 warp_esr, | 2092 | u32 warp_esr_error, |
2093 | struct channel_gk20a *fault_ch) | 2093 | struct channel_gk20a *fault_ch) |
2094 | { | 2094 | { |
2095 | struct tsg_gk20a *tsg; | 2095 | struct tsg_gk20a *tsg; |
@@ -2117,17 +2117,92 @@ static int gr_gv11b_handle_warp_esr_error_mmu_nack(struct gk20a *g, | |||
2117 | nvgpu_writel(g, | 2117 | nvgpu_writel(g, |
2118 | gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0); | 2118 | gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset, 0); |
2119 | 2119 | ||
2120 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2121 | "ESR %s(0x%x)", | ||
2122 | "MMU NACK ERROR", | ||
2123 | warp_esr_error); | ||
2120 | return 0; | 2124 | return 0; |
2121 | } | 2125 | } |
2122 | 2126 | ||
2123 | static int gr_gv11b_handle_warp_esr_error_misaligned_addr(struct gk20a *g, | 2127 | static bool gr_gv11b_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error) |
2124 | u32 gpc, u32 tpc, u32 sm, | 2128 | { |
2125 | u32 warp_esr, | 2129 | u32 index = 0U; |
2126 | struct channel_gk20a *fault_ch) | 2130 | u32 esr_err = gr_gpc0_tpc0_sm0_hww_warp_esr_error_none_f(); |
2131 | |||
2132 | struct warp_esr_error_table_s { | ||
2133 | u32 error_value; | ||
2134 | const char *error_name; | ||
2135 | }; | ||
2136 | |||
2137 | struct warp_esr_error_table_s warp_esr_error_table[] = { | ||
2138 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_error_f(), | ||
2139 | "STACK ERROR"}, | ||
2140 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(), | ||
2141 | "API STACK ERROR"}, | ||
2142 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_wrap_f(), | ||
2143 | "PC WRAP ERROR"}, | ||
2144 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(), | ||
2145 | "MISALIGNED PC ERROR"}, | ||
2146 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(), | ||
2147 | "PC OVERFLOW ERROR"}, | ||
2148 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(), | ||
2149 | "MISALIGNED REG ERROR"}, | ||
2150 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(), | ||
2151 | "ILLEGAL INSTRUCTION ENCODING ERROR"}, | ||
2152 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(), | ||
2153 | "ILLEGAL INSTRUCTION PARAM ERROR"}, | ||
2154 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(), | ||
2155 | "OOR REG ERROR"}, | ||
2156 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(), | ||
2157 | "OOR ADDR ERROR"}, | ||
2158 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(), | ||
2159 | "MISALIGNED ADDR ERROR"}, | ||
2160 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(), | ||
2161 | "INVALID ADDR SPACE ERROR"}, | ||
2162 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(), | ||
2163 | "INVALID ADDR LDC ERROR"}, | ||
2164 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_stack_overflow_f(), | ||
2165 | "STACK OVERFLOW ERROR"}, | ||
2166 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(), | ||
2167 | "MMU FAULT ERROR"}, | ||
2168 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(), | ||
2169 | "TEX FORMAT ERROR"}, | ||
2170 | { gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(), | ||
2171 | "TEX LAYOUT ERROR"}, | ||
2172 | }; | ||
2173 | |||
2174 | for (index = 0; index < ARRAY_SIZE(warp_esr_error_table); index++) { | ||
2175 | if (warp_esr_error_table[index].error_value == warp_esr_error) { | ||
2176 | esr_err = warp_esr_error_table[index].error_value; | ||
2177 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2178 | "ESR %s(0x%x)", | ||
2179 | warp_esr_error_table[index].error_name, | ||
2180 | esr_err); | ||
2181 | break; | ||
2182 | } | ||
2183 | } | ||
2184 | |||
2185 | return (esr_err == 0U) ? false : true; | ||
2186 | } | ||
2187 | static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g, | ||
2188 | u32 gpc, u32 tpc, u32 sm, | ||
2189 | u32 warp_esr_error, | ||
2190 | struct channel_gk20a *fault_ch) | ||
2127 | { | 2191 | { |
2128 | struct tsg_gk20a *tsg; | 2192 | struct tsg_gk20a *tsg; |
2129 | u32 offset; | ||
2130 | struct channel_gk20a *ch_tsg; | 2193 | struct channel_gk20a *ch_tsg; |
2194 | u32 offset = 0U; | ||
2195 | bool is_esr_error = false; | ||
2196 | |||
2197 | /* | ||
2198 | * Check for an esr error | ||
2199 | */ | ||
2200 | is_esr_error = gr_gv11b_check_warp_esr_error(g, warp_esr_error); | ||
2201 | if (!is_esr_error) { | ||
2202 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, | ||
2203 | "No ESR error, Skip RC recovery and Trigeer CILP"); | ||
2204 | return 0; | ||
2205 | } | ||
2131 | 2206 | ||
2132 | if (fault_ch) { | 2207 | if (fault_ch) { |
2133 | tsg = &g->fifo.tsg[fault_ch->tsgid]; | 2208 | tsg = &g->fifo.tsg[fault_ch->tsgid]; |
@@ -2170,8 +2245,10 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, | |||
2170 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | 2245 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + |
2171 | gk20a_gr_tpc_offset(g, tpc) + | 2246 | gk20a_gr_tpc_offset(g, tpc) + |
2172 | gv11b_gr_sm_offset(g, sm); | 2247 | gv11b_gr_sm_offset(g, sm); |
2248 | u32 warp_esr_error = gr_gpc0_tpc0_sm0_hww_warp_esr_error_v(warp_esr); | ||
2173 | struct tsg_gk20a *tsg; | 2249 | struct tsg_gk20a *tsg; |
2174 | 2250 | ||
2251 | |||
2175 | *early_exit = false; | 2252 | *early_exit = false; |
2176 | *ignore_debugger = false; | 2253 | *ignore_debugger = false; |
2177 | 2254 | ||
@@ -2179,13 +2256,19 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, | |||
2179 | * We don't need to trigger CILP in case of MMU_NACK | 2256 | * We don't need to trigger CILP in case of MMU_NACK |
2180 | * So just handle MMU_NACK and return | 2257 | * So just handle MMU_NACK and return |
2181 | */ | 2258 | */ |
2182 | if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f()) | 2259 | if (warp_esr_error == gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f()) |
2183 | return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, | 2260 | return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, |
2184 | warp_esr, fault_ch); | 2261 | warp_esr_error, fault_ch); |
2185 | 2262 | ||
2186 | if (warp_esr & gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f()) | 2263 | /* |
2187 | return gr_gv11b_handle_warp_esr_error_misaligned_addr(g, gpc, tpc, sm, | 2264 | * Proceed to trigger CILP preemption if the return value |
2188 | warp_esr, fault_ch); | 2265 | * from this function is zero, else proceed to recovery |
2266 | */ | ||
2267 | ret = gr_gv11b_handle_all_warp_esr_errors(g, gpc, tpc, sm, | ||
2268 | warp_esr_error, fault_ch); | ||
2269 | if (ret) { | ||
2270 | return ret; | ||
2271 | } | ||
2189 | 2272 | ||
2190 | if (fault_ch) { | 2273 | if (fault_ch) { |
2191 | tsg = tsg_gk20a_from_ch(fault_ch); | 2274 | tsg = tsg_gk20a_from_ch(fault_ch); |