summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2015-12-24 08:11:15 -0500
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-04-19 11:07:22 -0400
commitc651adbeaacf063b856ef8126b74661b54066477 (patch)
treec402810943925ae5fa4ed824e33943259efc74b0 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent04e45bc943e9703c26f229dfbe558d94418acbe1 (diff)
gpu: nvgpu: IOCTL to write/clear SM error states
Add below IOCTLs to write/clear SM error states NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE Bug 200156699 Change-Id: I89e3ec51c33b8e131a67d28807d5acf57b3a48fd Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/1120330 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c111
1 file changed, 111 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index eeb70d76..204a90f3 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1219,6 +1219,115 @@ static int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
1219 return 0; 1219 return 0;
1220} 1220}
1221 1221
1222static int gm20b_gr_update_sm_error_state(struct gk20a *g,
1223 struct channel_gk20a *ch, u32 sm_id,
1224 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state)
1225{
1226 u32 gpc, tpc, offset;
1227 struct gr_gk20a *gr = &g->gr;
1228 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
1229 int err = 0;
1230
1231 mutex_lock(&g->dbg_sessions_lock);
1232
1233 gr->sm_error_states[sm_id].hww_global_esr =
1234 sm_error_state->hww_global_esr;
1235 gr->sm_error_states[sm_id].hww_warp_esr =
1236 sm_error_state->hww_warp_esr;
1237 gr->sm_error_states[sm_id].hww_warp_esr_pc =
1238 sm_error_state->hww_warp_esr_pc;
1239 gr->sm_error_states[sm_id].hww_global_esr_report_mask =
1240 sm_error_state->hww_global_esr_report_mask;
1241 gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
1242 sm_error_state->hww_warp_esr_report_mask;
1243
1244 err = gr_gk20a_disable_ctxsw(g);
1245 if (err) {
1246 gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw\n");
1247 goto fail;
1248 }
1249
1250 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1251 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1252
1253 offset = proj_gpc_stride_v() * gpc +
1254 proj_tpc_in_gpc_stride_v() * tpc;
1255
1256 if (gk20a_is_channel_ctx_resident(ch)) {
1257 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1258 gr->sm_error_states[sm_id].hww_global_esr);
1259 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
1260 gr->sm_error_states[sm_id].hww_warp_esr);
1261 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
1262 gr->sm_error_states[sm_id].hww_warp_esr_pc);
1263 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
1264 gr->sm_error_states[sm_id].hww_global_esr_report_mask);
1265 gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
1266 gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
1267 } else {
1268 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
1269 if (err)
1270 goto enable_ctxsw;
1271
1272 gr_gk20a_ctx_patch_write(g, ch_ctx,
1273 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
1274 gr->sm_error_states[sm_id].hww_global_esr_report_mask,
1275 true);
1276 gr_gk20a_ctx_patch_write(g, ch_ctx,
1277 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
1278 gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
1279 true);
1280
1281 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
1282 }
1283
1284enable_ctxsw:
1285 err = gr_gk20a_enable_ctxsw(g);
1286
1287fail:
1288 mutex_unlock(&g->dbg_sessions_lock);
1289 return err;
1290}
1291
1292static int gm20b_gr_clear_sm_error_state(struct gk20a *g,
1293 struct channel_gk20a *ch, u32 sm_id)
1294{
1295 u32 gpc, tpc, offset;
1296 u32 val;
1297 struct gr_gk20a *gr = &g->gr;
1298 int err = 0;
1299
1300 mutex_lock(&g->dbg_sessions_lock);
1301
1302 memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
1303
1304 err = gr_gk20a_disable_ctxsw(g);
1305 if (err) {
1306 gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw\n");
1307 goto fail;
1308 }
1309
1310 if (gk20a_is_channel_ctx_resident(ch)) {
1311 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1312 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1313
1314 offset = proj_gpc_stride_v() * gpc +
1315 proj_tpc_in_gpc_stride_v() * tpc;
1316
1317 val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
1318 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
1319 val);
1320 gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
1321 0);
1322 }
1323
1324 err = gr_gk20a_enable_ctxsw(g);
1325
1326fail:
1327 mutex_unlock(&g->dbg_sessions_lock);
1328 return err;
1329}
1330
1222void gm20b_init_gr(struct gpu_ops *gops) 1331void gm20b_init_gr(struct gpu_ops *gops)
1223{ 1332{
1224 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1333 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1286,4 +1395,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
1286 gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; 1395 gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
1287 gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; 1396 gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
1288 gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state; 1397 gops->gr.record_sm_error_state = gm20b_gr_record_sm_error_state;
1398 gops->gr.update_sm_error_state = gm20b_gr_update_sm_error_state;
1399 gops->gr.clear_sm_error_state = gm20b_gr_clear_sm_error_state;
1289} 1400}