diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 59 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 54 |
3 files changed, 65 insertions, 50 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 3c668013..536f00e0 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -329,62 +329,21 @@ static int nvgpu_gpu_ioctl_set_debug_mode( | |||
329 | struct gk20a *g, | 329 | struct gk20a *g, |
330 | struct nvgpu_gpu_sm_debug_mode_args *args) | 330 | struct nvgpu_gpu_sm_debug_mode_args *args) |
331 | { | 331 | { |
332 | int gpc, tpc, err = 0; | ||
333 | u32 sm_id, sm_dbgr_ctrl0; | ||
334 | struct channel_gk20a *ch; | 332 | struct channel_gk20a *ch; |
335 | struct nvgpu_dbg_gpu_reg_op ops; | 333 | int err; |
336 | u32 tpc_offset, gpc_offset, reg_offset; | ||
337 | 334 | ||
338 | ch = gk20a_get_channel_from_file(args->channel_fd); | 335 | ch = gk20a_get_channel_from_file(args->channel_fd); |
336 | if (!ch) | ||
337 | return -EINVAL; | ||
339 | 338 | ||
340 | mutex_lock(&g->dbg_sessions_lock); | 339 | mutex_lock(&g->dbg_sessions_lock); |
341 | 340 | if (g->ops.gr.set_sm_debug_mode) | |
342 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | 341 | err = g->ops.gr.set_sm_debug_mode(g, ch, |
343 | if (args->sms & (1 << sm_id)) { | 342 | args->sms, !!args->enable); |
344 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 343 | else |
345 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 344 | err = -ENOSYS; |
346 | |||
347 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | ||
348 | gpc_offset = proj_gpc_stride_v() * gpc; | ||
349 | reg_offset = tpc_offset + gpc_offset; | ||
350 | |||
351 | ops.op = REGOP(READ_32); | ||
352 | ops.type = REGOP(TYPE_GR_CTX); | ||
353 | ops.status = REGOP(STATUS_SUCCESS); | ||
354 | ops.value_hi = 0; | ||
355 | ops.and_n_mask_lo = 0; | ||
356 | ops.and_n_mask_hi = 0; | ||
357 | ops.offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset; | ||
358 | |||
359 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
360 | sm_dbgr_ctrl0 = ops.value_lo; | ||
361 | |||
362 | if (args->enable) { | ||
363 | sm_dbgr_ctrl0 = set_field(sm_dbgr_ctrl0, | ||
364 | gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m(), | ||
365 | gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_f()); | ||
366 | sm_dbgr_ctrl0 = set_field(sm_dbgr_ctrl0, | ||
367 | gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_m(), | ||
368 | gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f()); | ||
369 | sm_dbgr_ctrl0 = set_field(sm_dbgr_ctrl0, | ||
370 | gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_m(), | ||
371 | gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f()); | ||
372 | } else { | ||
373 | sm_dbgr_ctrl0 = set_field(sm_dbgr_ctrl0, | ||
374 | gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m(), | ||
375 | gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_f()); | ||
376 | } | ||
377 | |||
378 | if (!err) { | ||
379 | ops.op = REGOP(WRITE_32); | ||
380 | ops.value_lo = sm_dbgr_ctrl0; | ||
381 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0); | ||
382 | } else | ||
383 | gk20a_err(dev_from_gk20a(g), "Failed to access register\n"); | ||
384 | } | ||
385 | } | ||
386 | |||
387 | mutex_unlock(&g->dbg_sessions_lock); | 345 | mutex_unlock(&g->dbg_sessions_lock); |
346 | |||
388 | return err; | 347 | return err; |
389 | } | 348 | } |
390 | 349 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 6f2258e9..f7b98e39 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -185,6 +185,8 @@ struct gpu_ops { | |||
185 | u32 expect_delay); | 185 | u32 expect_delay); |
186 | void (*init_cyclestats)(struct gk20a *g); | 186 | void (*init_cyclestats)(struct gk20a *g); |
187 | void (*enable_cde_in_fecs)(void *ctx_ptr); | 187 | void (*enable_cde_in_fecs)(void *ctx_ptr); |
188 | int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch, | ||
189 | u64 sms, bool enable); | ||
188 | void (*bpt_reg_info)(struct gk20a *g, | 190 | void (*bpt_reg_info)(struct gk20a *g, |
189 | struct warpstate *w_state); | 191 | struct warpstate *w_state); |
190 | void (*get_access_map)(struct gk20a *g, | 192 | void (*get_access_map)(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 30beb962..73adb071 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -7261,6 +7261,59 @@ static void gr_gk20a_init_cyclestats(struct gk20a *g) | |||
7261 | #endif | 7261 | #endif |
7262 | } | 7262 | } |
7263 | 7263 | ||
7264 | static int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | ||
7265 | struct channel_gk20a *ch, u64 sms, bool enable) | ||
7266 | { | ||
7267 | struct nvgpu_dbg_gpu_reg_op *ops; | ||
7268 | int i = 0, sm_id, err; | ||
7269 | |||
7270 | ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL); | ||
7271 | if (!ops) | ||
7272 | return -ENOMEM; | ||
7273 | for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) { | ||
7274 | int gpc, tpc; | ||
7275 | u32 tpc_offset, gpc_offset, reg_offset, reg_mask, reg_val; | ||
7276 | |||
7277 | if (!(sms & (1 << sm_id))) | ||
7278 | continue; | ||
7279 | |||
7280 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
7281 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
7282 | |||
7283 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | ||
7284 | gpc_offset = proj_gpc_stride_v() * gpc; | ||
7285 | reg_offset = tpc_offset + gpc_offset; | ||
7286 | |||
7287 | ops[i].op = REGOP(WRITE_32); | ||
7288 | ops[i].type = REGOP(TYPE_GR_CTX); | ||
7289 | ops[i].offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset; | ||
7290 | |||
7291 | reg_mask = 0; | ||
7292 | reg_val = 0; | ||
7293 | if (enable) { | ||
7294 | reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m(); | ||
7295 | reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_f(); | ||
7296 | reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_m(); | ||
7297 | reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(); | ||
7298 | reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_m(); | ||
7299 | reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(); | ||
7300 | } else { | ||
7301 | reg_mask |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_m(); | ||
7302 | reg_val |= gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_f(); | ||
7303 | } | ||
7304 | |||
7305 | ops[i].and_n_mask_lo = reg_mask; | ||
7306 | ops[i].value_lo = reg_val; | ||
7307 | i++; | ||
7308 | } | ||
7309 | |||
7310 | err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0); | ||
7311 | if (err) | ||
7312 | gk20a_err(dev_from_gk20a(g), "Failed to access register\n"); | ||
7313 | kfree(ops); | ||
7314 | return err; | ||
7315 | } | ||
7316 | |||
7264 | static void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | 7317 | static void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) |
7265 | { | 7318 | { |
7266 | /* Check if we have at least one valid warp */ | 7319 | /* Check if we have at least one valid warp */ |
@@ -7374,6 +7427,7 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
7374 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; | 7427 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; |
7375 | gops->gr.wait_empty = gr_gk20a_wait_idle; | 7428 | gops->gr.wait_empty = gr_gk20a_wait_idle; |
7376 | gops->gr.init_cyclestats = gr_gk20a_init_cyclestats; | 7429 | gops->gr.init_cyclestats = gr_gk20a_init_cyclestats; |
7430 | gops->gr.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode; | ||
7377 | gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info; | 7431 | gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info; |
7378 | gops->gr.get_access_map = gr_gk20a_get_access_map; | 7432 | gops->gr.get_access_map = gr_gk20a_get_access_map; |
7379 | } | 7433 | } |