author		Deepak Nibade <dnibade@nvidia.com>	2015-12-24 08:11:15 -0500
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-04-19 11:07:22 -0400
commit		c651adbeaacf063b856ef8126b74661b54066477 (patch)
tree		c402810943925ae5fa4ed824e33943259efc74b0 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent		04e45bc943e9703c26f229dfbe558d94418acbe1 (diff)
gpu: nvgpu: IOCTL to write/clear SM error states
Add below IOCTLs to write/clear SM error states:

NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE
NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE

Bug 200156699

Change-Id: I89e3ec51c33b8e131a67d28807d5acf57b3a48fd
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120330
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
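For context, here is a hedged userspace sketch of how a debugger client might drive the two new IOCTLs. Only the IOCTL names (from the commit message) and the nvgpu_dbg_gpu_sm_error_state_record fields (from the diff below) come from this change; the args-struct layouts, the request codes, and the /dev/nvhost-dbg-gpu node path are assumptions for illustration, not the real uapi.

/*
 * Hypothetical userspace sketch; consult the nvgpu uapi header in the
 * matching kernel tree for the real struct layouts and request macros.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <unistd.h>

struct nvgpu_dbg_gpu_sm_error_state_record {	/* fields from this change */
	uint32_t hww_global_esr;
	uint32_t hww_warp_esr;
	uint32_t hww_global_esr_report_mask;
	uint32_t hww_warp_esr_report_mask;
};

struct write_sm_error_state_args {		/* assumed layout */
	uint32_t sm_id;
	struct nvgpu_dbg_gpu_sm_error_state_record record;
};

struct clear_sm_error_state_args {		/* assumed layout */
	uint32_t sm_id;
};

/* Placeholder request codes; the real macros live in the nvgpu uapi header. */
#define NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE \
	_IOW('D', 10, struct write_sm_error_state_args)
#define NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE \
	_IOW('D', 11, struct clear_sm_error_state_args)

int main(void)
{
	struct write_sm_error_state_args wargs = {
		.sm_id = 0,
		.record = {
			/* example: unmask all global/warp error reporting */
			.hww_global_esr_report_mask = ~0u,
			.hww_warp_esr_report_mask = ~0u,
		},
	};
	struct clear_sm_error_state_args cargs = { .sm_id = 0 };
	int fd = open("/dev/nvhost-dbg-gpu", O_RDWR);	/* assumed node */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_WRITE_SINGLE_SM_ERROR_STATE, &wargs))
		perror("write sm error state");
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_CLEAR_SINGLE_SM_ERROR_STATE, &cargs))
		perror("clear sm error state");
	close(fd);
	return 0;
}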
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	107
1 file changed, 107 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index c0a25e68..4c88751e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -5535,6 +5535,111 @@ static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc)
 	return 0;
 }
 
+static int gk20a_gr_update_sm_error_state(struct gk20a *g,
+		struct channel_gk20a *ch, u32 sm_id,
+		struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state)
+{
+	u32 gpc, tpc, offset;
+	struct gr_gk20a *gr = &g->gr;
+	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	int err = 0;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	gr->sm_error_states[sm_id].hww_global_esr =
+			sm_error_state->hww_global_esr;
+	gr->sm_error_states[sm_id].hww_warp_esr =
+			sm_error_state->hww_warp_esr;
+	gr->sm_error_states[sm_id].hww_global_esr_report_mask =
+			sm_error_state->hww_global_esr_report_mask;
+	gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
+			sm_error_state->hww_warp_esr_report_mask;
+
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw\n");
+		goto fail;
+	}
+
+	gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+	tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+	offset = proj_gpc_stride_v() * gpc +
+		 proj_tpc_in_gpc_stride_v() * tpc;
+
+	if (gk20a_is_channel_ctx_resident(ch)) {
+		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+				gr->sm_error_states[sm_id].hww_global_esr);
+		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
+				gr->sm_error_states[sm_id].hww_warp_esr);
+		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
+				gr->sm_error_states[sm_id].hww_global_esr_report_mask);
+		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
+				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
+	} else {
+		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+		if (err)
+			goto enable_ctxsw;
+
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
+				gr->sm_error_states[sm_id].hww_global_esr_report_mask,
+				true);
+		gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
+				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
+				true);
+
+		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+	}
+
+enable_ctxsw:
+	err = gr_gk20a_enable_ctxsw(g);
+
+fail:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int gk20a_gr_clear_sm_error_state(struct gk20a *g,
+		struct channel_gk20a *ch, u32 sm_id)
+{
+	u32 gpc, tpc, offset;
+	u32 val;
+	struct gr_gk20a *gr = &g->gr;
+	int err = 0;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
+
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "unable to stop gr ctxsw\n");
+		goto fail;
+	}
+
+	if (gk20a_is_channel_ctx_resident(ch)) {
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		offset = proj_gpc_stride_v() * gpc +
+			 proj_tpc_in_gpc_stride_v() * tpc;
+
+		val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+				val);
+		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
+				0);
+	}
+
+	err = gr_gk20a_enable_ctxsw(g);
+
+fail:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
 int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event, struct channel_gk20a *fault_ch)
 {
@@ -8415,4 +8520,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode;
 	gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode;
 	gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state;
+	gops->gr.update_sm_error_state = gk20a_gr_update_sm_error_state;
+	gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state;
 }
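Both new helpers follow the same bracketing discipline: take dbg_sessions_lock, stop context switching, touch per-SM state (direct register writes when the channel's context is resident on the GPU, ctx patch writes into the saved context otherwise), then re-enable ctxsw and drop the lock on every path. Note also that gk20a_gr_clear_sm_error_state clears the global ESR by writing back the value it just read, which suggests those register bits are write-one-to-clear. Below is a minimal sketch of the bracket, not the driver's code, assuming only the gk20a helpers named in the diff above; with_ctxsw_stopped and body are hypothetical names.

/*
 * Minimal sketch of the lock/ctxsw bracket used by both new functions,
 * assuming the gk20a APIs named in the diff above.
 */
static int with_ctxsw_stopped(struct gk20a *g, int (*body)(struct gk20a *g))
{
	int err;

	mutex_lock(&g->dbg_sessions_lock);

	err = gr_gk20a_disable_ctxsw(g);	/* freeze context switching */
	if (err)
		goto fail;			/* never stopped: nothing to re-enable */

	(void)body(g);				/* safe to poke per-SM state here */

	err = gr_gk20a_enable_ctxsw(g);		/* as in the diff, this result wins */
fail:
	mutex_unlock(&g->dbg_sessions_lock);	/* lock is always dropped last */
	return err;
}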