summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
authorsujeet baranwal <sbaranwal@nvidia.com>2015-08-20 20:04:44 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2015-12-04 16:03:11 -0500
commit397c6d44ed3ee6cc0c24fce7711bda4f0d6cd9bf (patch)
tree4cf5477fd29605022291239e4f060a8030f793d0 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent71c8d62657db7ef40a30b7504632d668f4e64bc6 (diff)
gpu: nvgpu: Wait for pause for SMs
The order of SM locking and register reads has been changed. Also, functions have been implemented based on gk20a and gm20b. Change-Id: Iaf720d088130f84c4b2ca318d9860194c07966e1 Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com> Signed-off-by: ashutosh jain <ashutoshj@nvidia.com> Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/837236
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c59
1 files changed, 52 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 090f95a5..ef24e078 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6973,8 +6973,8 @@ static u32 gr_gk20a_get_tpc_num(u32 addr)
6973static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, 6973static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
6974 u32 global_esr_mask, bool check_errors) 6974 u32 global_esr_mask, bool check_errors)
6975{ 6975{
6976 unsigned long end_jiffies = jiffies + 6976 bool locked_down;
6977 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); 6977 bool no_error_pending;
6978 u32 delay = GR_IDLE_CHECK_DEFAULT; 6978 u32 delay = GR_IDLE_CHECK_DEFAULT;
6979 bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); 6979 bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
6980 u32 offset = 6980 u32 offset =
@@ -6991,10 +6991,10 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
6991 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); 6991 gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
6992 u32 dbgr_status0 = gk20a_readl(g, 6992 u32 dbgr_status0 = gk20a_readl(g,
6993 gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); 6993 gr_gpc0_tpc0_sm_dbgr_status0_r() + offset);
6994 bool locked_down = 6994 locked_down =
6995 (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) == 6995 (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) ==
6996 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v()); 6996 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v());
6997 bool no_error_pending = 6997 no_error_pending =
6998 check_errors && 6998 check_errors &&
6999 (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) == 6999 (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) ==
7000 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) && 7000 gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) &&
@@ -7018,9 +7018,7 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
7018 7018
7019 usleep_range(delay, delay * 2); 7019 usleep_range(delay, delay * 2);
7020 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); 7020 delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
7021 7021 } while (!locked_down);
7022 } while (time_before(jiffies, end_jiffies)
7023 || !tegra_platform_is_silicon());
7024 7022
7025 gk20a_err(dev_from_gk20a(g), 7023 gk20a_err(dev_from_gk20a(g),
7026 "GPC%d TPC%d: timed out while trying to lock down SM", 7024 "GPC%d TPC%d: timed out while trying to lock down SM",
@@ -7273,6 +7271,52 @@ static void gr_gk20a_init_cyclestats(struct gk20a *g)
7273#endif 7271#endif
7274} 7272}
7275 7273
7274void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
7275{
7276 /* Check if we have at least one valid warp */
7277 struct gr_gk20a *gr = &g->gr;
7278 u32 gpc, tpc, sm_id;
7279 u32 tpc_offset, gpc_offset, reg_offset;
7280 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
7281
7282 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
7283 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
7284 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
7285
7286 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
7287 gpc_offset = proj_gpc_stride_v() * gpc;
7288 reg_offset = tpc_offset + gpc_offset;
7289
7290 /* 64 bit read */
7291 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
7292 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
7293
7294
7295 /* 64 bit read */
7296 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
7297 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
7298
7299 /* 64 bit read */
7300 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
7301 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
7302
7303 w_state[sm_id].valid_warps[0] = warps_valid;
7304 w_state[sm_id].trapped_warps[0] = warps_trapped;
7305 w_state[sm_id].paused_warps[0] = warps_paused;
7306 }
7307
7308 /* Only for debug purpose */
7309 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
7310 gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
7311 sm_id, w_state[sm_id].valid_warps[0]);
7312 gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
7313 sm_id, w_state[sm_id].trapped_warps[0]);
7314 gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
7315 sm_id, w_state[sm_id].paused_warps[0]);
7316 }
7317}
7318
7319
7276void gk20a_init_gr_ops(struct gpu_ops *gops) 7320void gk20a_init_gr_ops(struct gpu_ops *gops)
7277{ 7321{
7278 gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; 7322 gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -7324,4 +7368,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
7324 gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; 7368 gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info;
7325 gops->gr.wait_empty = gr_gk20a_wait_idle; 7369 gops->gr.wait_empty = gr_gk20a_wait_idle;
7326 gops->gr.init_cyclestats = gr_gk20a_init_cyclestats; 7370 gops->gr.init_cyclestats = gr_gk20a_init_cyclestats;
7371 gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info;
7327} 7372}