diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  59
1 file changed, 52 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 090f95a5..ef24e078 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -6973,8 +6973,8 @@ static u32 gr_gk20a_get_tpc_num(u32 addr) | |||
6973 | static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | 6973 | static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, |
6974 | u32 global_esr_mask, bool check_errors) | 6974 | u32 global_esr_mask, bool check_errors) |
6975 | { | 6975 | { |
6976 | unsigned long end_jiffies = jiffies + | 6976 | bool locked_down; |
6977 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | 6977 | bool no_error_pending; |
6978 | u32 delay = GR_IDLE_CHECK_DEFAULT; | 6978 | u32 delay = GR_IDLE_CHECK_DEFAULT; |
6979 | bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); | 6979 | bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); |
6980 | u32 offset = | 6980 | u32 offset = |
@@ -6991,10 +6991,10 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
6991 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); | 6991 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); |
6992 | u32 dbgr_status0 = gk20a_readl(g, | 6992 | u32 dbgr_status0 = gk20a_readl(g, |
6993 | gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); | 6993 | gr_gpc0_tpc0_sm_dbgr_status0_r() + offset); |
6994 | bool locked_down = | 6994 | locked_down = |
6995 | (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) == | 6995 | (gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(dbgr_status0) == |
6996 | gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v()); | 6996 | gr_gpc0_tpc0_sm_dbgr_status0_locked_down_true_v()); |
6997 | bool no_error_pending = | 6997 | no_error_pending = |
6998 | check_errors && | 6998 | check_errors && |
6999 | (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) == | 6999 | (gr_gpc0_tpc0_sm_hww_warp_esr_error_v(warp_esr) == |
7000 | gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) && | 7000 | gr_gpc0_tpc0_sm_hww_warp_esr_error_none_v()) && |
@@ -7018,9 +7018,7 @@ static int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
7018 | 7018 | ||
7019 | usleep_range(delay, delay * 2); | 7019 | usleep_range(delay, delay * 2); |
7020 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | 7020 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); |
7021 | 7021 | } while (!locked_down); | |
7022 | } while (time_before(jiffies, end_jiffies) | ||
7023 | || !tegra_platform_is_silicon()); | ||
7024 | 7022 | ||
7025 | gk20a_err(dev_from_gk20a(g), | 7023 | gk20a_err(dev_from_gk20a(g), |
7026 | "GPC%d TPC%d: timed out while trying to lock down SM", | 7024 | "GPC%d TPC%d: timed out while trying to lock down SM", |
@@ -7273,6 +7271,52 @@ static void gr_gk20a_init_cyclestats(struct gk20a *g) | |||
7273 | #endif | 7271 | #endif |
7274 | } | 7272 | } |
7275 | 7273 | ||
7274 | void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | ||
7275 | { | ||
7276 | /* Check if we have at least one valid warp */ | ||
7277 | struct gr_gk20a *gr = &g->gr; | ||
7278 | u32 gpc, tpc, sm_id; | ||
7279 | u32 tpc_offset, gpc_offset, reg_offset; | ||
7280 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
7281 | |||
7282 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
7283 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
7284 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
7285 | |||
7286 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | ||
7287 | gpc_offset = proj_gpc_stride_v() * gpc; | ||
7288 | reg_offset = tpc_offset + gpc_offset; | ||
7289 | |||
7290 | /* 64 bit read */ | ||
7291 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32; | ||
7292 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset); | ||
7293 | |||
7294 | |||
7295 | /* 64 bit read */ | ||
7296 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32; | ||
7297 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset); | ||
7298 | |||
7299 | /* 64 bit read */ | ||
7300 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32; | ||
7301 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset); | ||
7302 | |||
7303 | w_state[sm_id].valid_warps[0] = warps_valid; | ||
7304 | w_state[sm_id].trapped_warps[0] = warps_trapped; | ||
7305 | w_state[sm_id].paused_warps[0] = warps_paused; | ||
7306 | } | ||
7307 | |||
7308 | /* Only for debug purpose */ | ||
7309 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
7310 | gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n", | ||
7311 | sm_id, w_state[sm_id].valid_warps[0]); | ||
7312 | gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n", | ||
7313 | sm_id, w_state[sm_id].trapped_warps[0]); | ||
7314 | gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n", | ||
7315 | sm_id, w_state[sm_id].paused_warps[0]); | ||
7316 | } | ||
7317 | } | ||
7318 | |||
7319 | |||
7276 | void gk20a_init_gr_ops(struct gpu_ops *gops) | 7320 | void gk20a_init_gr_ops(struct gpu_ops *gops) |
7277 | { | 7321 | { |
7278 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | 7322 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; |
@@ -7324,4 +7368,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
7324 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; | 7368 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; |
7325 | gops->gr.wait_empty = gr_gk20a_wait_idle; | 7369 | gops->gr.wait_empty = gr_gk20a_wait_idle; |
7326 | gops->gr.init_cyclestats = gr_gk20a_init_cyclestats; | 7370 | gops->gr.init_cyclestats = gr_gk20a_init_cyclestats; |
7371 | gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info; | ||
7327 | } | 7372 | } |