summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
diff options
context:
space:
mode:
authorsujeet baranwal <sbaranwal@nvidia.com>2015-08-20 20:04:44 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2015-12-04 16:03:11 -0500
commit397c6d44ed3ee6cc0c24fce7711bda4f0d6cd9bf (patch)
tree4cf5477fd29605022291239e4f060a8030f793d0 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent71c8d62657db7ef40a30b7504632d668f4e64bc6 (diff)
gpu: nvgpu: Wait for pause for SMs
The order of SM locking and register reads has been changed. Also, functions have been implemented based on gk20a and gm20b. Change-Id: Iaf720d088130f84c4b2ca318d9860194c07966e1 Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com> Signed-off-by: ashutosh jain <ashutoshj@nvidia.com> Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/837236
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c78
1 files changed, 78 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index d590f566..512c470d 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1050,6 +1050,83 @@ static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr)
1050 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); 1050 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v);
1051} 1051}
1052 1052
1053void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
1054{
1055 /* Check if we have at least one valid warp */
1056 /* get paused state on maxwell */
1057 struct gr_gk20a *gr = &g->gr;
1058 u32 gpc, tpc, sm_id;
1059 u32 tpc_offset, gpc_offset, reg_offset;
1060 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
1061
1062 /* for maxwell & kepler */
1063 u32 numSmPerTpc = 1;
1064 u32 numWarpPerTpc = g->gpu_characteristics.sm_arch_warp_count * numSmPerTpc;
1065
1066 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1067 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1068 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1069
1070 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
1071 gpc_offset = proj_gpc_stride_v() * gpc;
1072 reg_offset = tpc_offset + gpc_offset;
1073
1074 /* 64 bit read */
1075 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
1076 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
1077
1078 /* 64 bit read */
1079 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
1080 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
1081
1082 /* 64 bit read */
1083 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
1084 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
1085
1086 w_state[sm_id].valid_warps[0] = warps_valid;
1087 w_state[sm_id].trapped_warps[0] = warps_trapped;
1088 w_state[sm_id].paused_warps[0] = warps_paused;
1089
1090
1091 if (numWarpPerTpc > 64) {
1092 /* 64 bit read */
1093 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset + 4) << 32;
1094 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset);
1095
1096 /* 64 bit read */
1097 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset + 4) << 32;
1098 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset);
1099
1100 /* 64 bit read */
1101 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset + 4) << 32;
1102 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset);
1103
1104 w_state[sm_id].valid_warps[1] = warps_valid;
1105 w_state[sm_id].trapped_warps[1] = warps_trapped;
1106 w_state[sm_id].paused_warps[1] = warps_paused;
1107 }
1108 }
1109
1110
1111 /* Only for debug purpose */
1112 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1113 gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
1114 sm_id, w_state[sm_id].valid_warps[0]);
1115 gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n",
1116 sm_id, w_state[sm_id].valid_warps[1]);
1117
1118 gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
1119 sm_id, w_state[sm_id].trapped_warps[0]);
1120 gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n",
1121 sm_id, w_state[sm_id].trapped_warps[1]);
1122
1123 gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
1124 sm_id, w_state[sm_id].paused_warps[0]);
1125 gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n",
1126 sm_id, w_state[sm_id].paused_warps[1]);
1127 }
1128}
1129
1053void gm20b_init_gr(struct gpu_ops *gops) 1130void gm20b_init_gr(struct gpu_ops *gops)
1054{ 1131{
1055 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1132 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1106,4 +1183,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
1106 gops->gr.wait_empty = gr_gk20a_wait_idle; 1183 gops->gr.wait_empty = gr_gk20a_wait_idle;
1107 gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; 1184 gops->gr.init_cyclestats = gr_gm20b_init_cyclestats;
1108 gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs; 1185 gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs;
1186 gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info;
1109} 1187}