summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gm20b
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c 78
-rw-r--r-- drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h 30
2 files changed, 105 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index d590f566..512c470d 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1050,6 +1050,83 @@ static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr)
1050 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); 1050 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v);
1051} 1051}
1052 1052
1053void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
1054{
1055 /* Check if we have at least one valid warp */
1056 /* get paused state on maxwell */
1057 struct gr_gk20a *gr = &g->gr;
1058 u32 gpc, tpc, sm_id;
1059 u32 tpc_offset, gpc_offset, reg_offset;
1060 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
1061
1062 /* for maxwell & kepler */
1063 u32 numSmPerTpc = 1;
1064 u32 numWarpPerTpc = g->gpu_characteristics.sm_arch_warp_count * numSmPerTpc;
1065
1066 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1067 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1068 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1069
1070 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
1071 gpc_offset = proj_gpc_stride_v() * gpc;
1072 reg_offset = tpc_offset + gpc_offset;
1073
1074 /* 64 bit read */
1075 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32;
1076 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset);
1077
1078 /* 64 bit read */
1079 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32;
1080 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset);
1081
1082 /* 64 bit read */
1083 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32;
1084 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset);
1085
1086 w_state[sm_id].valid_warps[0] = warps_valid;
1087 w_state[sm_id].trapped_warps[0] = warps_trapped;
1088 w_state[sm_id].paused_warps[0] = warps_paused;
1089
1090
1091 if (numWarpPerTpc > 64) {
1092 /* 64 bit read */
1093 warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset + 4) << 32;
1094 warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset);
1095
1096 /* 64 bit read */
1097 warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset + 4) << 32;
1098 warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset);
1099
1100 /* 64 bit read */
1101 warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset + 4) << 32;
1102 warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset);
1103
1104 w_state[sm_id].valid_warps[1] = warps_valid;
1105 w_state[sm_id].trapped_warps[1] = warps_trapped;
1106 w_state[sm_id].paused_warps[1] = warps_paused;
1107 }
1108 }
1109
1110
1111 /* Only for debug purpose */
1112 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
1113 gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n",
1114 sm_id, w_state[sm_id].valid_warps[0]);
1115 gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n",
1116 sm_id, w_state[sm_id].valid_warps[1]);
1117
1118 gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n",
1119 sm_id, w_state[sm_id].trapped_warps[0]);
1120 gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n",
1121 sm_id, w_state[sm_id].trapped_warps[1]);
1122
1123 gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n",
1124 sm_id, w_state[sm_id].paused_warps[0]);
1125 gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n",
1126 sm_id, w_state[sm_id].paused_warps[1]);
1127 }
1128}
1129
1053void gm20b_init_gr(struct gpu_ops *gops) 1130void gm20b_init_gr(struct gpu_ops *gops)
1054{ 1131{
1055 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1132 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1106,4 +1183,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
1106 gops->gr.wait_empty = gr_gk20a_wait_idle; 1183 gops->gr.wait_empty = gr_gk20a_wait_idle;
1107 gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; 1184 gops->gr.init_cyclestats = gr_gm20b_init_cyclestats;
1108 gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs; 1185 gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs;
1186 gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info;
1109} 1187}
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index 4a712394..05f6cae5 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -3130,15 +3130,15 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
3130{ 3130{
3131 return 0x0; 3131 return 0x0;
3132} 3132}
3133static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) 3133static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_0_r(void)
3134{ 3134{
3135 return 0x00504614; 3135 return 0x00504614;
3136} 3136}
3137static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) 3137static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_0_r(void)
3138{ 3138{
3139 return 0x00504624; 3139 return 0x00504624;
3140} 3140}
3141static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) 3141static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_0_r(void)
3142{ 3142{
3143 return 0x00504634; 3143 return 0x00504634;
3144} 3144}
@@ -3150,6 +3150,18 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(vo
3150{ 3150{
3151 return 0x00000000; 3151 return 0x00000000;
3152} 3152}
/*
 * Offset of the second warp-valid mask register. gr_gm20b_bpt_reg_info()
 * adds the per-GPC/TPC stride offset to it and reads it together with the
 * word at +4, but only when more than 64 warps per TPC are reported.
 */
3153static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_2_r(void)
3154{
3155 return 0x0050461c;
3156}
/*
 * Offset of the second breakpoint pause mask register. Read (with the word
 * at +4) by gr_gm20b_bpt_reg_info() into paused_warps[1] when more than
 * 64 warps per TPC are reported.
 */
3157static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r(void)
3158{
3159 return 0x00504750;
3160}
/*
 * Offset of the second breakpoint trap mask register. Read (with the word
 * at +4) by gr_gm20b_bpt_reg_info() into trapped_warps[1] when more than
 * 64 warps per TPC are reported.
 */
3161static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r(void)
3162{
3163 return 0x00504758;
3164}
3153static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void) 3165static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
3154{ 3166{
3155 return 0x0050460c; 3167 return 0x0050460c;
@@ -3626,6 +3638,18 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f(void)
3626{ 3638{
3627 return 0x0; 3639 return 0x0;
3628} 3640}
/*
 * Offset of the warp-valid mask register; returns the same value as
 * gr_gpc0_tpc0_sm_warp_valid_mask_0_r(). gr_gm20b_bpt_reg_info() reads it
 * (with the word at +4) into valid_warps[0].
 */
3641static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
3642{
3643 return 0x00504614;
3644}
/*
 * Offset of the breakpoint pause mask register; returns the same value as
 * gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_0_r(). gr_gm20b_bpt_reg_info() reads
 * it (with the word at +4) into paused_warps[0].
 */
3645static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
3646{
3647 return 0x00504624;
3648}
/*
 * Offset of the breakpoint trap mask register; returns the same value as
 * gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_0_r(). gr_gm20b_bpt_reg_info() reads
 * it (with the word at +4) into trapped_warps[0].
 */
3649static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
3650{
3651 return 0x00504634;
3652}
3629static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void) 3653static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void)
3630{ 3654{
3631 return 0x1 << 30; 3655 return 0x1 << 30;