diff options
author | sujeet baranwal <sbaranwal@nvidia.com> | 2015-08-20 20:04:44 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-12-04 16:03:11 -0500 |
commit | 397c6d44ed3ee6cc0c24fce7711bda4f0d6cd9bf (patch) | |
tree | 4cf5477fd29605022291239e4f060a8030f793d0 /drivers/gpu/nvgpu/gm20b | |
parent | 71c8d62657db7ef40a30b7504632d668f4e64bc6 (diff) |
gpu: nvgpu: Wait for pause for SMs
The order of SM locking and register reads has been changed.
Also, chip-specific functions have been implemented for gk20a
and gm20b.
Change-Id: Iaf720d088130f84c4b2ca318d9860194c07966e1
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Signed-off-by: ashutosh jain <ashutoshj@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/837236
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 78 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | 30 |
2 files changed, 105 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index d590f566..512c470d 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1050,6 +1050,83 @@ static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) | |||
1050 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); | 1050 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | ||
1054 | { | ||
1055 | /* Check if we have at least one valid warp */ | ||
1056 | /* get paused state on maxwell */ | ||
1057 | struct gr_gk20a *gr = &g->gr; | ||
1058 | u32 gpc, tpc, sm_id; | ||
1059 | u32 tpc_offset, gpc_offset, reg_offset; | ||
1060 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
1061 | |||
1062 | /* for maxwell & kepler */ | ||
1063 | u32 numSmPerTpc = 1; | ||
1064 | u32 numWarpPerTpc = g->gpu_characteristics.sm_arch_warp_count * numSmPerTpc; | ||
1065 | |||
1066 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
1067 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
1068 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
1069 | |||
1070 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | ||
1071 | gpc_offset = proj_gpc_stride_v() * gpc; | ||
1072 | reg_offset = tpc_offset + gpc_offset; | ||
1073 | |||
1074 | /* 64 bit read */ | ||
1075 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32; | ||
1076 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset); | ||
1077 | |||
1078 | /* 64 bit read */ | ||
1079 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32; | ||
1080 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset); | ||
1081 | |||
1082 | /* 64 bit read */ | ||
1083 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32; | ||
1084 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset); | ||
1085 | |||
1086 | w_state[sm_id].valid_warps[0] = warps_valid; | ||
1087 | w_state[sm_id].trapped_warps[0] = warps_trapped; | ||
1088 | w_state[sm_id].paused_warps[0] = warps_paused; | ||
1089 | |||
1090 | |||
1091 | if (numWarpPerTpc > 64) { | ||
1092 | /* 64 bit read */ | ||
1093 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset + 4) << 32; | ||
1094 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_2_r() + reg_offset); | ||
1095 | |||
1096 | /* 64 bit read */ | ||
1097 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset + 4) << 32; | ||
1098 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r() + reg_offset); | ||
1099 | |||
1100 | /* 64 bit read */ | ||
1101 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset + 4) << 32; | ||
1102 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r() + reg_offset); | ||
1103 | |||
1104 | w_state[sm_id].valid_warps[1] = warps_valid; | ||
1105 | w_state[sm_id].trapped_warps[1] = warps_trapped; | ||
1106 | w_state[sm_id].paused_warps[1] = warps_paused; | ||
1107 | } | ||
1108 | } | ||
1109 | |||
1110 | |||
1111 | /* Only for debug purpose */ | ||
1112 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
1113 | gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n", | ||
1114 | sm_id, w_state[sm_id].valid_warps[0]); | ||
1115 | gk20a_dbg_fn("w_state[%d].valid_warps[1]: %llx\n", | ||
1116 | sm_id, w_state[sm_id].valid_warps[1]); | ||
1117 | |||
1118 | gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n", | ||
1119 | sm_id, w_state[sm_id].trapped_warps[0]); | ||
1120 | gk20a_dbg_fn("w_state[%d].trapped_warps[1]: %llx\n", | ||
1121 | sm_id, w_state[sm_id].trapped_warps[1]); | ||
1122 | |||
1123 | gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n", | ||
1124 | sm_id, w_state[sm_id].paused_warps[0]); | ||
1125 | gk20a_dbg_fn("w_state[%d].paused_warps[1]: %llx\n", | ||
1126 | sm_id, w_state[sm_id].paused_warps[1]); | ||
1127 | } | ||
1128 | } | ||
1129 | |||
1053 | void gm20b_init_gr(struct gpu_ops *gops) | 1130 | void gm20b_init_gr(struct gpu_ops *gops) |
1054 | { | 1131 | { |
1055 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 1132 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -1106,4 +1183,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1106 | gops->gr.wait_empty = gr_gk20a_wait_idle; | 1183 | gops->gr.wait_empty = gr_gk20a_wait_idle; |
1107 | gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; | 1184 | gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; |
1108 | gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs; | 1185 | gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs; |
1186 | gops->gr.bpt_reg_info = gr_gm20b_bpt_reg_info; | ||
1109 | } | 1187 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h index 4a712394..05f6cae5 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | |||
@@ -3130,15 +3130,15 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void) | |||
3130 | { | 3130 | { |
3131 | return 0x0; | 3131 | return 0x0; |
3132 | } | 3132 | } |
3133 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) | 3133 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_0_r(void) |
3134 | { | 3134 | { |
3135 | return 0x00504614; | 3135 | return 0x00504614; |
3136 | } | 3136 | } |
3137 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) | 3137 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_0_r(void) |
3138 | { | 3138 | { |
3139 | return 0x00504624; | 3139 | return 0x00504624; |
3140 | } | 3140 | } |
3141 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) | 3141 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_0_r(void) |
3142 | { | 3142 | { |
3143 | return 0x00504634; | 3143 | return 0x00504634; |
3144 | } | 3144 | } |
@@ -3150,6 +3150,18 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(vo | |||
3150 | { | 3150 | { |
3151 | return 0x00000000; | 3151 | return 0x00000000; |
3152 | } | 3152 | } |
3153 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_2_r(void) | ||
3154 | { | ||
3155 | return 0x0050461c; | ||
3156 | } | ||
3157 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_2_r(void) | ||
3158 | { | ||
3159 | return 0x00504750; | ||
3160 | } | ||
3161 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_2_r(void) | ||
3162 | { | ||
3163 | return 0x00504758; | ||
3164 | } | ||
3153 | static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void) | 3165 | static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void) |
3154 | { | 3166 | { |
3155 | return 0x0050460c; | 3167 | return 0x0050460c; |
@@ -3626,6 +3638,18 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_disable_f(void) | |||
3626 | { | 3638 | { |
3627 | return 0x0; | 3639 | return 0x0; |
3628 | } | 3640 | } |
3641 | static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void) | ||
3642 | { | ||
3643 | return 0x00504614; | ||
3644 | } | ||
3645 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void) | ||
3646 | { | ||
3647 | return 0x00504624; | ||
3648 | } | ||
3649 | static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void) | ||
3650 | { | ||
3651 | return 0x00504634; | ||
3652 | } | ||
3629 | static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void) | 3653 | static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_m(void) |
3630 | { | 3654 | { |
3631 | return 0x1 << 30; | 3655 | return 0x1 << 30; |