diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 1167 |
1 files changed, 0 insertions, 1167 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index a9632eaa..3ac4e397 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -837,71 +837,6 @@ u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc) | |||
837 | return tpc_offset; | 837 | return tpc_offset; |
838 | } | 838 | } |
839 | 839 | ||
840 | static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | ||
841 | struct channel_gk20a *c, bool patch) | ||
842 | { | ||
843 | struct gr_gk20a *gr = &g->gr; | ||
844 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
845 | u32 attrib_offset_in_chunk = 0; | ||
846 | u32 alpha_offset_in_chunk = 0; | ||
847 | u32 pd_ab_max_output; | ||
848 | u32 gpc_index, ppc_index; | ||
849 | u32 temp; | ||
850 | u32 cbm_cfg_size1, cbm_cfg_size2; | ||
851 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
852 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
853 | |||
854 | gk20a_dbg_fn(""); | ||
855 | |||
856 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), | ||
857 | gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | | ||
858 | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), | ||
859 | patch); | ||
860 | |||
861 | pd_ab_max_output = (gr->alpha_cb_default_size * | ||
862 | gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) / | ||
863 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
864 | |||
865 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), | ||
866 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
867 | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); | ||
868 | |||
869 | alpha_offset_in_chunk = attrib_offset_in_chunk + | ||
870 | gr->tpc_count * gr->attrib_cb_size; | ||
871 | |||
872 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
873 | temp = gpc_stride * gpc_index; | ||
874 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
875 | ppc_index++) { | ||
876 | cbm_cfg_size1 = gr->attrib_cb_default_size * | ||
877 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
878 | cbm_cfg_size2 = gr->alpha_cb_default_size * | ||
879 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
880 | |||
881 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
882 | gr_gpc0_ppc0_cbm_cfg_r() + temp + | ||
883 | ppc_in_gpc_stride * ppc_index, | ||
884 | gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | | ||
885 | gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | | ||
886 | gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); | ||
887 | |||
888 | attrib_offset_in_chunk += gr->attrib_cb_size * | ||
889 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
890 | |||
891 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
892 | gr_gpc0_ppc0_cbm_cfg2_r() + temp + | ||
893 | ppc_in_gpc_stride * ppc_index, | ||
894 | gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | | ||
895 | gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); | ||
896 | |||
897 | alpha_offset_in_chunk += gr->alpha_cb_size * | ||
898 | gr->pes_tpc_count[ppc_index][gpc_index]; | ||
899 | } | ||
900 | } | ||
901 | |||
902 | return 0; | ||
903 | } | ||
904 | |||
905 | static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | 840 | static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, |
906 | struct channel_gk20a *c, bool patch) | 841 | struct channel_gk20a *c, bool patch) |
907 | { | 842 | { |
@@ -964,55 +899,6 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
964 | return 0; | 899 | return 0; |
965 | } | 900 | } |
966 | 901 | ||
967 | static void gr_gk20a_commit_global_attrib_cb(struct gk20a *g, | ||
968 | struct channel_ctx_gk20a *ch_ctx, | ||
969 | u64 addr, bool patch) | ||
970 | { | ||
971 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), | ||
972 | gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) | | ||
973 | gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch); | ||
974 | |||
975 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(), | ||
976 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) | | ||
977 | gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch); | ||
978 | } | ||
979 | |||
980 | static void gr_gk20a_commit_global_bundle_cb(struct gk20a *g, | ||
981 | struct channel_ctx_gk20a *ch_ctx, | ||
982 | u64 addr, u64 size, bool patch) | ||
983 | { | ||
984 | u32 data; | ||
985 | |||
986 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | ||
987 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | ||
988 | |||
989 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | ||
990 | gr_scc_bundle_cb_size_div_256b_f(size) | | ||
991 | gr_scc_bundle_cb_size_valid_true_f(), patch); | ||
992 | |||
993 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_base_r(), | ||
994 | gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch); | ||
995 | |||
996 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_bundle_cb_size_r(), | ||
997 | gr_gpcs_setup_bundle_cb_size_div_256b_f(size) | | ||
998 | gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch); | ||
999 | |||
1000 | /* data for state_limit */ | ||
1001 | data = (g->gr.bundle_cb_default_size * | ||
1002 | gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) / | ||
1003 | gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v(); | ||
1004 | |||
1005 | data = min_t(u32, data, g->gr.min_gpm_fifo_depth); | ||
1006 | |||
1007 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | ||
1008 | g->gr.bundle_cb_token_limit, data); | ||
1009 | |||
1010 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | ||
1011 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | ||
1012 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | ||
1013 | |||
1014 | } | ||
1015 | |||
1016 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, | 902 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, |
1017 | bool patch) | 903 | bool patch) |
1018 | { | 904 | { |
@@ -1235,135 +1121,6 @@ static inline u32 count_bits(u32 mask) | |||
1235 | return count; | 1121 | return count; |
1236 | } | 1122 | } |
1237 | 1123 | ||
/*
 * Clear up to @clear_count of the lowest set bits of @num and return the
 * result.  Stops early once @num reaches zero.
 */
static inline u32 clear_count_bits(u32 num, u32 clear_count)
{
	u32 remaining = clear_count;

	while (num != 0 && remaining != 0) {
		/* num & (num - 1) drops the least-significant set bit */
		num &= num - 1;
		remaining--;
	}

	return num;
}
1246 | |||
1247 | static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, | ||
1248 | struct gr_gk20a *gr) | ||
1249 | { | ||
1250 | u32 table_index_bits = 5; | ||
1251 | u32 rows = (1 << table_index_bits); | ||
1252 | u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows; | ||
1253 | |||
1254 | u32 row; | ||
1255 | u32 index; | ||
1256 | u32 gpc_index; | ||
1257 | u32 gpcs_per_reg = 4; | ||
1258 | u32 pes_index; | ||
1259 | u32 tpc_count_pes; | ||
1260 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); | ||
1261 | |||
1262 | u32 alpha_target, beta_target; | ||
1263 | u32 alpha_bits, beta_bits; | ||
1264 | u32 alpha_mask, beta_mask, partial_mask; | ||
1265 | u32 reg_offset; | ||
1266 | bool assign_alpha; | ||
1267 | |||
1268 | u32 *map_alpha; | ||
1269 | u32 *map_beta; | ||
1270 | u32 *map_reg_used; | ||
1271 | |||
1272 | gk20a_dbg_fn(""); | ||
1273 | |||
1274 | map_alpha = nvgpu_kzalloc(g, 3 * gr_pd_alpha_ratio_table__size_1_v() * | ||
1275 | sizeof(u32)); | ||
1276 | if (!map_alpha) | ||
1277 | return -ENOMEM; | ||
1278 | map_beta = map_alpha + gr_pd_alpha_ratio_table__size_1_v(); | ||
1279 | map_reg_used = map_beta + gr_pd_alpha_ratio_table__size_1_v(); | ||
1280 | |||
1281 | for (row = 0; row < rows; ++row) { | ||
1282 | alpha_target = max_t(u32, gr->tpc_count * row / rows, 1); | ||
1283 | beta_target = gr->tpc_count - alpha_target; | ||
1284 | |||
1285 | assign_alpha = (alpha_target < beta_target); | ||
1286 | |||
1287 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
1288 | reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg); | ||
1289 | alpha_mask = beta_mask = 0; | ||
1290 | |||
1291 | for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) { | ||
1292 | tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index]; | ||
1293 | |||
1294 | if (assign_alpha) { | ||
1295 | alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes; | ||
1296 | beta_bits = tpc_count_pes - alpha_bits; | ||
1297 | } else { | ||
1298 | beta_bits = (beta_target == 0) ? 0 : tpc_count_pes; | ||
1299 | alpha_bits = tpc_count_pes - beta_bits; | ||
1300 | } | ||
1301 | |||
1302 | partial_mask = gr->pes_tpc_mask[pes_index][gpc_index]; | ||
1303 | partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits); | ||
1304 | alpha_mask |= partial_mask; | ||
1305 | |||
1306 | partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask; | ||
1307 | beta_mask |= partial_mask; | ||
1308 | |||
1309 | alpha_target -= min(alpha_bits, alpha_target); | ||
1310 | beta_target -= min(beta_bits, beta_target); | ||
1311 | |||
1312 | if ((alpha_bits > 0) || (beta_bits > 0)) | ||
1313 | assign_alpha = !assign_alpha; | ||
1314 | } | ||
1315 | |||
1316 | switch (gpc_index % gpcs_per_reg) { | ||
1317 | case 0: | ||
1318 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask); | ||
1319 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask); | ||
1320 | break; | ||
1321 | case 1: | ||
1322 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask); | ||
1323 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask); | ||
1324 | break; | ||
1325 | case 2: | ||
1326 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask); | ||
1327 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask); | ||
1328 | break; | ||
1329 | case 3: | ||
1330 | map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask); | ||
1331 | map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask); | ||
1332 | break; | ||
1333 | } | ||
1334 | map_reg_used[reg_offset] = true; | ||
1335 | } | ||
1336 | } | ||
1337 | |||
1338 | for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) { | ||
1339 | if (map_reg_used[index]) { | ||
1340 | gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]); | ||
1341 | gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]); | ||
1342 | } | ||
1343 | } | ||
1344 | |||
1345 | nvgpu_kfree(g, map_alpha); | ||
1346 | return 0; | ||
1347 | } | ||
1348 | |||
1349 | static u32 gr_gk20a_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
1350 | { | ||
1351 | /* One TPC for gk20a */ | ||
1352 | return 0x1; | ||
1353 | } | ||
1354 | |||
1355 | static void gr_gk20a_program_active_tpc_counts(struct gk20a *g, u32 gpc_index) | ||
1356 | { | ||
1357 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1358 | u32 gpc_offset = gpc_stride * gpc_index; | ||
1359 | struct gr_gk20a *gr = &g->gr; | ||
1360 | |||
1361 | gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, | ||
1362 | gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1363 | gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, | ||
1364 | gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1365 | } | ||
1366 | |||
1367 | void gr_gk20a_init_sm_id_table(struct gk20a *g) | 1124 | void gr_gk20a_init_sm_id_table(struct gk20a *g) |
1368 | { | 1125 | { |
1369 | u32 gpc, tpc; | 1126 | u32 gpc, tpc; |
@@ -1385,24 +1142,6 @@ void gr_gk20a_init_sm_id_table(struct gk20a *g) | |||
1385 | g->gr.no_of_sm = sm_id; | 1142 | g->gr.no_of_sm = sm_id; |
1386 | } | 1143 | } |
1387 | 1144 | ||
1388 | static void gr_gk20a_program_sm_id_numbering(struct gk20a *g, | ||
1389 | u32 gpc, u32 tpc, u32 sm_id) | ||
1390 | { | ||
1391 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1392 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1393 | u32 gpc_offset = gpc_stride * gpc; | ||
1394 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
1395 | |||
1396 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1397 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
1398 | gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1399 | gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); | ||
1400 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
1401 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
1402 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1403 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
1404 | } | ||
1405 | |||
1406 | /* | 1145 | /* |
1407 | * Return number of TPCs in a GPC | 1146 | * Return number of TPCs in a GPC |
1408 | * Return 0 if GPC index is invalid i.e. GPC is disabled | 1147 | * Return 0 if GPC index is invalid i.e. GPC is disabled |
@@ -2564,23 +2303,6 @@ void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, | |||
2564 | gr_fecs_bootvec_vec_f(segments->boot_entry)); | 2303 | gr_fecs_bootvec_vec_f(segments->boot_entry)); |
2565 | } | 2304 | } |
2566 | 2305 | ||
2567 | static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | ||
2568 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | ||
2569 | { | ||
2570 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), | ||
2571 | gr_fecs_dmactl_require_ctx_f(0)); | ||
2572 | |||
2573 | /* Copy falcon bootloader into dmem */ | ||
2574 | gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset); | ||
2575 | gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset); | ||
2576 | |||
2577 | /* Write to CPUCTL to start the falcon */ | ||
2578 | gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), | ||
2579 | gr_fecs_cpuctl_startcpu_f(0x01)); | ||
2580 | |||
2581 | return 0; | ||
2582 | } | ||
2583 | |||
2584 | static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) | 2306 | static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) |
2585 | { | 2307 | { |
2586 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 2308 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
@@ -3116,41 +2838,6 @@ void gk20a_free_channel_ctx(struct channel_gk20a *c) | |||
3116 | c->first_init = false; | 2838 | c->first_init = false; |
3117 | } | 2839 | } |
3118 | 2840 | ||
3119 | static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) | ||
3120 | { | ||
3121 | bool valid = false; | ||
3122 | |||
3123 | switch (class_num) { | ||
3124 | case KEPLER_COMPUTE_A: | ||
3125 | case KEPLER_C: | ||
3126 | case FERMI_TWOD_A: | ||
3127 | case KEPLER_DMA_COPY_A: | ||
3128 | valid = true; | ||
3129 | break; | ||
3130 | |||
3131 | default: | ||
3132 | break; | ||
3133 | } | ||
3134 | |||
3135 | return valid; | ||
3136 | } | ||
3137 | |||
3138 | static bool gr_gk20a_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
3139 | { | ||
3140 | if (class_num == KEPLER_C) | ||
3141 | return true; | ||
3142 | else | ||
3143 | return false; | ||
3144 | } | ||
3145 | |||
3146 | static bool gr_gk20a_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
3147 | { | ||
3148 | if (class_num == KEPLER_COMPUTE_A) | ||
3149 | return true; | ||
3150 | else | ||
3151 | return false; | ||
3152 | } | ||
3153 | |||
3154 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | 2841 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, |
3155 | struct nvgpu_alloc_obj_ctx_args *args) | 2842 | struct nvgpu_alloc_obj_ctx_args *args) |
3156 | { | 2843 | { |
@@ -3461,18 +3148,6 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) | |||
3461 | gk20a_comptag_allocator_destroy(&gr->comp_tags); | 3148 | gk20a_comptag_allocator_destroy(&gr->comp_tags); |
3462 | } | 3149 | } |
3463 | 3150 | ||
3464 | static void gr_gk20a_bundle_cb_defaults(struct gk20a *g) | ||
3465 | { | ||
3466 | struct gr_gk20a *gr = &g->gr; | ||
3467 | |||
3468 | gr->bundle_cb_default_size = | ||
3469 | gr_scc_bundle_cb_size_div_256b__prod_v(); | ||
3470 | gr->min_gpm_fifo_depth = | ||
3471 | gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v(); | ||
3472 | gr->bundle_cb_token_limit = | ||
3473 | gr_pd_ab_dist_cfg2_token_limit_init_v(); | ||
3474 | } | ||
3475 | |||
3476 | static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | 3151 | static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) |
3477 | { | 3152 | { |
3478 | u32 gpc_index, pes_index; | 3153 | u32 gpc_index, pes_index; |
@@ -3954,27 +3629,6 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | |||
3954 | return 0; | 3629 | return 0; |
3955 | } | 3630 | } |
3956 | 3631 | ||
3957 | static void gr_gk20a_detect_sm_arch(struct gk20a *g) | ||
3958 | { | ||
3959 | u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); | ||
3960 | |||
3961 | u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v); | ||
3962 | u32 version = 0; | ||
3963 | |||
3964 | if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v()) | ||
3965 | version = 0x320; /* SM 3.2 */ | ||
3966 | else | ||
3967 | nvgpu_err(g, "Unknown SM version 0x%x", | ||
3968 | raw_version); | ||
3969 | |||
3970 | /* on Kepler, SM version == SPA version */ | ||
3971 | g->gpu_characteristics.sm_arch_spa_version = version; | ||
3972 | g->gpu_characteristics.sm_arch_sm_version = version; | ||
3973 | |||
3974 | g->gpu_characteristics.sm_arch_warp_count = | ||
3975 | gr_gpc0_tpc0_sm_arch_warp_count_v(v); | ||
3976 | } | ||
3977 | |||
3978 | int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, | 3632 | int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr, |
3979 | struct zbc_entry *color_val, u32 index) | 3633 | struct zbc_entry *color_val, u32 index) |
3980 | { | 3634 | { |
@@ -4729,42 +4383,6 @@ void gr_gk20a_enable_hww_exceptions(struct gk20a *g) | |||
4729 | gr_memfmt_hww_esr_reset_active_f()); | 4383 | gr_memfmt_hww_esr_reset_active_f()); |
4730 | } | 4384 | } |
4731 | 4385 | ||
4732 | static void gr_gk20a_set_hww_esr_report_mask(struct gk20a *g) | ||
4733 | { | ||
4734 | /* setup sm warp esr report masks */ | ||
4735 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(), | ||
4736 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_stack_error_report_f() | | ||
4737 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_api_stack_error_report_f() | | ||
4738 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_ret_empty_stack_error_report_f() | | ||
4739 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_wrap_report_f() | | ||
4740 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_pc_report_f() | | ||
4741 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_pc_overflow_report_f() | | ||
4742 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_immc_addr_report_f() | | ||
4743 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_reg_report_f() | | ||
4744 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_encoding_report_f() | | ||
4745 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_sph_instr_combo_report_f() | | ||
4746 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param_report_f() | | ||
4747 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_report_f() | | ||
4748 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_reg_report_f() | | ||
4749 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_oor_addr_report_f() | | ||
4750 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_misaligned_addr_report_f() | | ||
4751 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_addr_space_report_f() | | ||
4752 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_illegal_instr_param2_report_f() | | ||
4753 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f() | | ||
4754 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_geometry_sm_error_report_f() | | ||
4755 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_divergent_report_f()); | ||
4756 | |||
4757 | /* setup sm global esr report mask */ | ||
4758 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(), | ||
4759 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_sm_to_sm_fault_report_f() | | ||
4760 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_l1_error_report_f() | | ||
4761 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_multiple_warp_errors_report_f() | | ||
4762 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_physical_stack_overflow_error_report_f() | | ||
4763 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_int_report_f() | | ||
4764 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_bpt_pause_report_f() | | ||
4765 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_single_step_complete_report_f()); | ||
4766 | } | ||
4767 | |||
4768 | static int gk20a_init_gr_setup_hw(struct gk20a *g) | 4386 | static int gk20a_init_gr_setup_hw(struct gk20a *g) |
4769 | { | 4387 | { |
4770 | struct gr_gk20a *gr = &g->gr; | 4388 | struct gr_gk20a *gr = &g->gr; |
@@ -5364,107 +4982,6 @@ void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data) | |||
5364 | } | 4982 | } |
5365 | } | 4983 | } |
5366 | 4984 | ||
5367 | static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | ||
5368 | { | ||
5369 | struct gr_gk20a *gr = &g->gr; | ||
5370 | u32 gpc_index, ppc_index, stride, val, offset; | ||
5371 | u32 cb_size = data * 4; | ||
5372 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
5373 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
5374 | |||
5375 | gk20a_dbg_fn(""); | ||
5376 | |||
5377 | if (cb_size > gr->attrib_cb_size) | ||
5378 | cb_size = gr->attrib_cb_size; | ||
5379 | |||
5380 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
5381 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
5382 | ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) | | ||
5383 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | ||
5384 | |||
5385 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
5386 | stride = gpc_stride * gpc_index; | ||
5387 | |||
5388 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
5389 | ppc_index++) { | ||
5390 | |||
5391 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() + | ||
5392 | stride + | ||
5393 | ppc_in_gpc_stride * ppc_index); | ||
5394 | |||
5395 | offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val); | ||
5396 | |||
5397 | val = set_field(val, | ||
5398 | gr_gpc0_ppc0_cbm_cfg_size_m(), | ||
5399 | gr_gpc0_ppc0_cbm_cfg_size_f(cb_size * | ||
5400 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
5401 | val = set_field(val, | ||
5402 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), | ||
5403 | (offset + 1)); | ||
5404 | |||
5405 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | ||
5406 | stride + | ||
5407 | ppc_in_gpc_stride * ppc_index, val); | ||
5408 | |||
5409 | val = set_field(val, | ||
5410 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), | ||
5411 | offset); | ||
5412 | |||
5413 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | ||
5414 | stride + | ||
5415 | ppc_in_gpc_stride * ppc_index, val); | ||
5416 | } | ||
5417 | } | ||
5418 | } | ||
5419 | |||
5420 | static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | ||
5421 | { | ||
5422 | struct gr_gk20a *gr = &g->gr; | ||
5423 | u32 gpc_index, ppc_index, stride, val; | ||
5424 | u32 pd_ab_max_output; | ||
5425 | u32 alpha_cb_size = data * 4; | ||
5426 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
5427 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
5428 | |||
5429 | gk20a_dbg_fn(""); | ||
5430 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | ||
5431 | return; */ | ||
5432 | |||
5433 | if (alpha_cb_size > gr->alpha_cb_size) | ||
5434 | alpha_cb_size = gr->alpha_cb_size; | ||
5435 | |||
5436 | gk20a_writel(g, gr_ds_tga_constraintlogic_r(), | ||
5437 | (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) & | ||
5438 | ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) | | ||
5439 | gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size)); | ||
5440 | |||
5441 | pd_ab_max_output = alpha_cb_size * | ||
5442 | gr_gpc0_ppc0_cbm_cfg_size_granularity_v() / | ||
5443 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | ||
5444 | |||
5445 | gk20a_writel(g, gr_pd_ab_dist_cfg1_r(), | ||
5446 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | ||
5447 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | ||
5448 | |||
5449 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
5450 | stride = gpc_stride * gpc_index; | ||
5451 | |||
5452 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | ||
5453 | ppc_index++) { | ||
5454 | |||
5455 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() + | ||
5456 | stride + ppc_in_gpc_stride * ppc_index); | ||
5457 | |||
5458 | val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(), | ||
5459 | gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size * | ||
5460 | gr->pes_tpc_count[ppc_index][gpc_index])); | ||
5461 | |||
5462 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() + | ||
5463 | stride + ppc_in_gpc_stride * ppc_index, val); | ||
5464 | } | ||
5465 | } | ||
5466 | } | ||
5467 | |||
5468 | int gk20a_enable_gr_hw(struct gk20a *g) | 4985 | int gk20a_enable_gr_hw(struct gk20a *g) |
5469 | { | 4986 | { |
5470 | int err; | 4987 | int err; |
@@ -5548,44 +5065,6 @@ int gk20a_gr_reset(struct gk20a *g) | |||
5548 | return err; | 5065 | return err; |
5549 | } | 5066 | } |
5550 | 5067 | ||
5551 | static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, | ||
5552 | u32 class_num, u32 offset, u32 data) | ||
5553 | { | ||
5554 | gk20a_dbg_fn(""); | ||
5555 | |||
5556 | trace_gr_gk20a_handle_sw_method(g->name); | ||
5557 | |||
5558 | if (class_num == KEPLER_COMPUTE_A) { | ||
5559 | switch (offset << 2) { | ||
5560 | case NVA0C0_SET_SHADER_EXCEPTIONS: | ||
5561 | gk20a_gr_set_shader_exceptions(g, data); | ||
5562 | break; | ||
5563 | default: | ||
5564 | goto fail; | ||
5565 | } | ||
5566 | } | ||
5567 | |||
5568 | if (class_num == KEPLER_C) { | ||
5569 | switch (offset << 2) { | ||
5570 | case NVA297_SET_SHADER_EXCEPTIONS: | ||
5571 | gk20a_gr_set_shader_exceptions(g, data); | ||
5572 | break; | ||
5573 | case NVA297_SET_CIRCULAR_BUFFER_SIZE: | ||
5574 | g->ops.gr.set_circular_buffer_size(g, data); | ||
5575 | break; | ||
5576 | case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE: | ||
5577 | g->ops.gr.set_alpha_circular_buffer_size(g, data); | ||
5578 | break; | ||
5579 | default: | ||
5580 | goto fail; | ||
5581 | } | ||
5582 | } | ||
5583 | return 0; | ||
5584 | |||
5585 | fail: | ||
5586 | return -EINVAL; | ||
5587 | } | ||
5588 | |||
5589 | static void gk20a_gr_set_error_notifier(struct gk20a *g, | 5068 | static void gk20a_gr_set_error_notifier(struct gk20a *g, |
5590 | struct gr_gk20a_isr_data *isr_data, u32 error_notifier) | 5069 | struct gr_gk20a_isr_data *isr_data, u32 error_notifier) |
5591 | { | 5070 | { |
@@ -6043,143 +5522,6 @@ u32 gk20a_mask_hww_warp_esr(u32 hww_warp_esr) | |||
6043 | return hww_warp_esr; | 5522 | return hww_warp_esr; |
6044 | } | 5523 | } |
6045 | 5524 | ||
6046 | static int gk20a_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc) | ||
6047 | { | ||
6048 | int sm_id; | ||
6049 | struct gr_gk20a *gr = &g->gr; | ||
6050 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6051 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
6052 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6053 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
6054 | |||
6055 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
6056 | |||
6057 | sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g, | ||
6058 | gr_gpc0_tpc0_sm_cfg_r() + offset)); | ||
6059 | |||
6060 | gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g, | ||
6061 | gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
6062 | gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g, | ||
6063 | gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset); | ||
6064 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g, | ||
6065 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset); | ||
6066 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g, | ||
6067 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset); | ||
6068 | |||
6069 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
6070 | |||
6071 | return 0; | ||
6072 | } | ||
6073 | |||
6074 | static int gk20a_gr_update_sm_error_state(struct gk20a *g, | ||
6075 | struct channel_gk20a *ch, u32 sm_id, | ||
6076 | struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state) | ||
6077 | { | ||
6078 | u32 gpc, tpc, offset; | ||
6079 | struct gr_gk20a *gr = &g->gr; | ||
6080 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
6081 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6082 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
6083 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6084 | int err = 0; | ||
6085 | |||
6086 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
6087 | |||
6088 | gr->sm_error_states[sm_id].hww_global_esr = | ||
6089 | sm_error_state->hww_global_esr; | ||
6090 | gr->sm_error_states[sm_id].hww_warp_esr = | ||
6091 | sm_error_state->hww_warp_esr; | ||
6092 | gr->sm_error_states[sm_id].hww_global_esr_report_mask = | ||
6093 | sm_error_state->hww_global_esr_report_mask; | ||
6094 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask = | ||
6095 | sm_error_state->hww_warp_esr_report_mask; | ||
6096 | |||
6097 | err = gr_gk20a_disable_ctxsw(g); | ||
6098 | if (err) { | ||
6099 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
6100 | goto fail; | ||
6101 | } | ||
6102 | |||
6103 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
6104 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
6105 | |||
6106 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
6107 | |||
6108 | if (gk20a_is_channel_ctx_resident(ch)) { | ||
6109 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
6110 | gr->sm_error_states[sm_id].hww_global_esr); | ||
6111 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, | ||
6112 | gr->sm_error_states[sm_id].hww_warp_esr); | ||
6113 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset, | ||
6114 | gr->sm_error_states[sm_id].hww_global_esr_report_mask); | ||
6115 | gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset, | ||
6116 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask); | ||
6117 | } else { | ||
6118 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | ||
6119 | if (err) | ||
6120 | goto enable_ctxsw; | ||
6121 | |||
6122 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
6123 | gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset, | ||
6124 | gr->sm_error_states[sm_id].hww_global_esr_report_mask, | ||
6125 | true); | ||
6126 | gr_gk20a_ctx_patch_write(g, ch_ctx, | ||
6127 | gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset, | ||
6128 | gr->sm_error_states[sm_id].hww_warp_esr_report_mask, | ||
6129 | true); | ||
6130 | |||
6131 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | ||
6132 | } | ||
6133 | |||
6134 | enable_ctxsw: | ||
6135 | err = gr_gk20a_enable_ctxsw(g); | ||
6136 | |||
6137 | fail: | ||
6138 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
6139 | return err; | ||
6140 | } | ||
6141 | |||
6142 | static int gk20a_gr_clear_sm_error_state(struct gk20a *g, | ||
6143 | struct channel_gk20a *ch, u32 sm_id) | ||
6144 | { | ||
6145 | u32 gpc, tpc, offset; | ||
6146 | u32 val; | ||
6147 | struct gr_gk20a *gr = &g->gr; | ||
6148 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6149 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | ||
6150 | GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6151 | int err = 0; | ||
6152 | |||
6153 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | ||
6154 | |||
6155 | memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states)); | ||
6156 | |||
6157 | err = gr_gk20a_disable_ctxsw(g); | ||
6158 | if (err) { | ||
6159 | nvgpu_err(g, "unable to stop gr ctxsw"); | ||
6160 | goto fail; | ||
6161 | } | ||
6162 | |||
6163 | if (gk20a_is_channel_ctx_resident(ch)) { | ||
6164 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
6165 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
6166 | |||
6167 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
6168 | |||
6169 | val = gk20a_readl(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset); | ||
6170 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | ||
6171 | val); | ||
6172 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset, | ||
6173 | 0); | ||
6174 | } | ||
6175 | |||
6176 | err = gr_gk20a_enable_ctxsw(g); | ||
6177 | |||
6178 | fail: | ||
6179 | nvgpu_mutex_release(&g->dbg_sessions_lock); | ||
6180 | return err; | ||
6181 | } | ||
6182 | |||
6183 | int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, | 5525 | int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, |
6184 | bool *post_event, struct channel_gk20a *fault_ch, | 5526 | bool *post_event, struct channel_gk20a *fault_ch, |
6185 | u32 *hww_global_esr) | 5527 | u32 *hww_global_esr) |
@@ -7171,12 +6513,6 @@ static const u32 _num_ovr_perf_regs = 17; | |||
7171 | static u32 _ovr_perf_regs[17] = { 0, }; | 6513 | static u32 _ovr_perf_regs[17] = { 0, }; |
7172 | /* Following are the blocks of registers that the ucode | 6514 | /* Following are the blocks of registers that the ucode |
7173 | stores in the extended region.*/ | 6515 | stores in the extended region.*/ |
7174 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ | ||
7175 | static const u32 _num_sm_dsm_perf_regs = 5; | ||
7176 | /* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ | ||
7177 | static const u32 _num_sm_dsm_perf_ctrl_regs = 4; | ||
7178 | static u32 _sm_dsm_perf_regs[5]; | ||
7179 | static u32 _sm_dsm_perf_ctrl_regs[4]; | ||
7180 | 6516 | ||
7181 | static void init_ovr_perf_reg_info(void) | 6517 | static void init_ovr_perf_reg_info(void) |
7182 | { | 6518 | { |
@@ -7202,24 +6538,6 @@ static void init_ovr_perf_reg_info(void) | |||
7202 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); | 6538 | _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); |
7203 | } | 6539 | } |
7204 | 6540 | ||
7205 | static void gr_gk20a_init_sm_dsm_reg_info(void) | ||
7206 | { | ||
7207 | if (_sm_dsm_perf_regs[0] != 0) | ||
7208 | return; | ||
7209 | |||
7210 | _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); | ||
7211 | _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); | ||
7212 | _sm_dsm_perf_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter1_r(); | ||
7213 | _sm_dsm_perf_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter2_r(); | ||
7214 | _sm_dsm_perf_regs[4] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter3_r(); | ||
7215 | |||
7216 | _sm_dsm_perf_ctrl_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control1_r(); | ||
7217 | _sm_dsm_perf_ctrl_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control2_r(); | ||
7218 | _sm_dsm_perf_ctrl_regs[2] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control3_r(); | ||
7219 | _sm_dsm_perf_ctrl_regs[3] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control4_r(); | ||
7220 | |||
7221 | } | ||
7222 | |||
7223 | /* TBD: would like to handle this elsewhere, at a higher level. | 6541 | /* TBD: would like to handle this elsewhere, at a higher level. |
7224 | * these are currently constructed in a "test-then-write" style | 6542 | * these are currently constructed in a "test-then-write" style |
7225 | * which makes it impossible to know externally whether a ctx | 6543 | * which makes it impossible to know externally whether a ctx |
@@ -7289,44 +6607,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
7289 | return 0; | 6607 | return 0; |
7290 | } | 6608 | } |
7291 | 6609 | ||
7292 | static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | ||
7293 | { | ||
7294 | u32 reg; | ||
7295 | u32 quad_ctrl; | ||
7296 | u32 half_ctrl; | ||
7297 | u32 tpc, gpc; | ||
7298 | u32 gpc_tpc_addr; | ||
7299 | u32 gpc_tpc_stride; | ||
7300 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7301 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7302 | |||
7303 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); | ||
7304 | |||
7305 | gpc = pri_get_gpc_num(g, offset); | ||
7306 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); | ||
7307 | tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr); | ||
7308 | |||
7309 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ | ||
7310 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ | ||
7311 | |||
7312 | gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride; | ||
7313 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; | ||
7314 | |||
7315 | reg = gk20a_readl(g, gpc_tpc_addr); | ||
7316 | reg = set_field(reg, | ||
7317 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_m(), | ||
7318 | gr_gpcs_tpcs_sm_halfctl_ctrl_sctl_read_quad_ctl_f(quad_ctrl)); | ||
7319 | |||
7320 | gk20a_writel(g, gpc_tpc_addr, reg); | ||
7321 | |||
7322 | gpc_tpc_addr = gr_gpc0_tpc0_sm_debug_sfe_control_r() + gpc_tpc_stride; | ||
7323 | reg = gk20a_readl(g, gpc_tpc_addr); | ||
7324 | reg = set_field(reg, | ||
7325 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_m(), | ||
7326 | gr_gpcs_tpcs_sm_debug_sfe_control_read_half_ctl_f(half_ctrl)); | ||
7327 | gk20a_writel(g, gpc_tpc_addr, reg); | ||
7328 | } | ||
7329 | |||
7330 | #define ILLEGAL_ID ((u32)~0) | 6610 | #define ILLEGAL_ID ((u32)~0) |
7331 | 6611 | ||
7332 | static inline bool check_main_image_header_magic(u8 *context) | 6612 | static inline bool check_main_image_header_magic(u8 *context) |
@@ -7349,26 +6629,6 @@ static inline int ctxsw_prog_ucode_header_size_in_bytes(void) | |||
7349 | return 256; | 6629 | return 256; |
7350 | } | 6630 | } |
7351 | 6631 | ||
7352 | static void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, | ||
7353 | u32 *num_sm_dsm_perf_regs, | ||
7354 | u32 **sm_dsm_perf_regs, | ||
7355 | u32 *perf_register_stride) | ||
7356 | { | ||
7357 | *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; | ||
7358 | *sm_dsm_perf_regs = _sm_dsm_perf_regs; | ||
7359 | *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); | ||
7360 | } | ||
7361 | |||
7362 | static void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | ||
7363 | u32 *num_sm_dsm_perf_ctrl_regs, | ||
7364 | u32 **sm_dsm_perf_ctrl_regs, | ||
7365 | u32 *ctrl_register_stride) | ||
7366 | { | ||
7367 | *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; | ||
7368 | *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; | ||
7369 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | ||
7370 | } | ||
7371 | |||
7372 | static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | 6632 | static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, |
7373 | u32 addr, | 6633 | u32 addr, |
7374 | bool is_quad, u32 quad, | 6634 | bool is_quad, u32 quad, |
@@ -8639,37 +7899,6 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
8639 | return err; | 7899 | return err; |
8640 | } | 7900 | } |
8641 | 7901 | ||
8642 | static void gr_gk20a_cb_size_default(struct gk20a *g) | ||
8643 | { | ||
8644 | struct gr_gk20a *gr = &g->gr; | ||
8645 | |||
8646 | if (!gr->attrib_cb_default_size) | ||
8647 | gr->attrib_cb_default_size = | ||
8648 | gr_gpc0_ppc0_cbm_cfg_size_default_v(); | ||
8649 | gr->alpha_cb_default_size = | ||
8650 | gr_gpc0_ppc0_cbm_cfg2_size_default_v(); | ||
8651 | } | ||
8652 | |||
8653 | static int gr_gk20a_calc_global_ctx_buffer_size(struct gk20a *g) | ||
8654 | { | ||
8655 | struct gr_gk20a *gr = &g->gr; | ||
8656 | int size; | ||
8657 | |||
8658 | gr->attrib_cb_size = gr->attrib_cb_default_size; | ||
8659 | gr->alpha_cb_size = gr->alpha_cb_default_size | ||
8660 | + (gr->alpha_cb_default_size >> 1); | ||
8661 | |||
8662 | size = gr->attrib_cb_size * | ||
8663 | gr_gpc0_ppc0_cbm_cfg_size_granularity_v() * | ||
8664 | gr->max_tpc_count; | ||
8665 | |||
8666 | size += gr->alpha_cb_size * | ||
8667 | gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() * | ||
8668 | gr->max_tpc_count; | ||
8669 | |||
8670 | return size; | ||
8671 | } | ||
8672 | |||
8673 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | 7902 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, |
8674 | struct channel_ctx_gk20a *ch_ctx, | 7903 | struct channel_ctx_gk20a *ch_ctx, |
8675 | u64 addr, u32 size, bool patch) | 7904 | u64 addr, u32 size, bool patch) |
@@ -8697,33 +7926,6 @@ void gk20a_init_gr(struct gk20a *g) | |||
8697 | nvgpu_cond_init(&g->gr.init_wq); | 7926 | nvgpu_cond_init(&g->gr.init_wq); |
8698 | } | 7927 | } |
8699 | 7928 | ||
8700 | static bool gr_gk20a_is_tpc_addr(struct gk20a *g, u32 addr) | ||
8701 | { | ||
8702 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
8703 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8704 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
8705 | return ((addr >= tpc_in_gpc_base) && | ||
8706 | (addr < tpc_in_gpc_base + | ||
8707 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
8708 | || pri_is_tpc_addr_shared(g, addr); | ||
8709 | } | ||
8710 | |||
8711 | static u32 gr_gk20a_get_tpc_num(struct gk20a *g, u32 addr) | ||
8712 | { | ||
8713 | u32 i, start; | ||
8714 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
8715 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
8716 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8717 | |||
8718 | for (i = 0; i < num_tpcs; i++) { | ||
8719 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); | ||
8720 | if ((addr >= start) && | ||
8721 | (addr < (start + tpc_in_gpc_stride))) | ||
8722 | return i; | ||
8723 | } | ||
8724 | return 0; | ||
8725 | } | ||
8726 | |||
8727 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | 7929 | int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, |
8728 | u32 global_esr_mask, bool check_errors) | 7930 | u32 global_esr_mask, bool check_errors) |
8729 | { | 7931 | { |
@@ -8949,176 +8151,6 @@ void gk20a_resume_all_sms(struct gk20a *g) | |||
8949 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | 8151 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); |
8950 | } | 8152 | } |
8951 | 8153 | ||
8952 | static u32 gr_gk20a_pagepool_default_size(struct gk20a *g) | ||
8953 | { | ||
8954 | return gr_scc_pagepool_total_pages_hwmax_value_v(); | ||
8955 | } | ||
8956 | |||
8957 | static u32 gr_gk20a_get_max_fbps_count(struct gk20a *g) | ||
8958 | { | ||
8959 | u32 max_fbps_count, tmp; | ||
8960 | tmp = gk20a_readl(g, top_num_fbps_r()); | ||
8961 | max_fbps_count = top_num_fbps_value_v(tmp); | ||
8962 | return max_fbps_count; | ||
8963 | } | ||
8964 | |||
8965 | |||
8966 | static u32 gr_gk20a_get_fbp_en_mask(struct gk20a *g) | ||
8967 | { | ||
8968 | u32 fbp_en_mask, opt_fbio; | ||
8969 | opt_fbio = gk20a_readl(g, top_fs_status_fbp_r()); | ||
8970 | fbp_en_mask = top_fs_status_fbp_cluster_v(opt_fbio); | ||
8971 | return fbp_en_mask; | ||
8972 | } | ||
8973 | |||
8974 | static u32 gr_gk20a_get_max_ltc_per_fbp(struct gk20a *g) | ||
8975 | { | ||
8976 | return 1; | ||
8977 | } | ||
8978 | |||
8979 | static u32 gr_gk20a_get_max_lts_per_ltc(struct gk20a *g) | ||
8980 | { | ||
8981 | return 1; | ||
8982 | } | ||
8983 | |||
8984 | static u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g) | ||
8985 | { | ||
8986 | /* gk20a doesnt have rop_l2_en_mask */ | ||
8987 | return NULL; | ||
8988 | } | ||
8989 | |||
8990 | |||
8991 | |||
8992 | static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, | ||
8993 | struct gk20a_debug_output *o) | ||
8994 | { | ||
8995 | u32 gr_engine_id; | ||
8996 | |||
8997 | gr_engine_id = gk20a_fifo_get_gr_engine_id(g); | ||
8998 | |||
8999 | gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n", | ||
9000 | gk20a_readl(g, gr_status_r())); | ||
9001 | gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n", | ||
9002 | gk20a_readl(g, gr_status_1_r())); | ||
9003 | gk20a_debug_output(o, "NV_PGRAPH_STATUS2: 0x%x\n", | ||
9004 | gk20a_readl(g, gr_status_2_r())); | ||
9005 | gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n", | ||
9006 | gk20a_readl(g, gr_engine_status_r())); | ||
9007 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n", | ||
9008 | gk20a_readl(g, gr_gpfifo_status_r())); | ||
9009 | gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n", | ||
9010 | gk20a_readl(g, gr_gpfifo_ctl_r())); | ||
9011 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n", | ||
9012 | gk20a_readl(g, gr_fecs_host_int_status_r())); | ||
9013 | gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n", | ||
9014 | gk20a_readl(g, gr_exception_r())); | ||
9015 | gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n", | ||
9016 | gk20a_readl(g, gr_fecs_intr_r())); | ||
9017 | gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n", | ||
9018 | gk20a_readl(g, fifo_engine_status_r(gr_engine_id))); | ||
9019 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n", | ||
9020 | gk20a_readl(g, gr_activity_0_r())); | ||
9021 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n", | ||
9022 | gk20a_readl(g, gr_activity_1_r())); | ||
9023 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY2: 0x%x\n", | ||
9024 | gk20a_readl(g, gr_activity_2_r())); | ||
9025 | gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n", | ||
9026 | gk20a_readl(g, gr_activity_4_r())); | ||
9027 | gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n", | ||
9028 | gk20a_readl(g, gr_pri_sked_activity_r())); | ||
9029 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
9030 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r())); | ||
9031 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
9032 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r())); | ||
9033 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
9034 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r())); | ||
9035 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
9036 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r())); | ||
9037 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9038 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r())); | ||
9039 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9040 | gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r())); | ||
9041 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n", | ||
9042 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r())); | ||
9043 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n", | ||
9044 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r())); | ||
9045 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n", | ||
9046 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r())); | ||
9047 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n", | ||
9048 | gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r())); | ||
9049 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9050 | gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r())); | ||
9051 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n", | ||
9052 | gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r())); | ||
9053 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n", | ||
9054 | gk20a_readl(g, gr_pri_be0_becs_be_activity0_r())); | ||
9055 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n", | ||
9056 | gk20a_readl(g, gr_pri_bes_becs_be_activity0_r())); | ||
9057 | gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n", | ||
9058 | gk20a_readl(g, gr_pri_ds_mpipe_status_r())); | ||
9059 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_ON_STATUS: 0x%x\n", | ||
9060 | gk20a_readl(g, gr_pri_fe_go_idle_on_status_r())); | ||
9061 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n", | ||
9062 | gk20a_readl(g, gr_fe_go_idle_timeout_r())); | ||
9063 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_CHECK : 0x%x\n", | ||
9064 | gk20a_readl(g, gr_pri_fe_go_idle_check_r())); | ||
9065 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n", | ||
9066 | gk20a_readl(g, gr_pri_fe_go_idle_info_r())); | ||
9067 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n", | ||
9068 | gk20a_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r())); | ||
9069 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n", | ||
9070 | gk20a_readl(g, gr_fecs_ctxsw_status_fe_0_r())); | ||
9071 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n", | ||
9072 | gk20a_readl(g, gr_fecs_ctxsw_status_1_r())); | ||
9073 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n", | ||
9074 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r())); | ||
9075 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n", | ||
9076 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r())); | ||
9077 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n", | ||
9078 | gk20a_readl(g, gr_fecs_ctxsw_idlestate_r())); | ||
9079 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n", | ||
9080 | gk20a_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r())); | ||
9081 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n", | ||
9082 | gk20a_readl(g, gr_fecs_current_ctx_r())); | ||
9083 | gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n", | ||
9084 | gk20a_readl(g, gr_fecs_new_ctx_r())); | ||
9085 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_CROP_STATUS1 : 0x%x\n", | ||
9086 | gk20a_readl(g, gr_pri_be0_crop_status1_r())); | ||
9087 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_CROP_STATUS1 : 0x%x\n", | ||
9088 | gk20a_readl(g, gr_pri_bes_crop_status1_r())); | ||
9089 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS : 0x%x\n", | ||
9090 | gk20a_readl(g, gr_pri_be0_zrop_status_r())); | ||
9091 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_ZROP_STATUS2 : 0x%x\n", | ||
9092 | gk20a_readl(g, gr_pri_be0_zrop_status2_r())); | ||
9093 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS : 0x%x\n", | ||
9094 | gk20a_readl(g, gr_pri_bes_zrop_status_r())); | ||
9095 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_ZROP_STATUS2 : 0x%x\n", | ||
9096 | gk20a_readl(g, gr_pri_bes_zrop_status2_r())); | ||
9097 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION: 0x%x\n", | ||
9098 | gk20a_readl(g, gr_pri_be0_becs_be_exception_r())); | ||
9099 | gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_EN: 0x%x\n", | ||
9100 | gk20a_readl(g, gr_pri_be0_becs_be_exception_en_r())); | ||
9101 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n", | ||
9102 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r())); | ||
9103 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n", | ||
9104 | gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r())); | ||
9105 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n", | ||
9106 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r())); | ||
9107 | gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n", | ||
9108 | gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r())); | ||
9109 | return 0; | ||
9110 | } | ||
9111 | |||
9112 | static void gr_gk20a_init_cyclestats(struct gk20a *g) | ||
9113 | { | ||
9114 | #if defined(CONFIG_GK20A_CYCLE_STATS) | ||
9115 | g->gpu_characteristics.flags |= | ||
9116 | NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS; | ||
9117 | #else | ||
9118 | (void)g; | ||
9119 | #endif | ||
9120 | } | ||
9121 | |||
9122 | int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | 8154 | int gr_gk20a_set_sm_debug_mode(struct gk20a *g, |
9123 | struct channel_gk20a *ch, u64 sms, bool enable) | 8155 | struct channel_gk20a *ch, u64 sms, bool enable) |
9124 | { | 8156 | { |
@@ -9175,70 +8207,6 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
9175 | return err; | 8207 | return err; |
9176 | } | 8208 | } |
9177 | 8209 | ||
9178 | static void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | ||
9179 | { | ||
9180 | /* Check if we have at least one valid warp */ | ||
9181 | struct gr_gk20a *gr = &g->gr; | ||
9182 | u32 gpc, tpc, sm_id; | ||
9183 | u32 tpc_offset, gpc_offset, reg_offset; | ||
9184 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | ||
9185 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9186 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9187 | |||
9188 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9189 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9190 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9191 | |||
9192 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
9193 | gpc_offset = gpc_stride * gpc; | ||
9194 | reg_offset = tpc_offset + gpc_offset; | ||
9195 | |||
9196 | /* 64 bit read */ | ||
9197 | warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4) << 32; | ||
9198 | warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset); | ||
9199 | |||
9200 | |||
9201 | /* 64 bit read */ | ||
9202 | warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4) << 32; | ||
9203 | warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset); | ||
9204 | |||
9205 | /* 64 bit read */ | ||
9206 | warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4) << 32; | ||
9207 | warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset); | ||
9208 | |||
9209 | w_state[sm_id].valid_warps[0] = warps_valid; | ||
9210 | w_state[sm_id].trapped_warps[0] = warps_trapped; | ||
9211 | w_state[sm_id].paused_warps[0] = warps_paused; | ||
9212 | } | ||
9213 | |||
9214 | /* Only for debug purpose */ | ||
9215 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9216 | gk20a_dbg_fn("w_state[%d].valid_warps[0]: %llx\n", | ||
9217 | sm_id, w_state[sm_id].valid_warps[0]); | ||
9218 | gk20a_dbg_fn("w_state[%d].trapped_warps[0]: %llx\n", | ||
9219 | sm_id, w_state[sm_id].trapped_warps[0]); | ||
9220 | gk20a_dbg_fn("w_state[%d].paused_warps[0]: %llx\n", | ||
9221 | sm_id, w_state[sm_id].paused_warps[0]); | ||
9222 | } | ||
9223 | } | ||
9224 | |||
9225 | static void gr_gk20a_get_access_map(struct gk20a *g, | ||
9226 | u32 **whitelist, int *num_entries) | ||
9227 | { | ||
9228 | static u32 wl_addr_gk20a[] = { | ||
9229 | /* this list must be sorted (low to high) */ | ||
9230 | 0x404468, /* gr_pri_mme_max_instructions */ | ||
9231 | 0x418800, /* gr_pri_gpcs_setup_debug */ | ||
9232 | 0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */ | ||
9233 | 0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */ | ||
9234 | 0x419e10, /* gr_pri_gpcs_tpcs_sm_dbgr_control0 */ | ||
9235 | 0x419f78, /* gr_pri_gpcs_tpcs_sm_disp_ctrl */ | ||
9236 | }; | ||
9237 | |||
9238 | *whitelist = wl_addr_gk20a; | ||
9239 | *num_entries = ARRAY_SIZE(wl_addr_gk20a); | ||
9240 | } | ||
9241 | |||
9242 | /* | 8210 | /* |
9243 | * gr_gk20a_suspend_context() | 8211 | * gr_gk20a_suspend_context() |
9244 | * This API should be called with dbg_session lock held | 8212 | * This API should be called with dbg_session lock held |
@@ -9356,44 +8324,6 @@ clean_up: | |||
9356 | return err; | 8324 | return err; |
9357 | } | 8325 | } |
9358 | 8326 | ||
9359 | static int gr_gk20a_get_preemption_mode_flags(struct gk20a *g, | ||
9360 | struct nvgpu_preemption_modes_rec *preemption_modes_rec) | ||
9361 | { | ||
9362 | preemption_modes_rec->graphics_preemption_mode_flags = | ||
9363 | NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
9364 | preemption_modes_rec->compute_preemption_mode_flags = | ||
9365 | NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
9366 | |||
9367 | preemption_modes_rec->default_graphics_preempt_mode = | ||
9368 | NVGPU_GRAPHICS_PREEMPTION_MODE_WFI; | ||
9369 | preemption_modes_rec->default_compute_preempt_mode = | ||
9370 | NVGPU_COMPUTE_PREEMPTION_MODE_WFI; | ||
9371 | |||
9372 | return 0; | ||
9373 | } | ||
9374 | |||
9375 | static bool gr_gk20a_is_ltcs_ltss_addr_stub(struct gk20a *g, u32 addr) | ||
9376 | { | ||
9377 | return false; | ||
9378 | } | ||
9379 | |||
9380 | static bool gr_gk20a_is_ltcn_ltss_addr_stub(struct gk20a *g, u32 addr) | ||
9381 | { | ||
9382 | return false; | ||
9383 | } | ||
9384 | |||
9385 | static void gr_gk20a_split_lts_broadcast_addr_stub(struct gk20a *g, u32 addr, | ||
9386 | u32 *priv_addr_table, | ||
9387 | u32 *priv_addr_table_index) | ||
9388 | { | ||
9389 | } | ||
9390 | |||
9391 | static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr, | ||
9392 | u32 *priv_addr_table, | ||
9393 | u32 *priv_addr_table_index) | ||
9394 | { | ||
9395 | } | ||
9396 | |||
9397 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch) | 8327 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch) |
9398 | { | 8328 | { |
9399 | int err = 0; | 8329 | int err = 0; |
@@ -9565,100 +8495,3 @@ u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) | |||
9565 | 8495 | ||
9566 | return tpc_exception_en; | 8496 | return tpc_exception_en; |
9567 | } | 8497 | } |
9568 | |||
9569 | void gk20a_init_gr_ops(struct gpu_ops *gops) | ||
9570 | { | ||
9571 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | ||
9572 | gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults; | ||
9573 | gops->gr.cb_size_default = gr_gk20a_cb_size_default; | ||
9574 | gops->gr.calc_global_ctx_buffer_size = | ||
9575 | gr_gk20a_calc_global_ctx_buffer_size; | ||
9576 | gops->gr.commit_global_attrib_cb = gr_gk20a_commit_global_attrib_cb; | ||
9577 | gops->gr.commit_global_bundle_cb = gr_gk20a_commit_global_bundle_cb; | ||
9578 | gops->gr.commit_global_cb_manager = gr_gk20a_commit_global_cb_manager; | ||
9579 | gops->gr.commit_global_pagepool = gr_gk20a_commit_global_pagepool; | ||
9580 | gops->gr.handle_sw_method = gr_gk20a_handle_sw_method; | ||
9581 | gops->gr.set_alpha_circular_buffer_size = | ||
9582 | gk20a_gr_set_circular_buffer_size; | ||
9583 | gops->gr.set_circular_buffer_size = | ||
9584 | gk20a_gr_set_alpha_circular_buffer_size; | ||
9585 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; | ||
9586 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; | ||
9587 | gops->gr.is_valid_gfx_class = gr_gk20a_is_valid_gfx_class; | ||
9588 | gops->gr.is_valid_compute_class = gr_gk20a_is_valid_compute_class; | ||
9589 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; | ||
9590 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; | ||
9591 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; | ||
9592 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; | ||
9593 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; | ||
9594 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; | ||
9595 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | ||
9596 | gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask; | ||
9597 | gops->gr.free_channel_ctx = gk20a_free_channel_ctx; | ||
9598 | gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx; | ||
9599 | gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull; | ||
9600 | gops->gr.get_zcull_info = gr_gk20a_get_zcull_info; | ||
9601 | gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr; | ||
9602 | gops->gr.get_tpc_num = gr_gk20a_get_tpc_num; | ||
9603 | gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch; | ||
9604 | gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; | ||
9605 | gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; | ||
9606 | gops->gr.zbc_set_table = gk20a_gr_zbc_set_table; | ||
9607 | gops->gr.zbc_query_table = gr_gk20a_query_zbc; | ||
9608 | gops->gr.pmu_save_zbc = gr_gk20a_pmu_save_zbc; | ||
9609 | gops->gr.add_zbc = _gk20a_gr_zbc_set_table; | ||
9610 | gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size; | ||
9611 | gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; | ||
9612 | gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; | ||
9613 | gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; | ||
9614 | gops->gr.dump_gr_regs = gr_gk20a_dump_gr_status_regs; | ||
9615 | gops->gr.get_max_fbps_count = gr_gk20a_get_max_fbps_count; | ||
9616 | gops->gr.get_fbp_en_mask = gr_gk20a_get_fbp_en_mask; | ||
9617 | gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; | ||
9618 | gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; | ||
9619 | gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; | ||
9620 | gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; | ||
9621 | gops->gr.wait_empty = gr_gk20a_wait_idle; | ||
9622 | gops->gr.init_cyclestats = gr_gk20a_init_cyclestats; | ||
9623 | gops->gr.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode; | ||
9624 | gops->gr.bpt_reg_info = gr_gk20a_bpt_reg_info; | ||
9625 | gops->gr.get_access_map = gr_gk20a_get_access_map; | ||
9626 | gops->gr.handle_fecs_error = gk20a_gr_handle_fecs_error; | ||
9627 | gops->gr.mask_hww_warp_esr = gk20a_mask_hww_warp_esr; | ||
9628 | gops->gr.handle_sm_exception = gr_gk20a_handle_sm_exception; | ||
9629 | gops->gr.handle_tex_exception = gr_gk20a_handle_tex_exception; | ||
9630 | gops->gr.enable_gpc_exceptions = gk20a_gr_enable_gpc_exceptions; | ||
9631 | gops->gr.enable_exceptions = gk20a_gr_enable_exceptions; | ||
9632 | gops->gr.get_lrf_tex_ltc_dram_override = NULL; | ||
9633 | gops->gr.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode; | ||
9634 | gops->gr.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode; | ||
9635 | gops->gr.record_sm_error_state = gk20a_gr_record_sm_error_state; | ||
9636 | gops->gr.update_sm_error_state = gk20a_gr_update_sm_error_state; | ||
9637 | gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state; | ||
9638 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; | ||
9639 | gops->gr.resume_contexts = gr_gk20a_resume_contexts; | ||
9640 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; | ||
9641 | gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts; | ||
9642 | gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering; | ||
9643 | gops->gr.init_sm_id_table = gr_gk20a_init_sm_id_table; | ||
9644 | gops->gr.is_ltcs_ltss_addr = gr_gk20a_is_ltcs_ltss_addr_stub; | ||
9645 | gops->gr.is_ltcn_ltss_addr = gr_gk20a_is_ltcn_ltss_addr_stub; | ||
9646 | gops->gr.split_lts_broadcast_addr = | ||
9647 | gr_gk20a_split_lts_broadcast_addr_stub; | ||
9648 | gops->gr.split_ltc_broadcast_addr = | ||
9649 | gr_gk20a_split_ltc_broadcast_addr_stub; | ||
9650 | gops->gr.setup_rop_mapping = gr_gk20a_setup_rop_mapping; | ||
9651 | gops->gr.program_zcull_mapping = gr_gk20a_program_zcull_mapping; | ||
9652 | gops->gr.commit_global_timeslice = gr_gk20a_commit_global_timeslice; | ||
9653 | gops->gr.commit_inst = gr_gk20a_commit_inst; | ||
9654 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; | ||
9655 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; | ||
9656 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; | ||
9657 | gops->gr.inval_icache = gr_gk20a_inval_icache; | ||
9658 | gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; | ||
9659 | gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; | ||
9660 | gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; | ||
9661 | gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; | ||
9662 | gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; | ||
9663 | gops->gr.get_esr_sm_sel = gk20a_gr_get_esr_sm_sel; | ||
9664 | } | ||