diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 303 |
1 files changed, 173 insertions, 130 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index aa42e1dd..51a61de3 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -46,7 +46,6 @@ | |||
46 | #include "hw_pri_ringstation_sys_gk20a.h" | 46 | #include "hw_pri_ringstation_sys_gk20a.h" |
47 | #include "hw_pri_ringstation_gpc_gk20a.h" | 47 | #include "hw_pri_ringstation_gpc_gk20a.h" |
48 | #include "hw_pri_ringstation_fbp_gk20a.h" | 48 | #include "hw_pri_ringstation_fbp_gk20a.h" |
49 | #include "hw_proj_gk20a.h" | ||
50 | #include "hw_top_gk20a.h" | 49 | #include "hw_top_gk20a.h" |
51 | #include "hw_ltc_gk20a.h" | 50 | #include "hw_ltc_gk20a.h" |
52 | #include "hw_fb_gk20a.h" | 51 | #include "hw_fb_gk20a.h" |
@@ -815,6 +814,8 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
815 | u32 gpc_index, ppc_index; | 814 | u32 gpc_index, ppc_index; |
816 | u32 temp; | 815 | u32 temp; |
817 | u32 cbm_cfg_size1, cbm_cfg_size2; | 816 | u32 cbm_cfg_size1, cbm_cfg_size2; |
817 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
818 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
818 | 819 | ||
819 | gk20a_dbg_fn(""); | 820 | gk20a_dbg_fn(""); |
820 | 821 | ||
@@ -835,7 +836,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
835 | gr->tpc_count * gr->attrib_cb_size; | 836 | gr->tpc_count * gr->attrib_cb_size; |
836 | 837 | ||
837 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 838 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
838 | temp = proj_gpc_stride_v() * gpc_index; | 839 | temp = gpc_stride * gpc_index; |
839 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 840 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
840 | ppc_index++) { | 841 | ppc_index++) { |
841 | cbm_cfg_size1 = gr->attrib_cb_default_size * | 842 | cbm_cfg_size1 = gr->attrib_cb_default_size * |
@@ -845,7 +846,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
845 | 846 | ||
846 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 847 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
847 | gr_gpc0_ppc0_cbm_cfg_r() + temp + | 848 | gr_gpc0_ppc0_cbm_cfg_r() + temp + |
848 | proj_ppc_in_gpc_stride_v() * ppc_index, | 849 | ppc_in_gpc_stride * ppc_index, |
849 | gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | | 850 | gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | |
850 | gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | | 851 | gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | |
851 | gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); | 852 | gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); |
@@ -855,7 +856,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
855 | 856 | ||
856 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 857 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
857 | gr_gpc0_ppc0_cbm_cfg2_r() + temp + | 858 | gr_gpc0_ppc0_cbm_cfg2_r() + temp + |
858 | proj_ppc_in_gpc_stride_v() * ppc_index, | 859 | ppc_in_gpc_stride * ppc_index, |
859 | gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | | 860 | gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | |
860 | gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); | 861 | gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); |
861 | 862 | ||
@@ -1209,7 +1210,7 @@ static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, | |||
1209 | u32 gpcs_per_reg = 4; | 1210 | u32 gpcs_per_reg = 4; |
1210 | u32 pes_index; | 1211 | u32 pes_index; |
1211 | u32 tpc_count_pes; | 1212 | u32 tpc_count_pes; |
1212 | u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); | 1213 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
1213 | 1214 | ||
1214 | u32 alpha_target, beta_target; | 1215 | u32 alpha_target, beta_target; |
1215 | u32 alpha_bits, beta_bits; | 1216 | u32 alpha_bits, beta_bits; |
@@ -1309,14 +1310,16 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) | |||
1309 | u32 tpc_per_gpc; | 1310 | u32 tpc_per_gpc; |
1310 | u32 max_ways_evict = INVALID_MAX_WAYS; | 1311 | u32 max_ways_evict = INVALID_MAX_WAYS; |
1311 | u32 l1c_dbg_reg_val; | 1312 | u32 l1c_dbg_reg_val; |
1313 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1314 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1312 | 1315 | ||
1313 | gk20a_dbg_fn(""); | 1316 | gk20a_dbg_fn(""); |
1314 | 1317 | ||
1315 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { | 1318 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { |
1316 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 1319 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
1317 | gpc_offset = proj_gpc_stride_v() * gpc_index; | 1320 | gpc_offset = gpc_stride * gpc_index; |
1318 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | 1321 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { |
1319 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; | 1322 | tpc_offset = tpc_in_gpc_stride * tpc_index; |
1320 | 1323 | ||
1321 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | 1324 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, |
1322 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | 1325 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); |
@@ -3196,6 +3199,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3196 | u32 pes_heavy_index; | 3199 | u32 pes_heavy_index; |
3197 | u32 gpc_new_skip_mask; | 3200 | u32 gpc_new_skip_mask; |
3198 | u32 tmp; | 3201 | u32 tmp; |
3202 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3199 | 3203 | ||
3200 | tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r()); | 3204 | tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r()); |
3201 | gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp); | 3205 | gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp); |
@@ -3219,8 +3223,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3219 | tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r()); | 3223 | tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r()); |
3220 | gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); | 3224 | gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); |
3221 | 3225 | ||
3222 | gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); | 3226 | gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
3223 | gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v(); | 3227 | gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); |
3224 | 3228 | ||
3225 | if (!gr->gpc_count) { | 3229 | if (!gr->gpc_count) { |
3226 | gk20a_err(dev_from_gk20a(g), "gpc_count==0!"); | 3230 | gk20a_err(dev_from_gk20a(g), "gpc_count==0!"); |
@@ -3270,7 +3274,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3270 | 3274 | ||
3271 | tmp = gk20a_readl(g, | 3275 | tmp = gk20a_readl(g, |
3272 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + | 3276 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + |
3273 | gpc_index * proj_gpc_stride_v()); | 3277 | gpc_index * gpc_stride); |
3274 | 3278 | ||
3275 | pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp); | 3279 | pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp); |
3276 | pes_tpc_count = count_bits(pes_tpc_mask); | 3280 | pes_tpc_count = count_bits(pes_tpc_mask); |
@@ -3414,16 +3418,17 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3414 | bool delete_map = false; | 3418 | bool delete_map = false; |
3415 | bool gpc_sorted; | 3419 | bool gpc_sorted; |
3416 | int ret = 0; | 3420 | int ret = 0; |
3421 | int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
3422 | int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
3417 | 3423 | ||
3418 | init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3424 | init_frac = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3419 | init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3425 | init_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3420 | run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3426 | run_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3421 | sorted_num_tpcs = | 3427 | sorted_num_tpcs = |
3422 | kzalloc(proj_scal_max_gpcs_v() * | 3428 | kzalloc(num_gpcs * num_tpc_per_gpc * sizeof(s32), |
3423 | proj_scal_max_tpc_per_gpc_v() * sizeof(s32), | ||
3424 | GFP_KERNEL); | 3429 | GFP_KERNEL); |
3425 | sorted_to_unsorted_gpc_map = | 3430 | sorted_to_unsorted_gpc_map = |
3426 | kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3431 | kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3427 | 3432 | ||
3428 | if (!(init_frac && init_err && run_err && sorted_num_tpcs && | 3433 | if (!(init_frac && init_err && run_err && sorted_num_tpcs && |
3429 | sorted_to_unsorted_gpc_map)) { | 3434 | sorted_to_unsorted_gpc_map)) { |
@@ -3490,9 +3495,9 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3490 | } | 3495 | } |
3491 | 3496 | ||
3492 | if (gr->map_tiles == NULL) { | 3497 | if (gr->map_tiles == NULL) { |
3493 | gr->map_tile_count = proj_scal_max_gpcs_v(); | 3498 | gr->map_tile_count = num_gpcs; |
3494 | 3499 | ||
3495 | gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL); | 3500 | gr->map_tiles = kzalloc(num_gpcs * sizeof(u8), GFP_KERNEL); |
3496 | if (gr->map_tiles == NULL) { | 3501 | if (gr->map_tiles == NULL) { |
3497 | ret = -ENOMEM; | 3502 | ret = -ENOMEM; |
3498 | goto clean_up; | 3503 | goto clean_up; |
@@ -3628,11 +3633,11 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | |||
3628 | zcull_params->region_byte_multiplier = | 3633 | zcull_params->region_byte_multiplier = |
3629 | gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v(); | 3634 | gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v(); |
3630 | zcull_params->region_header_size = | 3635 | zcull_params->region_header_size = |
3631 | proj_scal_litter_num_gpcs_v() * | 3636 | nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * |
3632 | gr_zcull_save_restore_header_bytes_per_gpc_v(); | 3637 | gr_zcull_save_restore_header_bytes_per_gpc_v(); |
3633 | 3638 | ||
3634 | zcull_params->subregion_header_size = | 3639 | zcull_params->subregion_header_size = |
3635 | proj_scal_litter_num_gpcs_v() * | 3640 | nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * |
3636 | gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); | 3641 | gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); |
3637 | 3642 | ||
3638 | zcull_params->subregion_width_align_pixels = | 3643 | zcull_params->subregion_width_align_pixels = |
@@ -4082,19 +4087,22 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4082 | u32 rcp_conserv; | 4087 | u32 rcp_conserv; |
4083 | u32 offset; | 4088 | u32 offset; |
4084 | bool floorsweep = false; | 4089 | bool floorsweep = false; |
4090 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
4091 | int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
4092 | int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
4085 | 4093 | ||
4086 | if (!gr->map_tiles) | 4094 | if (!gr->map_tiles) |
4087 | return -1; | 4095 | return -1; |
4088 | 4096 | ||
4089 | zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() * | 4097 | zcull_map_tiles = kzalloc(num_gpcs * |
4090 | proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL); | 4098 | num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); |
4091 | if (!zcull_map_tiles) { | 4099 | if (!zcull_map_tiles) { |
4092 | gk20a_err(dev_from_gk20a(g), | 4100 | gk20a_err(dev_from_gk20a(g), |
4093 | "failed to allocate zcull temp buffers"); | 4101 | "failed to allocate zcull temp buffers"); |
4094 | return -ENOMEM; | 4102 | return -ENOMEM; |
4095 | } | 4103 | } |
4096 | zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() * | 4104 | zcull_bank_counters = kzalloc(num_gpcs * |
4097 | proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL); | 4105 | num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); |
4098 | 4106 | ||
4099 | if (!zcull_bank_counters) { | 4107 | if (!zcull_bank_counters) { |
4100 | gk20a_err(dev_from_gk20a(g), | 4108 | gk20a_err(dev_from_gk20a(g), |
@@ -4173,7 +4181,7 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4173 | gr->gpc_tpc_count[0]); | 4181 | gr->gpc_tpc_count[0]); |
4174 | 4182 | ||
4175 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 4183 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
4176 | offset = gpc_index * proj_gpc_stride_v(); | 4184 | offset = gpc_index * gpc_stride; |
4177 | 4185 | ||
4178 | if (floorsweep) { | 4186 | if (floorsweep) { |
4179 | gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, | 4187 | gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, |
@@ -4836,6 +4844,8 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4836 | struct gr_gk20a *gr = &g->gr; | 4844 | struct gr_gk20a *gr = &g->gr; |
4837 | u32 gpc_index, ppc_index, stride, val, offset; | 4845 | u32 gpc_index, ppc_index, stride, val, offset; |
4838 | u32 cb_size = data * 4; | 4846 | u32 cb_size = data * 4; |
4847 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
4848 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
4839 | 4849 | ||
4840 | gk20a_dbg_fn(""); | 4850 | gk20a_dbg_fn(""); |
4841 | 4851 | ||
@@ -4848,14 +4858,14 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4848 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | 4858 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); |
4849 | 4859 | ||
4850 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 4860 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
4851 | stride = proj_gpc_stride_v() * gpc_index; | 4861 | stride = gpc_stride * gpc_index; |
4852 | 4862 | ||
4853 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 4863 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
4854 | ppc_index++) { | 4864 | ppc_index++) { |
4855 | 4865 | ||
4856 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() + | 4866 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() + |
4857 | stride + | 4867 | stride + |
4858 | proj_ppc_in_gpc_stride_v() * ppc_index); | 4868 | ppc_in_gpc_stride * ppc_index); |
4859 | 4869 | ||
4860 | offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val); | 4870 | offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val); |
4861 | 4871 | ||
@@ -4869,7 +4879,7 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4869 | 4879 | ||
4870 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | 4880 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + |
4871 | stride + | 4881 | stride + |
4872 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 4882 | ppc_in_gpc_stride * ppc_index, val); |
4873 | 4883 | ||
4874 | val = set_field(val, | 4884 | val = set_field(val, |
4875 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), | 4885 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), |
@@ -4877,7 +4887,7 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4877 | 4887 | ||
4878 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | 4888 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + |
4879 | stride + | 4889 | stride + |
4880 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 4890 | ppc_in_gpc_stride * ppc_index, val); |
4881 | } | 4891 | } |
4882 | } | 4892 | } |
4883 | } | 4893 | } |
@@ -4888,6 +4898,8 @@ static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
4888 | u32 gpc_index, ppc_index, stride, val; | 4898 | u32 gpc_index, ppc_index, stride, val; |
4889 | u32 pd_ab_max_output; | 4899 | u32 pd_ab_max_output; |
4890 | u32 alpha_cb_size = data * 4; | 4900 | u32 alpha_cb_size = data * 4; |
4901 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
4902 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
4891 | 4903 | ||
4892 | gk20a_dbg_fn(""); | 4904 | gk20a_dbg_fn(""); |
4893 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | 4905 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) |
@@ -4910,22 +4922,20 @@ static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
4910 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | 4922 | gr_pd_ab_dist_cfg1_max_batches_init_f()); |
4911 | 4923 | ||
4912 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 4924 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
4913 | stride = proj_gpc_stride_v() * gpc_index; | 4925 | stride = gpc_stride * gpc_index; |
4914 | 4926 | ||
4915 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 4927 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
4916 | ppc_index++) { | 4928 | ppc_index++) { |
4917 | 4929 | ||
4918 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() + | 4930 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() + |
4919 | stride + | 4931 | stride + ppc_in_gpc_stride * ppc_index); |
4920 | proj_ppc_in_gpc_stride_v() * ppc_index); | ||
4921 | 4932 | ||
4922 | val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(), | 4933 | val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(), |
4923 | gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size * | 4934 | gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size * |
4924 | gr->pes_tpc_count[ppc_index][gpc_index])); | 4935 | gr->pes_tpc_count[ppc_index][gpc_index])); |
4925 | 4936 | ||
4926 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() + | 4937 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() + |
4927 | stride + | 4938 | stride + ppc_in_gpc_stride * ppc_index, val); |
4928 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | ||
4929 | } | 4939 | } |
4930 | } | 4940 | } |
4931 | } | 4941 | } |
@@ -5421,8 +5431,9 @@ int gk20a_gr_lock_down_sm(struct gk20a *g, | |||
5421 | u32 gpc, u32 tpc, u32 global_esr_mask, | 5431 | u32 gpc, u32 tpc, u32 global_esr_mask, |
5422 | bool check_errors) | 5432 | bool check_errors) |
5423 | { | 5433 | { |
5424 | u32 offset = | 5434 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5425 | proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; | 5435 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5436 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5426 | u32 dbgr_control0; | 5437 | u32 dbgr_control0; |
5427 | 5438 | ||
5428 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5439 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
@@ -5456,8 +5467,9 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g) | |||
5456 | void gk20a_gr_clear_sm_hww(struct gk20a *g, | 5467 | void gk20a_gr_clear_sm_hww(struct gk20a *g, |
5457 | u32 gpc, u32 tpc, u32 global_esr) | 5468 | u32 gpc, u32 tpc, u32 global_esr) |
5458 | { | 5469 | { |
5459 | u32 offset = proj_gpc_stride_v() * gpc + | 5470 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5460 | proj_tpc_in_gpc_stride_v() * tpc; | 5471 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5472 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5461 | 5473 | ||
5462 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | 5474 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, |
5463 | global_esr); | 5475 | global_esr); |
@@ -5477,8 +5489,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5477 | { | 5489 | { |
5478 | int ret = 0; | 5490 | int ret = 0; |
5479 | bool do_warp_sync = false, early_exit = false, ignore_debugger = false; | 5491 | bool do_warp_sync = false, early_exit = false, ignore_debugger = false; |
5480 | u32 offset = proj_gpc_stride_v() * gpc + | 5492 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5481 | proj_tpc_in_gpc_stride_v() * tpc; | 5493 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5494 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5482 | 5495 | ||
5483 | /* these three interrupts don't require locking down the SM. They can | 5496 | /* these three interrupts don't require locking down the SM. They can |
5484 | * be handled by usermode clients as they aren't fatal. Additionally, | 5497 | * be handled by usermode clients as they aren't fatal. Additionally, |
@@ -5590,8 +5603,9 @@ int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5590 | bool *post_event) | 5603 | bool *post_event) |
5591 | { | 5604 | { |
5592 | int ret = 0; | 5605 | int ret = 0; |
5593 | u32 offset = proj_gpc_stride_v() * gpc + | 5606 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5594 | proj_tpc_in_gpc_stride_v() * tpc; | 5607 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5608 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5595 | u32 esr; | 5609 | u32 esr; |
5596 | 5610 | ||
5597 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | 5611 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); |
@@ -5611,8 +5625,9 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5611 | bool *post_event, struct channel_gk20a *fault_ch) | 5625 | bool *post_event, struct channel_gk20a *fault_ch) |
5612 | { | 5626 | { |
5613 | int ret = 0; | 5627 | int ret = 0; |
5614 | u32 offset = proj_gpc_stride_v() * gpc + | 5628 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5615 | proj_tpc_in_gpc_stride_v() * tpc; | 5629 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5630 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5616 | u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r() | 5631 | u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r() |
5617 | + offset); | 5632 | + offset); |
5618 | 5633 | ||
@@ -5646,6 +5661,8 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5646 | struct gr_gk20a *gr = &g->gr; | 5661 | struct gr_gk20a *gr = &g->gr; |
5647 | u32 exception1 = gk20a_readl(g, gr_exception1_r()); | 5662 | u32 exception1 = gk20a_readl(g, gr_exception1_r()); |
5648 | u32 gpc_exception, global_esr; | 5663 | u32 gpc_exception, global_esr; |
5664 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
5665 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
5649 | 5666 | ||
5650 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); | 5667 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); |
5651 | 5668 | ||
@@ -5656,7 +5673,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5656 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5673 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5657 | "GPC%d exception pending", gpc); | 5674 | "GPC%d exception pending", gpc); |
5658 | 5675 | ||
5659 | gpc_offset = proj_gpc_stride_v() * gpc; | 5676 | gpc_offset = gpc_stride * gpc; |
5660 | 5677 | ||
5661 | gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r() | 5678 | gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r() |
5662 | + gpc_offset); | 5679 | + gpc_offset); |
@@ -5670,7 +5687,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5670 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5687 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5671 | "GPC%d: TPC%d exception pending", gpc, tpc); | 5688 | "GPC%d: TPC%d exception pending", gpc, tpc); |
5672 | 5689 | ||
5673 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 5690 | tpc_offset = tpc_in_gpc_stride * tpc; |
5674 | 5691 | ||
5675 | global_esr = gk20a_readl(g, | 5692 | global_esr = gk20a_readl(g, |
5676 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | 5693 | gr_gpc0_tpc0_sm_hww_global_esr_r() + |
@@ -6045,31 +6062,31 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
6045 | *ppc_num = 0; | 6062 | *ppc_num = 0; |
6046 | *be_num = 0; | 6063 | *be_num = 0; |
6047 | 6064 | ||
6048 | if (pri_is_gpc_addr(addr)) { | 6065 | if (pri_is_gpc_addr(g, addr)) { |
6049 | *addr_type = CTXSW_ADDR_TYPE_GPC; | 6066 | *addr_type = CTXSW_ADDR_TYPE_GPC; |
6050 | gpc_addr = pri_gpccs_addr_mask(addr); | 6067 | gpc_addr = pri_gpccs_addr_mask(addr); |
6051 | if (pri_is_gpc_addr_shared(addr)) { | 6068 | if (pri_is_gpc_addr_shared(g, addr)) { |
6052 | *addr_type = CTXSW_ADDR_TYPE_GPC; | 6069 | *addr_type = CTXSW_ADDR_TYPE_GPC; |
6053 | *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; | 6070 | *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; |
6054 | } else | 6071 | } else |
6055 | *gpc_num = pri_get_gpc_num(addr); | 6072 | *gpc_num = pri_get_gpc_num(g, addr); |
6056 | 6073 | ||
6057 | if (g->ops.gr.is_tpc_addr(gpc_addr)) { | 6074 | if (g->ops.gr.is_tpc_addr(g, gpc_addr)) { |
6058 | *addr_type = CTXSW_ADDR_TYPE_TPC; | 6075 | *addr_type = CTXSW_ADDR_TYPE_TPC; |
6059 | if (pri_is_tpc_addr_shared(gpc_addr)) { | 6076 | if (pri_is_tpc_addr_shared(g, gpc_addr)) { |
6060 | *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC; | 6077 | *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC; |
6061 | return 0; | 6078 | return 0; |
6062 | } | 6079 | } |
6063 | *tpc_num = g->ops.gr.get_tpc_num(gpc_addr); | 6080 | *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); |
6064 | } | 6081 | } |
6065 | return 0; | 6082 | return 0; |
6066 | } else if (pri_is_be_addr(addr)) { | 6083 | } else if (pri_is_be_addr(g, addr)) { |
6067 | *addr_type = CTXSW_ADDR_TYPE_BE; | 6084 | *addr_type = CTXSW_ADDR_TYPE_BE; |
6068 | if (pri_is_be_addr_shared(addr)) { | 6085 | if (pri_is_be_addr_shared(g, addr)) { |
6069 | *broadcast_flags |= PRI_BROADCAST_FLAGS_BE; | 6086 | *broadcast_flags |= PRI_BROADCAST_FLAGS_BE; |
6070 | return 0; | 6087 | return 0; |
6071 | } | 6088 | } |
6072 | *be_num = pri_get_be_num(addr); | 6089 | *be_num = pri_get_be_num(g, addr); |
6073 | return 0; | 6090 | return 0; |
6074 | } else { | 6091 | } else { |
6075 | *addr_type = CTXSW_ADDR_TYPE_SYS; | 6092 | *addr_type = CTXSW_ADDR_TYPE_SYS; |
@@ -6090,7 +6107,7 @@ static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, | |||
6090 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 6107 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
6091 | 6108 | ||
6092 | for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++) | 6109 | for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++) |
6093 | priv_addr_table[(*t)++] = pri_ppc_addr(pri_ppccs_addr_mask(addr), | 6110 | priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr), |
6094 | gpc_num, ppc_num); | 6111 | gpc_num, ppc_num); |
6095 | 6112 | ||
6096 | return 0; | 6113 | return 0; |
@@ -6133,7 +6150,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6133 | * so that we can look up the offset. */ | 6150 | * so that we can look up the offset. */ |
6134 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && | 6151 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && |
6135 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) | 6152 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) |
6136 | priv_addr_table[t++] = pri_be_shared_addr(addr); | 6153 | priv_addr_table[t++] = pri_be_shared_addr(g, addr); |
6137 | else | 6154 | else |
6138 | priv_addr_table[t++] = addr; | 6155 | priv_addr_table[t++] = addr; |
6139 | 6156 | ||
@@ -6152,7 +6169,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6152 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 6169 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
6153 | tpc_num++) | 6170 | tpc_num++) |
6154 | priv_addr_table[t++] = | 6171 | priv_addr_table[t++] = |
6155 | pri_tpc_addr(pri_tpccs_addr_mask(addr), | 6172 | pri_tpc_addr(g, pri_tpccs_addr_mask(addr), |
6156 | gpc_num, tpc_num); | 6173 | gpc_num, tpc_num); |
6157 | 6174 | ||
6158 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { | 6175 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { |
@@ -6162,7 +6179,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6162 | return err; | 6179 | return err; |
6163 | } else | 6180 | } else |
6164 | priv_addr_table[t++] = | 6181 | priv_addr_table[t++] = |
6165 | pri_gpc_addr(pri_gpccs_addr_mask(addr), | 6182 | pri_gpc_addr(g, pri_gpccs_addr_mask(addr), |
6166 | gpc_num); | 6183 | gpc_num); |
6167 | } | 6184 | } |
6168 | } else { | 6185 | } else { |
@@ -6171,7 +6188,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6171 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 6188 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
6172 | tpc_num++) | 6189 | tpc_num++) |
6173 | priv_addr_table[t++] = | 6190 | priv_addr_table[t++] = |
6174 | pri_tpc_addr(pri_tpccs_addr_mask(addr), | 6191 | pri_tpc_addr(g, pri_tpccs_addr_mask(addr), |
6175 | gpc_num, tpc_num); | 6192 | gpc_num, tpc_num); |
6176 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) | 6193 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) |
6177 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, | 6194 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, |
@@ -6403,6 +6420,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6403 | u32 vaddr_lo; | 6420 | u32 vaddr_lo; |
6404 | u32 vaddr_hi; | 6421 | u32 vaddr_hi; |
6405 | u32 tmp; | 6422 | u32 tmp; |
6423 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6424 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6406 | 6425 | ||
6407 | init_ovr_perf_reg_info(); | 6426 | init_ovr_perf_reg_info(); |
6408 | g->ops.gr.init_sm_dsm_reg_info(); | 6427 | g->ops.gr.init_sm_dsm_reg_info(); |
@@ -6413,8 +6432,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6413 | for (gpc = 0; gpc < num_gpc; gpc++) { | 6432 | for (gpc = 0; gpc < num_gpc; gpc++) { |
6414 | num_tpc = g->gr.gpc_tpc_count[gpc]; | 6433 | num_tpc = g->gr.gpc_tpc_count[gpc]; |
6415 | for (tpc = 0; tpc < num_tpc; tpc++) { | 6434 | for (tpc = 0; tpc < num_tpc; tpc++) { |
6416 | chk_addr = ((proj_gpc_stride_v() * gpc) + | 6435 | chk_addr = ((gpc_stride * gpc) + |
6417 | (proj_tpc_in_gpc_stride_v() * tpc) + | 6436 | (tpc_in_gpc_stride * tpc) + |
6418 | _ovr_perf_regs[reg]); | 6437 | _ovr_perf_regs[reg]); |
6419 | if (chk_addr != addr) | 6438 | if (chk_addr != addr) |
6420 | continue; | 6439 | continue; |
@@ -6461,18 +6480,19 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | |||
6461 | u32 tpc, gpc; | 6480 | u32 tpc, gpc; |
6462 | u32 gpc_tpc_addr; | 6481 | u32 gpc_tpc_addr; |
6463 | u32 gpc_tpc_stride; | 6482 | u32 gpc_tpc_stride; |
6483 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6484 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6464 | 6485 | ||
6465 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); | 6486 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); |
6466 | 6487 | ||
6467 | gpc = pri_get_gpc_num(offset); | 6488 | gpc = pri_get_gpc_num(g, offset); |
6468 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); | 6489 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); |
6469 | tpc = g->ops.gr.get_tpc_num(gpc_tpc_addr); | 6490 | tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr); |
6470 | 6491 | ||
6471 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ | 6492 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ |
6472 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ | 6493 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ |
6473 | 6494 | ||
6474 | gpc_tpc_stride = gpc * proj_gpc_stride_v() + | 6495 | gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride; |
6475 | tpc * proj_tpc_in_gpc_stride_v(); | ||
6476 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; | 6496 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; |
6477 | 6497 | ||
6478 | reg = gk20a_readl(g, gpc_tpc_addr); | 6498 | reg = gk20a_readl(g, gpc_tpc_addr); |
@@ -6552,7 +6572,6 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6552 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; | 6572 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; |
6553 | u32 num_ext_gpccs_ext_buffer_segments; | 6573 | u32 num_ext_gpccs_ext_buffer_segments; |
6554 | u32 inter_seg_offset; | 6574 | u32 inter_seg_offset; |
6555 | u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1); | ||
6556 | u32 max_tpc_count; | 6575 | u32 max_tpc_count; |
6557 | u32 *sm_dsm_perf_ctrl_regs = NULL; | 6576 | u32 *sm_dsm_perf_ctrl_regs = NULL; |
6558 | u32 num_sm_dsm_perf_ctrl_regs = 0; | 6577 | u32 num_sm_dsm_perf_ctrl_regs = 0; |
@@ -6563,15 +6582,20 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6563 | u32 control_register_stride = 0; | 6582 | u32 control_register_stride = 0; |
6564 | u32 perf_register_stride = 0; | 6583 | u32 perf_register_stride = 0; |
6565 | struct gr_gk20a *gr = &g->gr; | 6584 | struct gr_gk20a *gr = &g->gr; |
6585 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); | ||
6586 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6587 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
6588 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6589 | u32 tpc_gpc_mask = (tpc_in_gpc_stride - 1); | ||
6566 | 6590 | ||
6567 | /* Only have TPC registers in extended region, so if not a TPC reg, | 6591 | /* Only have TPC registers in extended region, so if not a TPC reg, |
6568 | then return error so caller can look elsewhere. */ | 6592 | then return error so caller can look elsewhere. */ |
6569 | if (pri_is_gpc_addr(addr)) { | 6593 | if (pri_is_gpc_addr(g, addr)) { |
6570 | u32 gpc_addr = 0; | 6594 | u32 gpc_addr = 0; |
6571 | gpc_num = pri_get_gpc_num(addr); | 6595 | gpc_num = pri_get_gpc_num(g, addr); |
6572 | gpc_addr = pri_gpccs_addr_mask(addr); | 6596 | gpc_addr = pri_gpccs_addr_mask(addr); |
6573 | if (g->ops.gr.is_tpc_addr(gpc_addr)) | 6597 | if (g->ops.gr.is_tpc_addr(g, gpc_addr)) |
6574 | tpc_num = g->ops.gr.get_tpc_num(gpc_addr); | 6598 | tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); |
6575 | else | 6599 | else |
6576 | return -EINVAL; | 6600 | return -EINVAL; |
6577 | 6601 | ||
@@ -6639,11 +6663,10 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6639 | gk20a_dbg_info("register match: 0x%08x", | 6663 | gk20a_dbg_info("register match: 0x%08x", |
6640 | sm_dsm_perf_regs[i]); | 6664 | sm_dsm_perf_regs[i]); |
6641 | 6665 | ||
6642 | chk_addr = (proj_gpc_base_v() + | 6666 | chk_addr = (gpc_base + gpc_stride * gpc_num) + |
6643 | (proj_gpc_stride_v() * gpc_num) + | 6667 | tpc_in_gpc_base + |
6644 | proj_tpc_in_gpc_base_v() + | 6668 | (tpc_in_gpc_stride * tpc_num) + |
6645 | (proj_tpc_in_gpc_stride_v() * tpc_num) + | 6669 | (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask); |
6646 | (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask)); | ||
6647 | 6670 | ||
6648 | if (chk_addr != addr) { | 6671 | if (chk_addr != addr) { |
6649 | gk20a_err(dev_from_gk20a(g), | 6672 | gk20a_err(dev_from_gk20a(g), |
@@ -6670,12 +6693,11 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6670 | gk20a_dbg_info("register match: 0x%08x", | 6693 | gk20a_dbg_info("register match: 0x%08x", |
6671 | sm_dsm_perf_ctrl_regs[i]); | 6694 | sm_dsm_perf_ctrl_regs[i]); |
6672 | 6695 | ||
6673 | chk_addr = (proj_gpc_base_v() + | 6696 | chk_addr = (gpc_base + gpc_stride * gpc_num) + |
6674 | (proj_gpc_stride_v() * gpc_num) + | 6697 | tpc_in_gpc_base + |
6675 | proj_tpc_in_gpc_base_v() + | 6698 | tpc_in_gpc_stride * tpc_num + |
6676 | (proj_tpc_in_gpc_stride_v() * tpc_num) + | ||
6677 | (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] & | 6699 | (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] & |
6678 | tpc_gpc_mask)); | 6700 | tpc_gpc_mask); |
6679 | 6701 | ||
6680 | if (chk_addr != addr) { | 6702 | if (chk_addr != addr) { |
6681 | gk20a_err(dev_from_gk20a(g), | 6703 | gk20a_err(dev_from_gk20a(g), |
@@ -6772,6 +6794,12 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6772 | u32 sys_offset, gpc_offset, tpc_offset, ppc_offset; | 6794 | u32 sys_offset, gpc_offset, tpc_offset, ppc_offset; |
6773 | u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr; | 6795 | u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr; |
6774 | struct aiv_gk20a *reg; | 6796 | struct aiv_gk20a *reg; |
6797 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); | ||
6798 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6799 | u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE); | ||
6800 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
6801 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
6802 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6775 | 6803 | ||
6776 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); | 6804 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); |
6777 | 6805 | ||
@@ -6800,10 +6828,10 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6800 | reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i]; | 6828 | reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i]; |
6801 | address = reg->addr; | 6829 | address = reg->addr; |
6802 | tpc_addr = pri_tpccs_addr_mask(address); | 6830 | tpc_addr = pri_tpccs_addr_mask(address); |
6803 | base_address = proj_gpc_base_v() + | 6831 | base_address = gpc_base + |
6804 | (gpc_num * proj_gpc_stride_v()) + | 6832 | (gpc_num * gpc_stride) + |
6805 | proj_tpc_in_gpc_base_v() + | 6833 | tpc_in_gpc_base + |
6806 | (tpc_num * proj_tpc_in_gpc_stride_v()); | 6834 | (tpc_num * tpc_in_gpc_stride); |
6807 | address = base_address + tpc_addr; | 6835 | address = base_address + tpc_addr; |
6808 | /* | 6836 | /* |
6809 | * The data for the TPCs is interleaved in the context buffer. | 6837 | * The data for the TPCs is interleaved in the context buffer. |
@@ -6828,10 +6856,10 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6828 | reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i]; | 6856 | reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i]; |
6829 | address = reg->addr; | 6857 | address = reg->addr; |
6830 | ppc_addr = pri_ppccs_addr_mask(address); | 6858 | ppc_addr = pri_ppccs_addr_mask(address); |
6831 | base_address = proj_gpc_base_v() + | 6859 | base_address = gpc_base + |
6832 | (gpc_num * proj_gpc_stride_v()) + | 6860 | (gpc_num * gpc_stride) + |
6833 | proj_ppc_in_gpc_base_v() + | 6861 | ppc_in_gpc_base + |
6834 | (ppc_num * proj_ppc_in_gpc_stride_v()); | 6862 | (ppc_num * ppc_in_gpc_stride); |
6835 | address = base_address + ppc_addr; | 6863 | address = base_address + ppc_addr; |
6836 | /* | 6864 | /* |
6837 | * The data for the PPCs is interleaved in the context buffer. | 6865 | * The data for the PPCs is interleaved in the context buffer. |
@@ -6859,8 +6887,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6859 | gpc_addr = pri_gpccs_addr_mask(address); | 6887 | gpc_addr = pri_gpccs_addr_mask(address); |
6860 | gpc_offset = reg->index; | 6888 | gpc_offset = reg->index; |
6861 | 6889 | ||
6862 | base_address = proj_gpc_base_v() + | 6890 | base_address = gpc_base + (gpc_num * gpc_stride); |
6863 | (gpc_num * proj_gpc_stride_v()); | ||
6864 | address = base_address + gpc_addr; | 6891 | address = base_address + gpc_addr; |
6865 | 6892 | ||
6866 | if (pri_addr == address) { | 6893 | if (pri_addr == address) { |
@@ -6879,7 +6906,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
6879 | u32 *reg_ppc_count) | 6906 | u32 *reg_ppc_count) |
6880 | { | 6907 | { |
6881 | u32 data32; | 6908 | u32 data32; |
6882 | u32 litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); | 6909 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
6883 | 6910 | ||
6884 | /* | 6911 | /* |
6885 | * if there is only 1 PES_PER_GPC, then we put the PES registers | 6912 | * if there is only 1 PES_PER_GPC, then we put the PES registers |
@@ -6887,7 +6914,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
6887 | */ | 6914 | */ |
6888 | if ((!g->gr.ctx_vars.valid) || | 6915 | if ((!g->gr.ctx_vars.valid) || |
6889 | ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && | 6916 | ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && |
6890 | (litter_num_pes_per_gpc > 1))) | 6917 | (num_pes_per_gpc > 1))) |
6891 | return -EINVAL; | 6918 | return -EINVAL; |
6892 | 6919 | ||
6893 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); | 6920 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); |
@@ -7028,9 +7055,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7028 | /* The ucode stores TPC/PPC data before GPC data. | 7055 | /* The ucode stores TPC/PPC data before GPC data. |
7029 | * Advance offset past TPC/PPC data to GPC data. */ | 7056 | * Advance offset past TPC/PPC data to GPC data. */ |
7030 | /* note 1 PES_PER_GPC case */ | 7057 | /* note 1 PES_PER_GPC case */ |
7031 | u32 litter_num_pes_per_gpc = | 7058 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, |
7032 | proj_scal_litter_num_pes_per_gpc_v(); | 7059 | GPU_LIT_NUM_PES_PER_GPC); |
7033 | if (litter_num_pes_per_gpc > 1) { | 7060 | if (num_pes_per_gpc > 1) { |
7034 | offset_to_segment += | 7061 | offset_to_segment += |
7035 | (((gr->ctx_vars.ctxsw_regs.tpc.count * | 7062 | (((gr->ctx_vars.ctxsw_regs.tpc.count * |
7036 | num_tpcs) << 2) + | 7063 | num_tpcs) << 2) + |
@@ -7136,33 +7163,37 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, | |||
7136 | { | 7163 | { |
7137 | u32 num_gpcs = g->gr.gpc_count; | 7164 | u32 num_gpcs = g->gr.gpc_count; |
7138 | u32 num_ppcs, num_tpcs, gpc_num, base; | 7165 | u32 num_ppcs, num_tpcs, gpc_num, base; |
7166 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); | ||
7167 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7168 | u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE); | ||
7169 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
7170 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
7171 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7139 | 7172 | ||
7140 | for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { | 7173 | for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { |
7141 | num_tpcs = g->gr.gpc_tpc_count[gpc_num]; | 7174 | num_tpcs = g->gr.gpc_tpc_count[gpc_num]; |
7142 | base = proj_gpc_base_v() + | 7175 | base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base; |
7143 | (proj_gpc_stride_v() * gpc_num) + proj_tpc_in_gpc_base_v(); | ||
7144 | if (add_ctxsw_buffer_map_entries_subunits(map, | 7176 | if (add_ctxsw_buffer_map_entries_subunits(map, |
7145 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc, | 7177 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc, |
7146 | count, offset, max_cnt, base, num_tpcs, | 7178 | count, offset, max_cnt, base, num_tpcs, |
7147 | proj_tpc_in_gpc_stride_v(), | 7179 | tpc_in_gpc_stride, |
7148 | (proj_tpc_in_gpc_stride_v() - 1))) | 7180 | (tpc_in_gpc_stride - 1))) |
7149 | return -EINVAL; | 7181 | return -EINVAL; |
7150 | 7182 | ||
7151 | num_ppcs = g->gr.gpc_ppc_count[gpc_num]; | 7183 | num_ppcs = g->gr.gpc_ppc_count[gpc_num]; |
7152 | base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num) + | 7184 | base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; |
7153 | proj_ppc_in_gpc_base_v(); | ||
7154 | if (add_ctxsw_buffer_map_entries_subunits(map, | 7185 | if (add_ctxsw_buffer_map_entries_subunits(map, |
7155 | &g->gr.ctx_vars.ctxsw_regs.pm_ppc, | 7186 | &g->gr.ctx_vars.ctxsw_regs.pm_ppc, |
7156 | count, offset, max_cnt, base, num_ppcs, | 7187 | count, offset, max_cnt, base, num_ppcs, |
7157 | proj_ppc_in_gpc_stride_v(), | 7188 | ppc_in_gpc_stride, |
7158 | (proj_ppc_in_gpc_stride_v() - 1))) | 7189 | (ppc_in_gpc_stride - 1))) |
7159 | return -EINVAL; | 7190 | return -EINVAL; |
7160 | 7191 | ||
7161 | base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num); | 7192 | base = gpc_base + (gpc_stride * gpc_num); |
7162 | if (add_ctxsw_buffer_map_entries(map, | 7193 | if (add_ctxsw_buffer_map_entries(map, |
7163 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc, | 7194 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc, |
7164 | count, offset, max_cnt, base, | 7195 | count, offset, max_cnt, base, |
7165 | (proj_gpc_stride_v() - 1))) | 7196 | (gpc_stride - 1))) |
7166 | return -EINVAL; | 7197 | return -EINVAL; |
7167 | 7198 | ||
7168 | base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num); | 7199 | base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num); |
@@ -7242,6 +7273,9 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7242 | u32 i, count = 0; | 7273 | u32 i, count = 0; |
7243 | u32 offset = 0; | 7274 | u32 offset = 0; |
7244 | struct ctxsw_buf_offset_map_entry *map; | 7275 | struct ctxsw_buf_offset_map_entry *map; |
7276 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
7277 | u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); | ||
7278 | u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE); | ||
7245 | 7279 | ||
7246 | if (hwpm_ctxsw_buffer_size == 0) { | 7280 | if (hwpm_ctxsw_buffer_size == 0) { |
7247 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | 7281 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, |
@@ -7289,8 +7323,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7289 | &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, | 7323 | &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, |
7290 | &count, &offset, | 7324 | &count, &offset, |
7291 | hwpm_ctxsw_reg_count_max, 0, | 7325 | hwpm_ctxsw_reg_count_max, 0, |
7292 | proj_scal_litter_num_fbpas_v(), | 7326 | num_fbpas, fbpa_stride, ~0)) |
7293 | proj_fbpa_stride_v(), ~0)) | ||
7294 | goto cleanup; | 7327 | goto cleanup; |
7295 | 7328 | ||
7296 | /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ | 7329 | /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ |
@@ -7298,7 +7331,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7298 | &g->gr.ctx_vars.ctxsw_regs.pm_ltc, | 7331 | &g->gr.ctx_vars.ctxsw_regs.pm_ltc, |
7299 | &count, &offset, | 7332 | &count, &offset, |
7300 | hwpm_ctxsw_reg_count_max, 0, | 7333 | hwpm_ctxsw_reg_count_max, 0, |
7301 | g->ltc_count, proj_ltc_stride_v(), ~0)) | 7334 | g->ltc_count, ltc_stride, ~0)) |
7302 | goto cleanup; | 7335 | goto cleanup; |
7303 | 7336 | ||
7304 | offset = ALIGN(offset, 256); | 7337 | offset = ALIGN(offset, 256); |
@@ -7737,25 +7770,28 @@ void gk20a_init_gr(struct gk20a *g) | |||
7737 | init_waitqueue_head(&g->gr.init_wq); | 7770 | init_waitqueue_head(&g->gr.init_wq); |
7738 | } | 7771 | } |
7739 | 7772 | ||
7740 | static bool gr_gk20a_is_tpc_addr(u32 addr) | 7773 | static bool gr_gk20a_is_tpc_addr(struct gk20a *g, u32 addr) |
7741 | { | 7774 | { |
7742 | return ((addr >= proj_tpc_in_gpc_base_v()) && | 7775 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); |
7743 | (addr < proj_tpc_in_gpc_base_v() + | 7776 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
7744 | (proj_scal_litter_num_tpc_per_gpc_v() * | 7777 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
7745 | proj_tpc_in_gpc_stride_v()))) | 7778 | return ((addr >= tpc_in_gpc_base) && |
7746 | || pri_is_tpc_addr_shared(addr); | 7779 | (addr < tpc_in_gpc_base + |
7780 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
7781 | || pri_is_tpc_addr_shared(g, addr); | ||
7747 | } | 7782 | } |
7748 | 7783 | ||
7749 | static u32 gr_gk20a_get_tpc_num(u32 addr) | 7784 | static u32 gr_gk20a_get_tpc_num(struct gk20a *g, u32 addr) |
7750 | { | 7785 | { |
7751 | u32 i, start; | 7786 | u32 i, start; |
7752 | u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v(); | 7787 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
7788 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
7789 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7753 | 7790 | ||
7754 | for (i = 0; i < num_tpcs; i++) { | 7791 | for (i = 0; i < num_tpcs; i++) { |
7755 | start = proj_tpc_in_gpc_base_v() + | 7792 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); |
7756 | (i * proj_tpc_in_gpc_stride_v()); | ||
7757 | if ((addr >= start) && | 7793 | if ((addr >= start) && |
7758 | (addr < (start + proj_tpc_in_gpc_stride_v()))) | 7794 | (addr < (start + tpc_in_gpc_stride))) |
7759 | return i; | 7795 | return i; |
7760 | } | 7796 | } |
7761 | return 0; | 7797 | return 0; |
@@ -7768,8 +7804,10 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
7768 | bool no_error_pending; | 7804 | bool no_error_pending; |
7769 | u32 delay = GR_IDLE_CHECK_DEFAULT; | 7805 | u32 delay = GR_IDLE_CHECK_DEFAULT; |
7770 | bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); | 7806 | bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); |
7807 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7808 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7771 | u32 offset = | 7809 | u32 offset = |
7772 | proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; | 7810 | gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
7773 | 7811 | ||
7774 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 7812 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
7775 | "GPC%d TPC%d: locking down SM", gpc, tpc); | 7813 | "GPC%d TPC%d: locking down SM", gpc, tpc); |
@@ -7828,9 +7866,9 @@ void gk20a_suspend_single_sm(struct gk20a *g, | |||
7828 | u32 offset; | 7866 | u32 offset; |
7829 | int err; | 7867 | int err; |
7830 | u32 dbgr_control0; | 7868 | u32 dbgr_control0; |
7831 | 7869 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | |
7832 | offset = proj_gpc_stride_v() * gpc + | 7870 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
7833 | proj_tpc_in_gpc_stride_v() * tpc; | 7871 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
7834 | 7872 | ||
7835 | /* if an SM debugger isn't attached, skip suspend */ | 7873 | /* if an SM debugger isn't attached, skip suspend */ |
7836 | if (!gk20a_gr_sm_debugger_attached(g)) { | 7874 | if (!gk20a_gr_sm_debugger_attached(g)) { |
@@ -7899,6 +7937,8 @@ void gk20a_resume_single_sm(struct gk20a *g, | |||
7899 | { | 7937 | { |
7900 | u32 dbgr_control0; | 7938 | u32 dbgr_control0; |
7901 | u32 offset; | 7939 | u32 offset; |
7940 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7941 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7902 | /* | 7942 | /* |
7903 | * The following requires some clarification. Despite the fact that both | 7943 | * The following requires some clarification. Despite the fact that both |
7904 | * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their | 7944 | * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their |
@@ -7912,8 +7952,7 @@ void gk20a_resume_single_sm(struct gk20a *g, | |||
7912 | * effect, before enabling the run trigger. | 7952 | * effect, before enabling the run trigger. |
7913 | */ | 7953 | */ |
7914 | 7954 | ||
7915 | offset = proj_gpc_stride_v() * gpc + | 7955 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
7916 | proj_tpc_in_gpc_stride_v() * tpc; | ||
7917 | 7956 | ||
7918 | /*De-assert stop trigger */ | 7957 | /*De-assert stop trigger */ |
7919 | dbgr_control0 = | 7958 | dbgr_control0 = |
@@ -8144,6 +8183,8 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
8144 | { | 8183 | { |
8145 | struct nvgpu_dbg_gpu_reg_op *ops; | 8184 | struct nvgpu_dbg_gpu_reg_op *ops; |
8146 | int i = 0, sm_id, err; | 8185 | int i = 0, sm_id, err; |
8186 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
8187 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8147 | 8188 | ||
8148 | ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL); | 8189 | ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL); |
8149 | if (!ops) | 8190 | if (!ops) |
@@ -8158,8 +8199,8 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
8158 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 8199 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
8159 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 8200 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
8160 | 8201 | ||
8161 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 8202 | tpc_offset = tpc_in_gpc_stride * tpc; |
8162 | gpc_offset = proj_gpc_stride_v() * gpc; | 8203 | gpc_offset = gpc_stride * gpc; |
8163 | reg_offset = tpc_offset + gpc_offset; | 8204 | reg_offset = tpc_offset + gpc_offset; |
8164 | 8205 | ||
8165 | ops[i].op = REGOP(WRITE_32); | 8206 | ops[i].op = REGOP(WRITE_32); |
@@ -8199,13 +8240,15 @@ static void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | |||
8199 | u32 gpc, tpc, sm_id; | 8240 | u32 gpc, tpc, sm_id; |
8200 | u32 tpc_offset, gpc_offset, reg_offset; | 8241 | u32 tpc_offset, gpc_offset, reg_offset; |
8201 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | 8242 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; |
8243 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
8244 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8202 | 8245 | ||
8203 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | 8246 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { |
8204 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 8247 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
8205 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 8248 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
8206 | 8249 | ||
8207 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 8250 | tpc_offset = tpc_in_gpc_stride * tpc; |
8208 | gpc_offset = proj_gpc_stride_v() * gpc; | 8251 | gpc_offset = gpc_stride * gpc; |
8209 | reg_offset = tpc_offset + gpc_offset; | 8252 | reg_offset = tpc_offset + gpc_offset; |
8210 | 8253 | ||
8211 | /* 64 bit read */ | 8254 | /* 64 bit read */ |