diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-06 16:10:32 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-15 11:48:20 -0400 |
commit | 6839341bf8ffafa115cfc0427bba694ee1d131f3 (patch) | |
tree | 1f9369a3bacf0f1a2cc23371f5de988efdc07c31 | |
parent | 61e009c0f8874898335e6c47a610233c3382be47 (diff) |
gpu: nvgpu: Add litter values HAL
Move per-chip constants to be returned by a chip-specific function.
Implement get_litter_value() for each chip.
Change-Id: I2a2730fce14010924d2507f6fa15cc2ea0795113
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1121383
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 31 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 303 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | 101 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 74 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 76 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 74 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 21 |
11 files changed, 486 insertions, 223 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index a3b02481..00635c4d 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include "hw_pbdma_gk20a.h" | 32 | #include "hw_pbdma_gk20a.h" |
33 | #include "hw_ccsr_gk20a.h" | 33 | #include "hw_ccsr_gk20a.h" |
34 | #include "hw_ram_gk20a.h" | 34 | #include "hw_ram_gk20a.h" |
35 | #include "hw_proj_gk20a.h" | ||
36 | #include "hw_top_gk20a.h" | 35 | #include "hw_top_gk20a.h" |
37 | #include "hw_mc_gk20a.h" | 36 | #include "hw_mc_gk20a.h" |
38 | #include "hw_gr_gk20a.h" | 37 | #include "hw_gr_gk20a.h" |
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 87f0683f..b1d35141 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include "regops_gk20a.h" | 28 | #include "regops_gk20a.h" |
29 | #include "hw_gr_gk20a.h" | 29 | #include "hw_gr_gk20a.h" |
30 | #include "hw_fb_gk20a.h" | 30 | #include "hw_fb_gk20a.h" |
31 | #include "hw_proj_gk20a.h" | ||
32 | #include "hw_timer_gk20a.h" | 31 | #include "hw_timer_gk20a.h" |
33 | 32 | ||
34 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) | 33 | int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) |
@@ -451,15 +450,17 @@ static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) | |||
451 | u32 gpc_offset, tpc_offset, gpc, tpc; | 450 | u32 gpc_offset, tpc_offset, gpc, tpc; |
452 | struct gr_gk20a *gr = &g->gr; | 451 | struct gr_gk20a *gr = &g->gr; |
453 | u32 global_esr; | 452 | u32 global_esr; |
453 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
454 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
454 | 455 | ||
455 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | 456 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { |
456 | 457 | ||
457 | gpc_offset = proj_gpc_stride_v() * gpc; | 458 | gpc_offset = gpc_stride * gpc; |
458 | 459 | ||
459 | /* check if any tpc has an exception */ | 460 | /* check if any tpc has an exception */ |
460 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { | 461 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { |
461 | 462 | ||
462 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 463 | tpc_offset = tpc_in_gpc_stride * tpc; |
463 | 464 | ||
464 | global_esr = gk20a_readl(g, | 465 | global_esr = gk20a_readl(g, |
465 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | 466 | gr_gpc0_tpc0_sm_hww_global_esr_r() + |
@@ -482,13 +483,15 @@ static int nvgpu_gpu_ioctl_has_any_exception( | |||
482 | struct gr_gk20a *gr = &g->gr; | 483 | struct gr_gk20a *gr = &g->gr; |
483 | u32 sm_id, tpc_exception_en = 0; | 484 | u32 sm_id, tpc_exception_en = 0; |
484 | u32 offset, regval, tpc_offset, gpc_offset; | 485 | u32 offset, regval, tpc_offset, gpc_offset; |
486 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
487 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
485 | 488 | ||
486 | mutex_lock(&g->dbg_sessions_lock); | 489 | mutex_lock(&g->dbg_sessions_lock); |
487 | 490 | ||
488 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | 491 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { |
489 | 492 | ||
490 | tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index; | 493 | tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index; |
491 | gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index; | 494 | gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index; |
492 | offset = tpc_offset + gpc_offset; | 495 | offset = tpc_offset + gpc_offset; |
493 | 496 | ||
494 | regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + | 497 | regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 44329a53..33ed9a04 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include "hw_pbdma_gk20a.h" | 31 | #include "hw_pbdma_gk20a.h" |
32 | #include "hw_ccsr_gk20a.h" | 32 | #include "hw_ccsr_gk20a.h" |
33 | #include "hw_ram_gk20a.h" | 33 | #include "hw_ram_gk20a.h" |
34 | #include "hw_proj_gk20a.h" | ||
35 | #include "hw_top_gk20a.h" | 34 | #include "hw_top_gk20a.h" |
36 | #include "hw_mc_gk20a.h" | 35 | #include "hw_mc_gk20a.h" |
37 | #include "hw_gr_gk20a.h" | 36 | #include "hw_gr_gk20a.h" |
@@ -349,6 +348,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) | |||
349 | u32 timeout; | 348 | u32 timeout; |
350 | int i; | 349 | int i; |
351 | struct gk20a_platform *platform = dev_get_drvdata(g->dev); | 350 | struct gk20a_platform *platform = dev_get_drvdata(g->dev); |
351 | u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); | ||
352 | 352 | ||
353 | gk20a_dbg_fn(""); | 353 | gk20a_dbg_fn(""); |
354 | /* enable pmc pfifo */ | 354 | /* enable pmc pfifo */ |
@@ -367,7 +367,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) | |||
367 | 367 | ||
368 | /* enable pbdma */ | 368 | /* enable pbdma */ |
369 | mask = 0; | 369 | mask = 0; |
370 | for (i = 0; i < proj_host_num_pbdma_v(); ++i) | 370 | for (i = 0; i < host_num_pbdma; ++i) |
371 | mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i); | 371 | mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i); |
372 | gk20a_writel(g, mc_enable_pb_r(), mask); | 372 | gk20a_writel(g, mc_enable_pb_r(), mask); |
373 | 373 | ||
@@ -378,7 +378,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) | |||
378 | 378 | ||
379 | /* enable pbdma interrupt */ | 379 | /* enable pbdma interrupt */ |
380 | mask = 0; | 380 | mask = 0; |
381 | for (i = 0; i < proj_host_num_pbdma_v(); i++) { | 381 | for (i = 0; i < host_num_pbdma; i++) { |
382 | intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i)); | 382 | intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i)); |
383 | intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f(); | 383 | intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f(); |
384 | gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall); | 384 | gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall); |
@@ -487,7 +487,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
487 | 487 | ||
488 | f->num_channels = g->ops.fifo.get_num_fifos(g); | 488 | f->num_channels = g->ops.fifo.get_num_fifos(g); |
489 | f->num_runlist_entries = fifo_eng_runlist_length_max_v(); | 489 | f->num_runlist_entries = fifo_eng_runlist_length_max_v(); |
490 | f->num_pbdma = proj_host_num_pbdma_v(); | 490 | f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); |
491 | f->max_engines = ENGINE_INVAL_GK20A; | 491 | f->max_engines = ENGINE_INVAL_GK20A; |
492 | 492 | ||
493 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); | 493 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 71271a2c..e17392d0 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -72,6 +72,32 @@ enum gk20a_cbc_op { | |||
72 | #define MC_INTR_UNIT_DISABLE false | 72 | #define MC_INTR_UNIT_DISABLE false |
73 | #define MC_INTR_UNIT_ENABLE true | 73 | #define MC_INTR_UNIT_ENABLE true |
74 | 74 | ||
75 | enum nvgpu_litter_value { | ||
76 | GPU_LIT_NUM_GPCS, | ||
77 | GPU_LIT_NUM_PES_PER_GPC, | ||
78 | GPU_LIT_NUM_ZCULL_BANKS, | ||
79 | GPU_LIT_NUM_TPC_PER_GPC, | ||
80 | GPU_LIT_NUM_FBPS, | ||
81 | GPU_LIT_GPC_BASE, | ||
82 | GPU_LIT_GPC_STRIDE, | ||
83 | GPU_LIT_GPC_SHARED_BASE, | ||
84 | GPU_LIT_TPC_IN_GPC_BASE, | ||
85 | GPU_LIT_TPC_IN_GPC_STRIDE, | ||
86 | GPU_LIT_TPC_IN_GPC_SHARED_BASE, | ||
87 | GPU_LIT_PPC_IN_GPC_BASE, | ||
88 | GPU_LIT_PPC_IN_GPC_STRIDE, | ||
89 | GPU_LIT_ROP_BASE, | ||
90 | GPU_LIT_ROP_STRIDE, | ||
91 | GPU_LIT_ROP_SHARED_BASE, | ||
92 | GPU_LIT_HOST_NUM_PBDMA, | ||
93 | GPU_LIT_LTC_STRIDE, | ||
94 | GPU_LIT_LTS_STRIDE, | ||
95 | GPU_LIT_NUM_FBPAS, | ||
96 | GPU_LIT_FBPA_STRIDE, | ||
97 | }; | ||
98 | |||
99 | #define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), v) | ||
100 | |||
75 | struct gpu_ops { | 101 | struct gpu_ops { |
76 | struct { | 102 | struct { |
77 | int (*determine_L2_size_bytes)(struct gk20a *gk20a); | 103 | int (*determine_L2_size_bytes)(struct gk20a *gk20a); |
@@ -151,8 +177,8 @@ struct gpu_ops { | |||
151 | u32 mode); | 177 | u32 mode); |
152 | int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr, | 178 | int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr, |
153 | struct gr_zcull_info *zcull_params); | 179 | struct gr_zcull_info *zcull_params); |
154 | bool (*is_tpc_addr)(u32 addr); | 180 | bool (*is_tpc_addr)(struct gk20a *g, u32 addr); |
155 | u32 (*get_tpc_num)(u32 addr); | 181 | u32 (*get_tpc_num)(struct gk20a *g, u32 addr); |
156 | void (*detect_sm_arch)(struct gk20a *g); | 182 | void (*detect_sm_arch)(struct gk20a *g); |
157 | int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr, | 183 | int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr, |
158 | struct zbc_entry *color_val, u32 index); | 184 | struct zbc_entry *color_val, u32 index); |
@@ -526,6 +552,7 @@ struct gpu_ops { | |||
526 | size_t scatter_buffer_size); | 552 | size_t scatter_buffer_size); |
527 | } cde; | 553 | } cde; |
528 | 554 | ||
555 | int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value); | ||
529 | int (*chip_init_gpu_characteristics)(struct gk20a *g); | 556 | int (*chip_init_gpu_characteristics)(struct gk20a *g); |
530 | }; | 557 | }; |
531 | 558 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index aa42e1dd..51a61de3 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -46,7 +46,6 @@ | |||
46 | #include "hw_pri_ringstation_sys_gk20a.h" | 46 | #include "hw_pri_ringstation_sys_gk20a.h" |
47 | #include "hw_pri_ringstation_gpc_gk20a.h" | 47 | #include "hw_pri_ringstation_gpc_gk20a.h" |
48 | #include "hw_pri_ringstation_fbp_gk20a.h" | 48 | #include "hw_pri_ringstation_fbp_gk20a.h" |
49 | #include "hw_proj_gk20a.h" | ||
50 | #include "hw_top_gk20a.h" | 49 | #include "hw_top_gk20a.h" |
51 | #include "hw_ltc_gk20a.h" | 50 | #include "hw_ltc_gk20a.h" |
52 | #include "hw_fb_gk20a.h" | 51 | #include "hw_fb_gk20a.h" |
@@ -815,6 +814,8 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
815 | u32 gpc_index, ppc_index; | 814 | u32 gpc_index, ppc_index; |
816 | u32 temp; | 815 | u32 temp; |
817 | u32 cbm_cfg_size1, cbm_cfg_size2; | 816 | u32 cbm_cfg_size1, cbm_cfg_size2; |
817 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
818 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
818 | 819 | ||
819 | gk20a_dbg_fn(""); | 820 | gk20a_dbg_fn(""); |
820 | 821 | ||
@@ -835,7 +836,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
835 | gr->tpc_count * gr->attrib_cb_size; | 836 | gr->tpc_count * gr->attrib_cb_size; |
836 | 837 | ||
837 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 838 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
838 | temp = proj_gpc_stride_v() * gpc_index; | 839 | temp = gpc_stride * gpc_index; |
839 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 840 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
840 | ppc_index++) { | 841 | ppc_index++) { |
841 | cbm_cfg_size1 = gr->attrib_cb_default_size * | 842 | cbm_cfg_size1 = gr->attrib_cb_default_size * |
@@ -845,7 +846,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
845 | 846 | ||
846 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 847 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
847 | gr_gpc0_ppc0_cbm_cfg_r() + temp + | 848 | gr_gpc0_ppc0_cbm_cfg_r() + temp + |
848 | proj_ppc_in_gpc_stride_v() * ppc_index, | 849 | ppc_in_gpc_stride * ppc_index, |
849 | gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | | 850 | gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | |
850 | gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | | 851 | gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | |
851 | gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); | 852 | gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); |
@@ -855,7 +856,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g, | |||
855 | 856 | ||
856 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 857 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
857 | gr_gpc0_ppc0_cbm_cfg2_r() + temp + | 858 | gr_gpc0_ppc0_cbm_cfg2_r() + temp + |
858 | proj_ppc_in_gpc_stride_v() * ppc_index, | 859 | ppc_in_gpc_stride * ppc_index, |
859 | gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | | 860 | gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | |
860 | gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); | 861 | gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); |
861 | 862 | ||
@@ -1209,7 +1210,7 @@ static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, | |||
1209 | u32 gpcs_per_reg = 4; | 1210 | u32 gpcs_per_reg = 4; |
1210 | u32 pes_index; | 1211 | u32 pes_index; |
1211 | u32 tpc_count_pes; | 1212 | u32 tpc_count_pes; |
1212 | u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); | 1213 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
1213 | 1214 | ||
1214 | u32 alpha_target, beta_target; | 1215 | u32 alpha_target, beta_target; |
1215 | u32 alpha_bits, beta_bits; | 1216 | u32 alpha_bits, beta_bits; |
@@ -1309,14 +1310,16 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) | |||
1309 | u32 tpc_per_gpc; | 1310 | u32 tpc_per_gpc; |
1310 | u32 max_ways_evict = INVALID_MAX_WAYS; | 1311 | u32 max_ways_evict = INVALID_MAX_WAYS; |
1311 | u32 l1c_dbg_reg_val; | 1312 | u32 l1c_dbg_reg_val; |
1313 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1314 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1312 | 1315 | ||
1313 | gk20a_dbg_fn(""); | 1316 | gk20a_dbg_fn(""); |
1314 | 1317 | ||
1315 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { | 1318 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { |
1316 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 1319 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
1317 | gpc_offset = proj_gpc_stride_v() * gpc_index; | 1320 | gpc_offset = gpc_stride * gpc_index; |
1318 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | 1321 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { |
1319 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; | 1322 | tpc_offset = tpc_in_gpc_stride * tpc_index; |
1320 | 1323 | ||
1321 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | 1324 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, |
1322 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | 1325 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); |
@@ -3196,6 +3199,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3196 | u32 pes_heavy_index; | 3199 | u32 pes_heavy_index; |
3197 | u32 gpc_new_skip_mask; | 3200 | u32 gpc_new_skip_mask; |
3198 | u32 tmp; | 3201 | u32 tmp; |
3202 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
3199 | 3203 | ||
3200 | tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r()); | 3204 | tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r()); |
3201 | gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp); | 3205 | gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp); |
@@ -3219,8 +3223,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3219 | tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r()); | 3223 | tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r()); |
3220 | gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); | 3224 | gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp); |
3221 | 3225 | ||
3222 | gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); | 3226 | gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
3223 | gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v(); | 3227 | gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS); |
3224 | 3228 | ||
3225 | if (!gr->gpc_count) { | 3229 | if (!gr->gpc_count) { |
3226 | gk20a_err(dev_from_gk20a(g), "gpc_count==0!"); | 3230 | gk20a_err(dev_from_gk20a(g), "gpc_count==0!"); |
@@ -3270,7 +3274,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3270 | 3274 | ||
3271 | tmp = gk20a_readl(g, | 3275 | tmp = gk20a_readl(g, |
3272 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + | 3276 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + |
3273 | gpc_index * proj_gpc_stride_v()); | 3277 | gpc_index * gpc_stride); |
3274 | 3278 | ||
3275 | pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp); | 3279 | pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp); |
3276 | pes_tpc_count = count_bits(pes_tpc_mask); | 3280 | pes_tpc_count = count_bits(pes_tpc_mask); |
@@ -3414,16 +3418,17 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3414 | bool delete_map = false; | 3418 | bool delete_map = false; |
3415 | bool gpc_sorted; | 3419 | bool gpc_sorted; |
3416 | int ret = 0; | 3420 | int ret = 0; |
3421 | int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
3422 | int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
3417 | 3423 | ||
3418 | init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3424 | init_frac = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3419 | init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3425 | init_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3420 | run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3426 | run_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3421 | sorted_num_tpcs = | 3427 | sorted_num_tpcs = |
3422 | kzalloc(proj_scal_max_gpcs_v() * | 3428 | kzalloc(num_gpcs * num_tpc_per_gpc * sizeof(s32), |
3423 | proj_scal_max_tpc_per_gpc_v() * sizeof(s32), | ||
3424 | GFP_KERNEL); | 3429 | GFP_KERNEL); |
3425 | sorted_to_unsorted_gpc_map = | 3430 | sorted_to_unsorted_gpc_map = |
3426 | kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL); | 3431 | kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); |
3427 | 3432 | ||
3428 | if (!(init_frac && init_err && run_err && sorted_num_tpcs && | 3433 | if (!(init_frac && init_err && run_err && sorted_num_tpcs && |
3429 | sorted_to_unsorted_gpc_map)) { | 3434 | sorted_to_unsorted_gpc_map)) { |
@@ -3490,9 +3495,9 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr) | |||
3490 | } | 3495 | } |
3491 | 3496 | ||
3492 | if (gr->map_tiles == NULL) { | 3497 | if (gr->map_tiles == NULL) { |
3493 | gr->map_tile_count = proj_scal_max_gpcs_v(); | 3498 | gr->map_tile_count = num_gpcs; |
3494 | 3499 | ||
3495 | gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL); | 3500 | gr->map_tiles = kzalloc(num_gpcs * sizeof(u8), GFP_KERNEL); |
3496 | if (gr->map_tiles == NULL) { | 3501 | if (gr->map_tiles == NULL) { |
3497 | ret = -ENOMEM; | 3502 | ret = -ENOMEM; |
3498 | goto clean_up; | 3503 | goto clean_up; |
@@ -3628,11 +3633,11 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr, | |||
3628 | zcull_params->region_byte_multiplier = | 3633 | zcull_params->region_byte_multiplier = |
3629 | gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v(); | 3634 | gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v(); |
3630 | zcull_params->region_header_size = | 3635 | zcull_params->region_header_size = |
3631 | proj_scal_litter_num_gpcs_v() * | 3636 | nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * |
3632 | gr_zcull_save_restore_header_bytes_per_gpc_v(); | 3637 | gr_zcull_save_restore_header_bytes_per_gpc_v(); |
3633 | 3638 | ||
3634 | zcull_params->subregion_header_size = | 3639 | zcull_params->subregion_header_size = |
3635 | proj_scal_litter_num_gpcs_v() * | 3640 | nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) * |
3636 | gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); | 3641 | gr_zcull_save_restore_subregion_header_bytes_per_gpc_v(); |
3637 | 3642 | ||
3638 | zcull_params->subregion_width_align_pixels = | 3643 | zcull_params->subregion_width_align_pixels = |
@@ -4082,19 +4087,22 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4082 | u32 rcp_conserv; | 4087 | u32 rcp_conserv; |
4083 | u32 offset; | 4088 | u32 offset; |
4084 | bool floorsweep = false; | 4089 | bool floorsweep = false; |
4090 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
4091 | int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); | ||
4092 | int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); | ||
4085 | 4093 | ||
4086 | if (!gr->map_tiles) | 4094 | if (!gr->map_tiles) |
4087 | return -1; | 4095 | return -1; |
4088 | 4096 | ||
4089 | zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() * | 4097 | zcull_map_tiles = kzalloc(num_gpcs * |
4090 | proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL); | 4098 | num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); |
4091 | if (!zcull_map_tiles) { | 4099 | if (!zcull_map_tiles) { |
4092 | gk20a_err(dev_from_gk20a(g), | 4100 | gk20a_err(dev_from_gk20a(g), |
4093 | "failed to allocate zcull temp buffers"); | 4101 | "failed to allocate zcull temp buffers"); |
4094 | return -ENOMEM; | 4102 | return -ENOMEM; |
4095 | } | 4103 | } |
4096 | zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() * | 4104 | zcull_bank_counters = kzalloc(num_gpcs * |
4097 | proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL); | 4105 | num_tpc_per_gpc * sizeof(u32), GFP_KERNEL); |
4098 | 4106 | ||
4099 | if (!zcull_bank_counters) { | 4107 | if (!zcull_bank_counters) { |
4100 | gk20a_err(dev_from_gk20a(g), | 4108 | gk20a_err(dev_from_gk20a(g), |
@@ -4173,7 +4181,7 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr) | |||
4173 | gr->gpc_tpc_count[0]); | 4181 | gr->gpc_tpc_count[0]); |
4174 | 4182 | ||
4175 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 4183 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
4176 | offset = gpc_index * proj_gpc_stride_v(); | 4184 | offset = gpc_index * gpc_stride; |
4177 | 4185 | ||
4178 | if (floorsweep) { | 4186 | if (floorsweep) { |
4179 | gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, | 4187 | gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset, |
@@ -4836,6 +4844,8 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4836 | struct gr_gk20a *gr = &g->gr; | 4844 | struct gr_gk20a *gr = &g->gr; |
4837 | u32 gpc_index, ppc_index, stride, val, offset; | 4845 | u32 gpc_index, ppc_index, stride, val, offset; |
4838 | u32 cb_size = data * 4; | 4846 | u32 cb_size = data * 4; |
4847 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
4848 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
4839 | 4849 | ||
4840 | gk20a_dbg_fn(""); | 4850 | gk20a_dbg_fn(""); |
4841 | 4851 | ||
@@ -4848,14 +4858,14 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4848 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | 4858 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); |
4849 | 4859 | ||
4850 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 4860 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
4851 | stride = proj_gpc_stride_v() * gpc_index; | 4861 | stride = gpc_stride * gpc_index; |
4852 | 4862 | ||
4853 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 4863 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
4854 | ppc_index++) { | 4864 | ppc_index++) { |
4855 | 4865 | ||
4856 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() + | 4866 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() + |
4857 | stride + | 4867 | stride + |
4858 | proj_ppc_in_gpc_stride_v() * ppc_index); | 4868 | ppc_in_gpc_stride * ppc_index); |
4859 | 4869 | ||
4860 | offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val); | 4870 | offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val); |
4861 | 4871 | ||
@@ -4869,7 +4879,7 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4869 | 4879 | ||
4870 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | 4880 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + |
4871 | stride + | 4881 | stride + |
4872 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 4882 | ppc_in_gpc_stride * ppc_index, val); |
4873 | 4883 | ||
4874 | val = set_field(val, | 4884 | val = set_field(val, |
4875 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), | 4885 | gr_gpc0_ppc0_cbm_cfg_start_offset_m(), |
@@ -4877,7 +4887,7 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
4877 | 4887 | ||
4878 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + | 4888 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() + |
4879 | stride + | 4889 | stride + |
4880 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 4890 | ppc_in_gpc_stride * ppc_index, val); |
4881 | } | 4891 | } |
4882 | } | 4892 | } |
4883 | } | 4893 | } |
@@ -4888,6 +4898,8 @@ static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
4888 | u32 gpc_index, ppc_index, stride, val; | 4898 | u32 gpc_index, ppc_index, stride, val; |
4889 | u32 pd_ab_max_output; | 4899 | u32 pd_ab_max_output; |
4890 | u32 alpha_cb_size = data * 4; | 4900 | u32 alpha_cb_size = data * 4; |
4901 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
4902 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
4891 | 4903 | ||
4892 | gk20a_dbg_fn(""); | 4904 | gk20a_dbg_fn(""); |
4893 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | 4905 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) |
@@ -4910,22 +4922,20 @@ static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
4910 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | 4922 | gr_pd_ab_dist_cfg1_max_batches_init_f()); |
4911 | 4923 | ||
4912 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 4924 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
4913 | stride = proj_gpc_stride_v() * gpc_index; | 4925 | stride = gpc_stride * gpc_index; |
4914 | 4926 | ||
4915 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 4927 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
4916 | ppc_index++) { | 4928 | ppc_index++) { |
4917 | 4929 | ||
4918 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() + | 4930 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() + |
4919 | stride + | 4931 | stride + ppc_in_gpc_stride * ppc_index); |
4920 | proj_ppc_in_gpc_stride_v() * ppc_index); | ||
4921 | 4932 | ||
4922 | val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(), | 4933 | val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(), |
4923 | gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size * | 4934 | gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size * |
4924 | gr->pes_tpc_count[ppc_index][gpc_index])); | 4935 | gr->pes_tpc_count[ppc_index][gpc_index])); |
4925 | 4936 | ||
4926 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() + | 4937 | gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() + |
4927 | stride + | 4938 | stride + ppc_in_gpc_stride * ppc_index, val); |
4928 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | ||
4929 | } | 4939 | } |
4930 | } | 4940 | } |
4931 | } | 4941 | } |
@@ -5421,8 +5431,9 @@ int gk20a_gr_lock_down_sm(struct gk20a *g, | |||
5421 | u32 gpc, u32 tpc, u32 global_esr_mask, | 5431 | u32 gpc, u32 tpc, u32 global_esr_mask, |
5422 | bool check_errors) | 5432 | bool check_errors) |
5423 | { | 5433 | { |
5424 | u32 offset = | 5434 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5425 | proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; | 5435 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5436 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5426 | u32 dbgr_control0; | 5437 | u32 dbgr_control0; |
5427 | 5438 | ||
5428 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5439 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
@@ -5456,8 +5467,9 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g) | |||
5456 | void gk20a_gr_clear_sm_hww(struct gk20a *g, | 5467 | void gk20a_gr_clear_sm_hww(struct gk20a *g, |
5457 | u32 gpc, u32 tpc, u32 global_esr) | 5468 | u32 gpc, u32 tpc, u32 global_esr) |
5458 | { | 5469 | { |
5459 | u32 offset = proj_gpc_stride_v() * gpc + | 5470 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5460 | proj_tpc_in_gpc_stride_v() * tpc; | 5471 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5472 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5461 | 5473 | ||
5462 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, | 5474 | gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset, |
5463 | global_esr); | 5475 | global_esr); |
@@ -5477,8 +5489,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5477 | { | 5489 | { |
5478 | int ret = 0; | 5490 | int ret = 0; |
5479 | bool do_warp_sync = false, early_exit = false, ignore_debugger = false; | 5491 | bool do_warp_sync = false, early_exit = false, ignore_debugger = false; |
5480 | u32 offset = proj_gpc_stride_v() * gpc + | 5492 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5481 | proj_tpc_in_gpc_stride_v() * tpc; | 5493 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5494 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5482 | 5495 | ||
5483 | /* these three interrupts don't require locking down the SM. They can | 5496 | /* these three interrupts don't require locking down the SM. They can |
5484 | * be handled by usermode clients as they aren't fatal. Additionally, | 5497 | * be handled by usermode clients as they aren't fatal. Additionally, |
@@ -5590,8 +5603,9 @@ int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5590 | bool *post_event) | 5603 | bool *post_event) |
5591 | { | 5604 | { |
5592 | int ret = 0; | 5605 | int ret = 0; |
5593 | u32 offset = proj_gpc_stride_v() * gpc + | 5606 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5594 | proj_tpc_in_gpc_stride_v() * tpc; | 5607 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5608 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5595 | u32 esr; | 5609 | u32 esr; |
5596 | 5610 | ||
5597 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); | 5611 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, ""); |
@@ -5611,8 +5625,9 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
5611 | bool *post_event, struct channel_gk20a *fault_ch) | 5625 | bool *post_event, struct channel_gk20a *fault_ch) |
5612 | { | 5626 | { |
5613 | int ret = 0; | 5627 | int ret = 0; |
5614 | u32 offset = proj_gpc_stride_v() * gpc + | 5628 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
5615 | proj_tpc_in_gpc_stride_v() * tpc; | 5629 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
5630 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | ||
5616 | u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r() | 5631 | u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r() |
5617 | + offset); | 5632 | + offset); |
5618 | 5633 | ||
@@ -5646,6 +5661,8 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5646 | struct gr_gk20a *gr = &g->gr; | 5661 | struct gr_gk20a *gr = &g->gr; |
5647 | u32 exception1 = gk20a_readl(g, gr_exception1_r()); | 5662 | u32 exception1 = gk20a_readl(g, gr_exception1_r()); |
5648 | u32 gpc_exception, global_esr; | 5663 | u32 gpc_exception, global_esr; |
5664 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
5665 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
5649 | 5666 | ||
5650 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); | 5667 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, ""); |
5651 | 5668 | ||
@@ -5656,7 +5673,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5656 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5673 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5657 | "GPC%d exception pending", gpc); | 5674 | "GPC%d exception pending", gpc); |
5658 | 5675 | ||
5659 | gpc_offset = proj_gpc_stride_v() * gpc; | 5676 | gpc_offset = gpc_stride * gpc; |
5660 | 5677 | ||
5661 | gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r() | 5678 | gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r() |
5662 | + gpc_offset); | 5679 | + gpc_offset); |
@@ -5670,7 +5687,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event, | |||
5670 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 5687 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
5671 | "GPC%d: TPC%d exception pending", gpc, tpc); | 5688 | "GPC%d: TPC%d exception pending", gpc, tpc); |
5672 | 5689 | ||
5673 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 5690 | tpc_offset = tpc_in_gpc_stride * tpc; |
5674 | 5691 | ||
5675 | global_esr = gk20a_readl(g, | 5692 | global_esr = gk20a_readl(g, |
5676 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | 5693 | gr_gpc0_tpc0_sm_hww_global_esr_r() + |
@@ -6045,31 +6062,31 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
6045 | *ppc_num = 0; | 6062 | *ppc_num = 0; |
6046 | *be_num = 0; | 6063 | *be_num = 0; |
6047 | 6064 | ||
6048 | if (pri_is_gpc_addr(addr)) { | 6065 | if (pri_is_gpc_addr(g, addr)) { |
6049 | *addr_type = CTXSW_ADDR_TYPE_GPC; | 6066 | *addr_type = CTXSW_ADDR_TYPE_GPC; |
6050 | gpc_addr = pri_gpccs_addr_mask(addr); | 6067 | gpc_addr = pri_gpccs_addr_mask(addr); |
6051 | if (pri_is_gpc_addr_shared(addr)) { | 6068 | if (pri_is_gpc_addr_shared(g, addr)) { |
6052 | *addr_type = CTXSW_ADDR_TYPE_GPC; | 6069 | *addr_type = CTXSW_ADDR_TYPE_GPC; |
6053 | *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; | 6070 | *broadcast_flags |= PRI_BROADCAST_FLAGS_GPC; |
6054 | } else | 6071 | } else |
6055 | *gpc_num = pri_get_gpc_num(addr); | 6072 | *gpc_num = pri_get_gpc_num(g, addr); |
6056 | 6073 | ||
6057 | if (g->ops.gr.is_tpc_addr(gpc_addr)) { | 6074 | if (g->ops.gr.is_tpc_addr(g, gpc_addr)) { |
6058 | *addr_type = CTXSW_ADDR_TYPE_TPC; | 6075 | *addr_type = CTXSW_ADDR_TYPE_TPC; |
6059 | if (pri_is_tpc_addr_shared(gpc_addr)) { | 6076 | if (pri_is_tpc_addr_shared(g, gpc_addr)) { |
6060 | *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC; | 6077 | *broadcast_flags |= PRI_BROADCAST_FLAGS_TPC; |
6061 | return 0; | 6078 | return 0; |
6062 | } | 6079 | } |
6063 | *tpc_num = g->ops.gr.get_tpc_num(gpc_addr); | 6080 | *tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); |
6064 | } | 6081 | } |
6065 | return 0; | 6082 | return 0; |
6066 | } else if (pri_is_be_addr(addr)) { | 6083 | } else if (pri_is_be_addr(g, addr)) { |
6067 | *addr_type = CTXSW_ADDR_TYPE_BE; | 6084 | *addr_type = CTXSW_ADDR_TYPE_BE; |
6068 | if (pri_is_be_addr_shared(addr)) { | 6085 | if (pri_is_be_addr_shared(g, addr)) { |
6069 | *broadcast_flags |= PRI_BROADCAST_FLAGS_BE; | 6086 | *broadcast_flags |= PRI_BROADCAST_FLAGS_BE; |
6070 | return 0; | 6087 | return 0; |
6071 | } | 6088 | } |
6072 | *be_num = pri_get_be_num(addr); | 6089 | *be_num = pri_get_be_num(g, addr); |
6073 | return 0; | 6090 | return 0; |
6074 | } else { | 6091 | } else { |
6075 | *addr_type = CTXSW_ADDR_TYPE_SYS; | 6092 | *addr_type = CTXSW_ADDR_TYPE_SYS; |
@@ -6090,7 +6107,7 @@ static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, | |||
6090 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 6107 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
6091 | 6108 | ||
6092 | for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++) | 6109 | for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++) |
6093 | priv_addr_table[(*t)++] = pri_ppc_addr(pri_ppccs_addr_mask(addr), | 6110 | priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr), |
6094 | gpc_num, ppc_num); | 6111 | gpc_num, ppc_num); |
6095 | 6112 | ||
6096 | return 0; | 6113 | return 0; |
@@ -6133,7 +6150,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6133 | * so that we can look up the offset. */ | 6150 | * so that we can look up the offset. */ |
6134 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && | 6151 | if ((addr_type == CTXSW_ADDR_TYPE_BE) && |
6135 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) | 6152 | !(broadcast_flags & PRI_BROADCAST_FLAGS_BE)) |
6136 | priv_addr_table[t++] = pri_be_shared_addr(addr); | 6153 | priv_addr_table[t++] = pri_be_shared_addr(g, addr); |
6137 | else | 6154 | else |
6138 | priv_addr_table[t++] = addr; | 6155 | priv_addr_table[t++] = addr; |
6139 | 6156 | ||
@@ -6152,7 +6169,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6152 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 6169 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
6153 | tpc_num++) | 6170 | tpc_num++) |
6154 | priv_addr_table[t++] = | 6171 | priv_addr_table[t++] = |
6155 | pri_tpc_addr(pri_tpccs_addr_mask(addr), | 6172 | pri_tpc_addr(g, pri_tpccs_addr_mask(addr), |
6156 | gpc_num, tpc_num); | 6173 | gpc_num, tpc_num); |
6157 | 6174 | ||
6158 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { | 6175 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) { |
@@ -6162,7 +6179,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6162 | return err; | 6179 | return err; |
6163 | } else | 6180 | } else |
6164 | priv_addr_table[t++] = | 6181 | priv_addr_table[t++] = |
6165 | pri_gpc_addr(pri_gpccs_addr_mask(addr), | 6182 | pri_gpc_addr(g, pri_gpccs_addr_mask(addr), |
6166 | gpc_num); | 6183 | gpc_num); |
6167 | } | 6184 | } |
6168 | } else { | 6185 | } else { |
@@ -6171,7 +6188,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6171 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 6188 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
6172 | tpc_num++) | 6189 | tpc_num++) |
6173 | priv_addr_table[t++] = | 6190 | priv_addr_table[t++] = |
6174 | pri_tpc_addr(pri_tpccs_addr_mask(addr), | 6191 | pri_tpc_addr(g, pri_tpccs_addr_mask(addr), |
6175 | gpc_num, tpc_num); | 6192 | gpc_num, tpc_num); |
6176 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) | 6193 | else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) |
6177 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, | 6194 | err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num, |
@@ -6403,6 +6420,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6403 | u32 vaddr_lo; | 6420 | u32 vaddr_lo; |
6404 | u32 vaddr_hi; | 6421 | u32 vaddr_hi; |
6405 | u32 tmp; | 6422 | u32 tmp; |
6423 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6424 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6406 | 6425 | ||
6407 | init_ovr_perf_reg_info(); | 6426 | init_ovr_perf_reg_info(); |
6408 | g->ops.gr.init_sm_dsm_reg_info(); | 6427 | g->ops.gr.init_sm_dsm_reg_info(); |
@@ -6413,8 +6432,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6413 | for (gpc = 0; gpc < num_gpc; gpc++) { | 6432 | for (gpc = 0; gpc < num_gpc; gpc++) { |
6414 | num_tpc = g->gr.gpc_tpc_count[gpc]; | 6433 | num_tpc = g->gr.gpc_tpc_count[gpc]; |
6415 | for (tpc = 0; tpc < num_tpc; tpc++) { | 6434 | for (tpc = 0; tpc < num_tpc; tpc++) { |
6416 | chk_addr = ((proj_gpc_stride_v() * gpc) + | 6435 | chk_addr = ((gpc_stride * gpc) + |
6417 | (proj_tpc_in_gpc_stride_v() * tpc) + | 6436 | (tpc_in_gpc_stride * tpc) + |
6418 | _ovr_perf_regs[reg]); | 6437 | _ovr_perf_regs[reg]); |
6419 | if (chk_addr != addr) | 6438 | if (chk_addr != addr) |
6420 | continue; | 6439 | continue; |
@@ -6461,18 +6480,19 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | |||
6461 | u32 tpc, gpc; | 6480 | u32 tpc, gpc; |
6462 | u32 gpc_tpc_addr; | 6481 | u32 gpc_tpc_addr; |
6463 | u32 gpc_tpc_stride; | 6482 | u32 gpc_tpc_stride; |
6483 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6484 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6464 | 6485 | ||
6465 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); | 6486 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset); |
6466 | 6487 | ||
6467 | gpc = pri_get_gpc_num(offset); | 6488 | gpc = pri_get_gpc_num(g, offset); |
6468 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); | 6489 | gpc_tpc_addr = pri_gpccs_addr_mask(offset); |
6469 | tpc = g->ops.gr.get_tpc_num(gpc_tpc_addr); | 6490 | tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr); |
6470 | 6491 | ||
6471 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ | 6492 | quad_ctrl = quad & 0x1; /* first bit tells us quad */ |
6472 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ | 6493 | half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */ |
6473 | 6494 | ||
6474 | gpc_tpc_stride = gpc * proj_gpc_stride_v() + | 6495 | gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride; |
6475 | tpc * proj_tpc_in_gpc_stride_v(); | ||
6476 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; | 6496 | gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride; |
6477 | 6497 | ||
6478 | reg = gk20a_readl(g, gpc_tpc_addr); | 6498 | reg = gk20a_readl(g, gpc_tpc_addr); |
@@ -6552,7 +6572,6 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6552 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; | 6572 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; |
6553 | u32 num_ext_gpccs_ext_buffer_segments; | 6573 | u32 num_ext_gpccs_ext_buffer_segments; |
6554 | u32 inter_seg_offset; | 6574 | u32 inter_seg_offset; |
6555 | u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1); | ||
6556 | u32 max_tpc_count; | 6575 | u32 max_tpc_count; |
6557 | u32 *sm_dsm_perf_ctrl_regs = NULL; | 6576 | u32 *sm_dsm_perf_ctrl_regs = NULL; |
6558 | u32 num_sm_dsm_perf_ctrl_regs = 0; | 6577 | u32 num_sm_dsm_perf_ctrl_regs = 0; |
@@ -6563,15 +6582,20 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6563 | u32 control_register_stride = 0; | 6582 | u32 control_register_stride = 0; |
6564 | u32 perf_register_stride = 0; | 6583 | u32 perf_register_stride = 0; |
6565 | struct gr_gk20a *gr = &g->gr; | 6584 | struct gr_gk20a *gr = &g->gr; |
6585 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); | ||
6586 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6587 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
6588 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6589 | u32 tpc_gpc_mask = (tpc_in_gpc_stride - 1); | ||
6566 | 6590 | ||
6567 | /* Only have TPC registers in extended region, so if not a TPC reg, | 6591 | /* Only have TPC registers in extended region, so if not a TPC reg, |
6568 | then return error so caller can look elsewhere. */ | 6592 | then return error so caller can look elsewhere. */ |
6569 | if (pri_is_gpc_addr(addr)) { | 6593 | if (pri_is_gpc_addr(g, addr)) { |
6570 | u32 gpc_addr = 0; | 6594 | u32 gpc_addr = 0; |
6571 | gpc_num = pri_get_gpc_num(addr); | 6595 | gpc_num = pri_get_gpc_num(g, addr); |
6572 | gpc_addr = pri_gpccs_addr_mask(addr); | 6596 | gpc_addr = pri_gpccs_addr_mask(addr); |
6573 | if (g->ops.gr.is_tpc_addr(gpc_addr)) | 6597 | if (g->ops.gr.is_tpc_addr(g, gpc_addr)) |
6574 | tpc_num = g->ops.gr.get_tpc_num(gpc_addr); | 6598 | tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); |
6575 | else | 6599 | else |
6576 | return -EINVAL; | 6600 | return -EINVAL; |
6577 | 6601 | ||
@@ -6639,11 +6663,10 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6639 | gk20a_dbg_info("register match: 0x%08x", | 6663 | gk20a_dbg_info("register match: 0x%08x", |
6640 | sm_dsm_perf_regs[i]); | 6664 | sm_dsm_perf_regs[i]); |
6641 | 6665 | ||
6642 | chk_addr = (proj_gpc_base_v() + | 6666 | chk_addr = (gpc_base + gpc_stride * gpc_num) + |
6643 | (proj_gpc_stride_v() * gpc_num) + | 6667 | tpc_in_gpc_base + |
6644 | proj_tpc_in_gpc_base_v() + | 6668 | (tpc_in_gpc_stride * tpc_num) + |
6645 | (proj_tpc_in_gpc_stride_v() * tpc_num) + | 6669 | (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask); |
6646 | (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask)); | ||
6647 | 6670 | ||
6648 | if (chk_addr != addr) { | 6671 | if (chk_addr != addr) { |
6649 | gk20a_err(dev_from_gk20a(g), | 6672 | gk20a_err(dev_from_gk20a(g), |
@@ -6670,12 +6693,11 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6670 | gk20a_dbg_info("register match: 0x%08x", | 6693 | gk20a_dbg_info("register match: 0x%08x", |
6671 | sm_dsm_perf_ctrl_regs[i]); | 6694 | sm_dsm_perf_ctrl_regs[i]); |
6672 | 6695 | ||
6673 | chk_addr = (proj_gpc_base_v() + | 6696 | chk_addr = (gpc_base + gpc_stride * gpc_num) + |
6674 | (proj_gpc_stride_v() * gpc_num) + | 6697 | tpc_in_gpc_base + |
6675 | proj_tpc_in_gpc_base_v() + | 6698 | tpc_in_gpc_stride * tpc_num + |
6676 | (proj_tpc_in_gpc_stride_v() * tpc_num) + | ||
6677 | (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] & | 6699 | (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] & |
6678 | tpc_gpc_mask)); | 6700 | tpc_gpc_mask); |
6679 | 6701 | ||
6680 | if (chk_addr != addr) { | 6702 | if (chk_addr != addr) { |
6681 | gk20a_err(dev_from_gk20a(g), | 6703 | gk20a_err(dev_from_gk20a(g), |
@@ -6772,6 +6794,12 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6772 | u32 sys_offset, gpc_offset, tpc_offset, ppc_offset; | 6794 | u32 sys_offset, gpc_offset, tpc_offset, ppc_offset; |
6773 | u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr; | 6795 | u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr; |
6774 | struct aiv_gk20a *reg; | 6796 | struct aiv_gk20a *reg; |
6797 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); | ||
6798 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
6799 | u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE); | ||
6800 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
6801 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
6802 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
6775 | 6803 | ||
6776 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); | 6804 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); |
6777 | 6805 | ||
@@ -6800,10 +6828,10 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6800 | reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i]; | 6828 | reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i]; |
6801 | address = reg->addr; | 6829 | address = reg->addr; |
6802 | tpc_addr = pri_tpccs_addr_mask(address); | 6830 | tpc_addr = pri_tpccs_addr_mask(address); |
6803 | base_address = proj_gpc_base_v() + | 6831 | base_address = gpc_base + |
6804 | (gpc_num * proj_gpc_stride_v()) + | 6832 | (gpc_num * gpc_stride) + |
6805 | proj_tpc_in_gpc_base_v() + | 6833 | tpc_in_gpc_base + |
6806 | (tpc_num * proj_tpc_in_gpc_stride_v()); | 6834 | (tpc_num * tpc_in_gpc_stride); |
6807 | address = base_address + tpc_addr; | 6835 | address = base_address + tpc_addr; |
6808 | /* | 6836 | /* |
6809 | * The data for the TPCs is interleaved in the context buffer. | 6837 | * The data for the TPCs is interleaved in the context buffer. |
@@ -6828,10 +6856,10 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6828 | reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i]; | 6856 | reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i]; |
6829 | address = reg->addr; | 6857 | address = reg->addr; |
6830 | ppc_addr = pri_ppccs_addr_mask(address); | 6858 | ppc_addr = pri_ppccs_addr_mask(address); |
6831 | base_address = proj_gpc_base_v() + | 6859 | base_address = gpc_base + |
6832 | (gpc_num * proj_gpc_stride_v()) + | 6860 | (gpc_num * gpc_stride) + |
6833 | proj_ppc_in_gpc_base_v() + | 6861 | ppc_in_gpc_base + |
6834 | (ppc_num * proj_ppc_in_gpc_stride_v()); | 6862 | (ppc_num * ppc_in_gpc_stride); |
6835 | address = base_address + ppc_addr; | 6863 | address = base_address + ppc_addr; |
6836 | /* | 6864 | /* |
6837 | * The data for the PPCs is interleaved in the context buffer. | 6865 | * The data for the PPCs is interleaved in the context buffer. |
@@ -6859,8 +6887,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
6859 | gpc_addr = pri_gpccs_addr_mask(address); | 6887 | gpc_addr = pri_gpccs_addr_mask(address); |
6860 | gpc_offset = reg->index; | 6888 | gpc_offset = reg->index; |
6861 | 6889 | ||
6862 | base_address = proj_gpc_base_v() + | 6890 | base_address = gpc_base + (gpc_num * gpc_stride); |
6863 | (gpc_num * proj_gpc_stride_v()); | ||
6864 | address = base_address + gpc_addr; | 6891 | address = base_address + gpc_addr; |
6865 | 6892 | ||
6866 | if (pri_addr == address) { | 6893 | if (pri_addr == address) { |
@@ -6879,7 +6906,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
6879 | u32 *reg_ppc_count) | 6906 | u32 *reg_ppc_count) |
6880 | { | 6907 | { |
6881 | u32 data32; | 6908 | u32 data32; |
6882 | u32 litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); | 6909 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); |
6883 | 6910 | ||
6884 | /* | 6911 | /* |
6885 | * if there is only 1 PES_PER_GPC, then we put the PES registers | 6912 | * if there is only 1 PES_PER_GPC, then we put the PES registers |
@@ -6887,7 +6914,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
6887 | */ | 6914 | */ |
6888 | if ((!g->gr.ctx_vars.valid) || | 6915 | if ((!g->gr.ctx_vars.valid) || |
6889 | ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && | 6916 | ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) && |
6890 | (litter_num_pes_per_gpc > 1))) | 6917 | (num_pes_per_gpc > 1))) |
6891 | return -EINVAL; | 6918 | return -EINVAL; |
6892 | 6919 | ||
6893 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); | 6920 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); |
@@ -7028,9 +7055,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7028 | /* The ucode stores TPC/PPC data before GPC data. | 7055 | /* The ucode stores TPC/PPC data before GPC data. |
7029 | * Advance offset past TPC/PPC data to GPC data. */ | 7056 | * Advance offset past TPC/PPC data to GPC data. */ |
7030 | /* note 1 PES_PER_GPC case */ | 7057 | /* note 1 PES_PER_GPC case */ |
7031 | u32 litter_num_pes_per_gpc = | 7058 | u32 num_pes_per_gpc = nvgpu_get_litter_value(g, |
7032 | proj_scal_litter_num_pes_per_gpc_v(); | 7059 | GPU_LIT_NUM_PES_PER_GPC); |
7033 | if (litter_num_pes_per_gpc > 1) { | 7060 | if (num_pes_per_gpc > 1) { |
7034 | offset_to_segment += | 7061 | offset_to_segment += |
7035 | (((gr->ctx_vars.ctxsw_regs.tpc.count * | 7062 | (((gr->ctx_vars.ctxsw_regs.tpc.count * |
7036 | num_tpcs) << 2) + | 7063 | num_tpcs) << 2) + |
@@ -7136,33 +7163,37 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, | |||
7136 | { | 7163 | { |
7137 | u32 num_gpcs = g->gr.gpc_count; | 7164 | u32 num_gpcs = g->gr.gpc_count; |
7138 | u32 num_ppcs, num_tpcs, gpc_num, base; | 7165 | u32 num_ppcs, num_tpcs, gpc_num, base; |
7166 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); | ||
7167 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7168 | u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE); | ||
7169 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
7170 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
7171 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7139 | 7172 | ||
7140 | for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { | 7173 | for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { |
7141 | num_tpcs = g->gr.gpc_tpc_count[gpc_num]; | 7174 | num_tpcs = g->gr.gpc_tpc_count[gpc_num]; |
7142 | base = proj_gpc_base_v() + | 7175 | base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base; |
7143 | (proj_gpc_stride_v() * gpc_num) + proj_tpc_in_gpc_base_v(); | ||
7144 | if (add_ctxsw_buffer_map_entries_subunits(map, | 7176 | if (add_ctxsw_buffer_map_entries_subunits(map, |
7145 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc, | 7177 | &g->gr.ctx_vars.ctxsw_regs.pm_tpc, |
7146 | count, offset, max_cnt, base, num_tpcs, | 7178 | count, offset, max_cnt, base, num_tpcs, |
7147 | proj_tpc_in_gpc_stride_v(), | 7179 | tpc_in_gpc_stride, |
7148 | (proj_tpc_in_gpc_stride_v() - 1))) | 7180 | (tpc_in_gpc_stride - 1))) |
7149 | return -EINVAL; | 7181 | return -EINVAL; |
7150 | 7182 | ||
7151 | num_ppcs = g->gr.gpc_ppc_count[gpc_num]; | 7183 | num_ppcs = g->gr.gpc_ppc_count[gpc_num]; |
7152 | base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num) + | 7184 | base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; |
7153 | proj_ppc_in_gpc_base_v(); | ||
7154 | if (add_ctxsw_buffer_map_entries_subunits(map, | 7185 | if (add_ctxsw_buffer_map_entries_subunits(map, |
7155 | &g->gr.ctx_vars.ctxsw_regs.pm_ppc, | 7186 | &g->gr.ctx_vars.ctxsw_regs.pm_ppc, |
7156 | count, offset, max_cnt, base, num_ppcs, | 7187 | count, offset, max_cnt, base, num_ppcs, |
7157 | proj_ppc_in_gpc_stride_v(), | 7188 | ppc_in_gpc_stride, |
7158 | (proj_ppc_in_gpc_stride_v() - 1))) | 7189 | (ppc_in_gpc_stride - 1))) |
7159 | return -EINVAL; | 7190 | return -EINVAL; |
7160 | 7191 | ||
7161 | base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num); | 7192 | base = gpc_base + (gpc_stride * gpc_num); |
7162 | if (add_ctxsw_buffer_map_entries(map, | 7193 | if (add_ctxsw_buffer_map_entries(map, |
7163 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc, | 7194 | &g->gr.ctx_vars.ctxsw_regs.pm_gpc, |
7164 | count, offset, max_cnt, base, | 7195 | count, offset, max_cnt, base, |
7165 | (proj_gpc_stride_v() - 1))) | 7196 | (gpc_stride - 1))) |
7166 | return -EINVAL; | 7197 | return -EINVAL; |
7167 | 7198 | ||
7168 | base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num); | 7199 | base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num); |
@@ -7242,6 +7273,9 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7242 | u32 i, count = 0; | 7273 | u32 i, count = 0; |
7243 | u32 offset = 0; | 7274 | u32 offset = 0; |
7244 | struct ctxsw_buf_offset_map_entry *map; | 7275 | struct ctxsw_buf_offset_map_entry *map; |
7276 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
7277 | u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS); | ||
7278 | u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE); | ||
7245 | 7279 | ||
7246 | if (hwpm_ctxsw_buffer_size == 0) { | 7280 | if (hwpm_ctxsw_buffer_size == 0) { |
7247 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | 7281 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, |
@@ -7289,8 +7323,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7289 | &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, | 7323 | &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, |
7290 | &count, &offset, | 7324 | &count, &offset, |
7291 | hwpm_ctxsw_reg_count_max, 0, | 7325 | hwpm_ctxsw_reg_count_max, 0, |
7292 | proj_scal_litter_num_fbpas_v(), | 7326 | num_fbpas, fbpa_stride, ~0)) |
7293 | proj_fbpa_stride_v(), ~0)) | ||
7294 | goto cleanup; | 7327 | goto cleanup; |
7295 | 7328 | ||
7296 | /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ | 7329 | /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ |
@@ -7298,7 +7331,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) | |||
7298 | &g->gr.ctx_vars.ctxsw_regs.pm_ltc, | 7331 | &g->gr.ctx_vars.ctxsw_regs.pm_ltc, |
7299 | &count, &offset, | 7332 | &count, &offset, |
7300 | hwpm_ctxsw_reg_count_max, 0, | 7333 | hwpm_ctxsw_reg_count_max, 0, |
7301 | g->ltc_count, proj_ltc_stride_v(), ~0)) | 7334 | g->ltc_count, ltc_stride, ~0)) |
7302 | goto cleanup; | 7335 | goto cleanup; |
7303 | 7336 | ||
7304 | offset = ALIGN(offset, 256); | 7337 | offset = ALIGN(offset, 256); |
@@ -7737,25 +7770,28 @@ void gk20a_init_gr(struct gk20a *g) | |||
7737 | init_waitqueue_head(&g->gr.init_wq); | 7770 | init_waitqueue_head(&g->gr.init_wq); |
7738 | } | 7771 | } |
7739 | 7772 | ||
7740 | static bool gr_gk20a_is_tpc_addr(u32 addr) | 7773 | static bool gr_gk20a_is_tpc_addr(struct gk20a *g, u32 addr) |
7741 | { | 7774 | { |
7742 | return ((addr >= proj_tpc_in_gpc_base_v()) && | 7775 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); |
7743 | (addr < proj_tpc_in_gpc_base_v() + | 7776 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
7744 | (proj_scal_litter_num_tpc_per_gpc_v() * | 7777 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
7745 | proj_tpc_in_gpc_stride_v()))) | 7778 | return ((addr >= tpc_in_gpc_base) && |
7746 | || pri_is_tpc_addr_shared(addr); | 7779 | (addr < tpc_in_gpc_base + |
7780 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
7781 | || pri_is_tpc_addr_shared(g, addr); | ||
7747 | } | 7782 | } |
7748 | 7783 | ||
7749 | static u32 gr_gk20a_get_tpc_num(u32 addr) | 7784 | static u32 gr_gk20a_get_tpc_num(struct gk20a *g, u32 addr) |
7750 | { | 7785 | { |
7751 | u32 i, start; | 7786 | u32 i, start; |
7752 | u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v(); | 7787 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
7788 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
7789 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7753 | 7790 | ||
7754 | for (i = 0; i < num_tpcs; i++) { | 7791 | for (i = 0; i < num_tpcs; i++) { |
7755 | start = proj_tpc_in_gpc_base_v() + | 7792 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); |
7756 | (i * proj_tpc_in_gpc_stride_v()); | ||
7757 | if ((addr >= start) && | 7793 | if ((addr >= start) && |
7758 | (addr < (start + proj_tpc_in_gpc_stride_v()))) | 7794 | (addr < (start + tpc_in_gpc_stride))) |
7759 | return i; | 7795 | return i; |
7760 | } | 7796 | } |
7761 | return 0; | 7797 | return 0; |
@@ -7768,8 +7804,10 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc, | |||
7768 | bool no_error_pending; | 7804 | bool no_error_pending; |
7769 | u32 delay = GR_IDLE_CHECK_DEFAULT; | 7805 | u32 delay = GR_IDLE_CHECK_DEFAULT; |
7770 | bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); | 7806 | bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); |
7807 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7808 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7771 | u32 offset = | 7809 | u32 offset = |
7772 | proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; | 7810 | gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
7773 | 7811 | ||
7774 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, | 7812 | gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, |
7775 | "GPC%d TPC%d: locking down SM", gpc, tpc); | 7813 | "GPC%d TPC%d: locking down SM", gpc, tpc); |
@@ -7828,9 +7866,9 @@ void gk20a_suspend_single_sm(struct gk20a *g, | |||
7828 | u32 offset; | 7866 | u32 offset; |
7829 | int err; | 7867 | int err; |
7830 | u32 dbgr_control0; | 7868 | u32 dbgr_control0; |
7831 | 7869 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | |
7832 | offset = proj_gpc_stride_v() * gpc + | 7870 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
7833 | proj_tpc_in_gpc_stride_v() * tpc; | 7871 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
7834 | 7872 | ||
7835 | /* if an SM debugger isn't attached, skip suspend */ | 7873 | /* if an SM debugger isn't attached, skip suspend */ |
7836 | if (!gk20a_gr_sm_debugger_attached(g)) { | 7874 | if (!gk20a_gr_sm_debugger_attached(g)) { |
@@ -7899,6 +7937,8 @@ void gk20a_resume_single_sm(struct gk20a *g, | |||
7899 | { | 7937 | { |
7900 | u32 dbgr_control0; | 7938 | u32 dbgr_control0; |
7901 | u32 offset; | 7939 | u32 offset; |
7940 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
7941 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
7902 | /* | 7942 | /* |
7903 | * The following requires some clarification. Despite the fact that both | 7943 | * The following requires some clarification. Despite the fact that both |
7904 | * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their | 7944 | * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their |
@@ -7912,8 +7952,7 @@ void gk20a_resume_single_sm(struct gk20a *g, | |||
7912 | * effect, before enabling the run trigger. | 7952 | * effect, before enabling the run trigger. |
7913 | */ | 7953 | */ |
7914 | 7954 | ||
7915 | offset = proj_gpc_stride_v() * gpc + | 7955 | offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
7916 | proj_tpc_in_gpc_stride_v() * tpc; | ||
7917 | 7956 | ||
7918 | /*De-assert stop trigger */ | 7957 | /*De-assert stop trigger */ |
7919 | dbgr_control0 = | 7958 | dbgr_control0 = |
@@ -8144,6 +8183,8 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
8144 | { | 8183 | { |
8145 | struct nvgpu_dbg_gpu_reg_op *ops; | 8184 | struct nvgpu_dbg_gpu_reg_op *ops; |
8146 | int i = 0, sm_id, err; | 8185 | int i = 0, sm_id, err; |
8186 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
8187 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8147 | 8188 | ||
8148 | ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL); | 8189 | ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL); |
8149 | if (!ops) | 8190 | if (!ops) |
@@ -8158,8 +8199,8 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, | |||
8158 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 8199 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
8159 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 8200 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
8160 | 8201 | ||
8161 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 8202 | tpc_offset = tpc_in_gpc_stride * tpc; |
8162 | gpc_offset = proj_gpc_stride_v() * gpc; | 8203 | gpc_offset = gpc_stride * gpc; |
8163 | reg_offset = tpc_offset + gpc_offset; | 8204 | reg_offset = tpc_offset + gpc_offset; |
8164 | 8205 | ||
8165 | ops[i].op = REGOP(WRITE_32); | 8206 | ops[i].op = REGOP(WRITE_32); |
@@ -8199,13 +8240,15 @@ static void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | |||
8199 | u32 gpc, tpc, sm_id; | 8240 | u32 gpc, tpc, sm_id; |
8200 | u32 tpc_offset, gpc_offset, reg_offset; | 8241 | u32 tpc_offset, gpc_offset, reg_offset; |
8201 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | 8242 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; |
8243 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
8244 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
8202 | 8245 | ||
8203 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | 8246 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { |
8204 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 8247 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
8205 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 8248 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
8206 | 8249 | ||
8207 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 8250 | tpc_offset = tpc_in_gpc_stride * tpc; |
8208 | gpc_offset = proj_gpc_stride_v() * gpc; | 8251 | gpc_offset = gpc_stride * gpc; |
8209 | reg_offset = tpc_offset + gpc_offset; | 8252 | reg_offset = tpc_offset + gpc_offset; |
8210 | 8253 | ||
8211 | /* 64 bit read */ | 8254 | /* 64 bit read */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h index 0f70e8aa..248fa291 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | |||
@@ -34,30 +34,37 @@ static inline u32 pri_gpccs_addr_mask(u32 addr) | |||
34 | { | 34 | { |
35 | return addr & ((1 << pri_gpccs_addr_width()) - 1); | 35 | return addr & ((1 << pri_gpccs_addr_width()) - 1); |
36 | } | 36 | } |
37 | static inline u32 pri_gpc_addr(u32 addr, u32 gpc) | 37 | static inline u32 pri_gpc_addr(struct gk20a *g, u32 addr, u32 gpc) |
38 | { | 38 | { |
39 | return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + addr; | 39 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); |
40 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
41 | return gpc_base + (gpc * gpc_stride) + addr; | ||
40 | } | 42 | } |
41 | static inline bool pri_is_gpc_addr_shared(u32 addr) | 43 | static inline bool pri_is_gpc_addr_shared(struct gk20a *g, u32 addr) |
42 | { | 44 | { |
43 | return (addr >= proj_gpc_shared_base_v()) && | 45 | u32 gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_SHARED_BASE); |
44 | (addr < proj_gpc_shared_base_v() + proj_gpc_stride_v()); | 46 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
47 | return (addr >= gpc_shared_base) && | ||
48 | (addr < gpc_shared_base + gpc_stride); | ||
45 | } | 49 | } |
46 | static inline bool pri_is_gpc_addr(u32 addr) | 50 | static inline bool pri_is_gpc_addr(struct gk20a *g, u32 addr) |
47 | { | 51 | { |
48 | return ((addr >= proj_gpc_base_v()) && | 52 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); |
49 | (addr < proj_gpc_base_v() + | 53 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
50 | proj_scal_litter_num_gpcs_v() * proj_gpc_stride_v())) || | 54 | u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); |
51 | pri_is_gpc_addr_shared(addr); | 55 | return ((addr >= gpc_base) && |
56 | (addr < gpc_base + num_gpcs * gpc_stride)) || | |
57 | pri_is_gpc_addr_shared(g, addr); | ||
52 | } | 58 | } |
53 | static inline u32 pri_get_gpc_num(u32 addr) | 59 | static inline u32 pri_get_gpc_num(struct gk20a *g, u32 addr) |
54 | { | 60 | { |
55 | u32 i, start; | 61 | u32 i, start; |
56 | u32 num_gpcs = proj_scal_litter_num_gpcs_v(); | 62 | u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); |
57 | 63 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); | |
64 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
58 | for (i = 0; i < num_gpcs; i++) { | 65 | for (i = 0; i < num_gpcs; i++) { |
59 | start = proj_gpc_base_v() + (i * proj_gpc_stride_v()); | 66 | start = gpc_base + (i * gpc_stride); |
60 | if ((addr >= start) && (addr < (start + proj_gpc_stride_v()))) | 67 | if ((addr >= start) && (addr < (start + gpc_stride))) |
61 | return i; | 68 | return i; |
62 | } | 69 | } |
63 | return 0; | 70 | return 0; |
@@ -73,17 +80,23 @@ static inline u32 pri_tpccs_addr_mask(u32 addr) | |||
73 | { | 80 | { |
74 | return addr & ((1 << pri_tpccs_addr_width()) - 1); | 81 | return addr & ((1 << pri_tpccs_addr_width()) - 1); |
75 | } | 82 | } |
76 | static inline u32 pri_tpc_addr(u32 addr, u32 gpc, u32 tpc) | 83 | static inline u32 pri_tpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc) |
77 | { | 84 | { |
78 | return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + | 85 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); |
79 | proj_tpc_in_gpc_base_v() + (tpc * proj_tpc_in_gpc_stride_v()) + | 86 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
87 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
88 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
89 | return gpc_base + (gpc * gpc_stride) + | ||
90 | tpc_in_gpc_base + (tpc * tpc_in_gpc_stride) + | ||
80 | addr; | 91 | addr; |
81 | } | 92 | } |
82 | static inline bool pri_is_tpc_addr_shared(u32 addr) | 93 | static inline bool pri_is_tpc_addr_shared(struct gk20a *g, u32 addr) |
83 | { | 94 | { |
84 | return (addr >= proj_tpc_in_gpc_shared_base_v()) && | 95 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
85 | (addr < (proj_tpc_in_gpc_shared_base_v() + | 96 | u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE); |
86 | proj_tpc_in_gpc_stride_v())); | 97 | return (addr >= tpc_in_gpc_shared_base) && |
98 | (addr < (tpc_in_gpc_shared_base + | ||
99 | tpc_in_gpc_stride)); | ||
87 | } | 100 | } |
88 | 101 | ||
89 | /* | 102 | /* |
@@ -97,29 +110,37 @@ static inline u32 pri_becs_addr_mask(u32 addr) | |||
97 | { | 110 | { |
98 | return addr & ((1 << pri_becs_addr_width()) - 1); | 111 | return addr & ((1 << pri_becs_addr_width()) - 1); |
99 | } | 112 | } |
100 | static inline bool pri_is_be_addr_shared(u32 addr) | 113 | static inline bool pri_is_be_addr_shared(struct gk20a *g, u32 addr) |
101 | { | 114 | { |
102 | return (addr >= proj_rop_shared_base_v()) && | 115 | u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE); |
103 | (addr < proj_rop_shared_base_v() + proj_rop_stride_v()); | 116 | u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE); |
117 | return (addr >= rop_shared_base) && | ||
118 | (addr < rop_shared_base + rop_stride); | ||
104 | } | 119 | } |
105 | static inline u32 pri_be_shared_addr(u32 addr) | 120 | static inline u32 pri_be_shared_addr(struct gk20a *g, u32 addr) |
106 | { | 121 | { |
107 | return proj_rop_shared_base_v() + pri_becs_addr_mask(addr); | 122 | u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE); |
123 | return rop_shared_base + pri_becs_addr_mask(addr); | ||
108 | } | 124 | } |
109 | static inline bool pri_is_be_addr(u32 addr) | 125 | static inline bool pri_is_be_addr(struct gk20a *g, u32 addr) |
110 | { | 126 | { |
111 | return ((addr >= proj_rop_base_v()) && | 127 | u32 num_fbps = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS); |
112 | (addr < proj_rop_base_v()+proj_scal_litter_num_fbps_v() * proj_rop_stride_v())) || | 128 | u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE); |
113 | pri_is_be_addr_shared(addr); | 129 | u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE); |
130 | return ((addr >= rop_base) && | ||
131 | (addr < rop_base + num_fbps * rop_stride)) || | ||
132 | pri_is_be_addr_shared(g, addr); | ||
114 | } | 133 | } |
115 | 134 | ||
116 | static inline u32 pri_get_be_num(u32 addr) | 135 | static inline u32 pri_get_be_num(struct gk20a *g, u32 addr) |
117 | { | 136 | { |
118 | u32 i, start; | 137 | u32 i, start; |
119 | u32 num_fbps = proj_scal_litter_num_fbps_v(); | 138 | u32 num_fbps = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS); |
139 | u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE); | ||
140 | u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE); | ||
120 | for (i = 0; i < num_fbps; i++) { | 141 | for (i = 0; i < num_fbps; i++) { |
121 | start = proj_rop_base_v() + (i * proj_rop_stride_v()); | 142 | start = rop_base + (i * rop_stride); |
122 | if ((addr >= start) && (addr < (start + proj_rop_stride_v()))) | 143 | if ((addr >= start) && (addr < (start + rop_stride))) |
123 | return i; | 144 | return i; |
124 | } | 145 | } |
125 | return 0; | 146 | return 0; |
@@ -136,10 +157,14 @@ static inline u32 pri_ppccs_addr_mask(u32 addr) | |||
136 | { | 157 | { |
137 | return addr & ((1 << pri_ppccs_addr_width()) - 1); | 158 | return addr & ((1 << pri_ppccs_addr_width()) - 1); |
138 | } | 159 | } |
139 | static inline u32 pri_ppc_addr(u32 addr, u32 gpc, u32 ppc) | 160 | static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc) |
140 | { | 161 | { |
141 | return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + | 162 | u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); |
142 | proj_ppc_in_gpc_base_v() + (ppc * proj_ppc_in_gpc_stride_v()) + addr; | 163 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
164 | u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE); | ||
165 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
166 | return gpc_base + (gpc * gpc_stride) + | ||
167 | ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr; | ||
143 | } | 168 | } |
144 | 169 | ||
145 | enum ctxsw_addr_type { | 170 | enum ctxsw_addr_type { |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 6df8f37c..fb3b3e55 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "clk_gk20a.h" | 29 | #include "clk_gk20a.h" |
30 | #include "regops_gk20a.h" | 30 | #include "regops_gk20a.h" |
31 | #include "therm_gk20a.h" | 31 | #include "therm_gk20a.h" |
32 | #include "hw_proj_gk20a.h" | ||
32 | 33 | ||
33 | static struct gpu_ops gk20a_ops = { | 34 | static struct gpu_ops gk20a_ops = { |
34 | .clock_gating = { | 35 | .clock_gating = { |
@@ -47,6 +48,78 @@ static struct gpu_ops gk20a_ops = { | |||
47 | }, | 48 | }, |
48 | }; | 49 | }; |
49 | 50 | ||
51 | static int gk20a_get_litter_value(struct gk20a *g, | ||
52 | enum nvgpu_litter_value value) | ||
53 | { | ||
54 | int ret = EINVAL; | ||
55 | switch (value) { | ||
56 | case GPU_LIT_NUM_GPCS: | ||
57 | ret = proj_scal_litter_num_gpcs_v(); | ||
58 | break; | ||
59 | case GPU_LIT_NUM_PES_PER_GPC: | ||
60 | ret = proj_scal_litter_num_pes_per_gpc_v(); | ||
61 | break; | ||
62 | case GPU_LIT_NUM_ZCULL_BANKS: | ||
63 | ret = proj_scal_litter_num_zcull_banks_v(); | ||
64 | break; | ||
65 | case GPU_LIT_NUM_TPC_PER_GPC: | ||
66 | ret = proj_scal_litter_num_tpc_per_gpc_v(); | ||
67 | break; | ||
68 | case GPU_LIT_NUM_FBPS: | ||
69 | ret = proj_scal_litter_num_fbps_v(); | ||
70 | break; | ||
71 | case GPU_LIT_GPC_BASE: | ||
72 | ret = proj_gpc_base_v(); | ||
73 | break; | ||
74 | case GPU_LIT_GPC_STRIDE: | ||
75 | ret = proj_gpc_stride_v(); | ||
76 | break; | ||
77 | case GPU_LIT_GPC_SHARED_BASE: | ||
78 | ret = proj_gpc_shared_base_v(); | ||
79 | break; | ||
80 | case GPU_LIT_TPC_IN_GPC_BASE: | ||
81 | ret = proj_tpc_in_gpc_base_v(); | ||
82 | break; | ||
83 | case GPU_LIT_TPC_IN_GPC_STRIDE: | ||
84 | ret = proj_tpc_in_gpc_stride_v(); | ||
85 | break; | ||
86 | case GPU_LIT_TPC_IN_GPC_SHARED_BASE: | ||
87 | ret = proj_tpc_in_gpc_shared_base_v(); | ||
88 | break; | ||
89 | case GPU_LIT_PPC_IN_GPC_STRIDE: | ||
90 | ret = proj_ppc_in_gpc_stride_v(); | ||
91 | break; | ||
92 | case GPU_LIT_ROP_BASE: | ||
93 | ret = proj_rop_base_v(); | ||
94 | break; | ||
95 | case GPU_LIT_ROP_STRIDE: | ||
96 | ret = proj_rop_stride_v(); | ||
97 | break; | ||
98 | case GPU_LIT_ROP_SHARED_BASE: | ||
99 | ret = proj_rop_shared_base_v(); | ||
100 | break; | ||
101 | case GPU_LIT_HOST_NUM_PBDMA: | ||
102 | ret = proj_host_num_pbdma_v(); | ||
103 | break; | ||
104 | case GPU_LIT_LTC_STRIDE: | ||
105 | ret = proj_ltc_stride_v(); | ||
106 | break; | ||
107 | case GPU_LIT_LTS_STRIDE: | ||
108 | ret = proj_lts_stride_v(); | ||
109 | break; | ||
110 | case GPU_LIT_NUM_FBPAS: | ||
111 | ret = proj_scal_litter_num_fbpas_v(); | ||
112 | break; | ||
113 | case GPU_LIT_FBPA_STRIDE: | ||
114 | ret = proj_fbpa_stride_v(); | ||
115 | break; | ||
116 | default: | ||
117 | break; | ||
118 | } | ||
119 | |||
120 | return ret; | ||
121 | } | ||
122 | |||
50 | int gk20a_init_hal(struct gk20a *g) | 123 | int gk20a_init_hal(struct gk20a *g) |
51 | { | 124 | { |
52 | struct gpu_ops *gops = &g->ops; | 125 | struct gpu_ops *gops = &g->ops; |
@@ -71,6 +144,7 @@ int gk20a_init_hal(struct gk20a *g) | |||
71 | gk20a_init_therm_ops(gops); | 144 | gk20a_init_therm_ops(gops); |
72 | gops->name = "gk20a"; | 145 | gops->name = "gk20a"; |
73 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | 146 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; |
147 | gops->get_litter_value = gk20a_get_litter_value; | ||
74 | 148 | ||
75 | c->twod_class = FERMI_TWOD_A; | 149 | c->twod_class = FERMI_TWOD_A; |
76 | c->threed_class = KEPLER_C; | 150 | c->threed_class = KEPLER_C; |
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 963f6bb7..0ce4f91a 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <trace/events/gk20a.h> | 20 | #include <trace/events/gk20a.h> |
21 | 21 | ||
22 | #include "hw_ltc_gk20a.h" | 22 | #include "hw_ltc_gk20a.h" |
23 | #include "hw_proj_gk20a.h" | ||
24 | 23 | ||
25 | #include "ltc_common.c" | 24 | #include "ltc_common.c" |
26 | 25 | ||
@@ -108,6 +107,8 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
108 | u32 slices_per_fbp = | 107 | u32 slices_per_fbp = |
109 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( | 108 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( |
110 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | 109 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); |
110 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
111 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
111 | 112 | ||
112 | gk20a_dbg_fn(""); | 113 | gk20a_dbg_fn(""); |
113 | 114 | ||
@@ -140,8 +141,8 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
140 | 141 | ||
141 | 142 | ||
142 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | 143 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + |
143 | fbp * proj_ltc_stride_v() + | 144 | fbp * ltc_stride + |
144 | slice * proj_lts_stride_v(); | 145 | slice * lts_stride; |
145 | 146 | ||
146 | retry = 200; | 147 | retry = 200; |
147 | do { | 148 | do { |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 050c2bee..b49f2301 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include "hw_fifo_gm20b.h" | 27 | #include "hw_fifo_gm20b.h" |
28 | #include "hw_fb_gm20b.h" | 28 | #include "hw_fb_gm20b.h" |
29 | #include "hw_top_gm20b.h" | 29 | #include "hw_top_gm20b.h" |
30 | #include "hw_proj_gm20b.h" | ||
31 | #include "hw_ctxsw_prog_gm20b.h" | 30 | #include "hw_ctxsw_prog_gm20b.h" |
32 | #include "hw_fuse_gm20b.h" | 31 | #include "hw_fuse_gm20b.h" |
33 | #include "pmu_gm20b.h" | 32 | #include "pmu_gm20b.h" |
@@ -178,6 +177,8 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
178 | u32 gpc_index, ppc_index; | 177 | u32 gpc_index, ppc_index; |
179 | u32 temp; | 178 | u32 temp; |
180 | u32 cbm_cfg_size1, cbm_cfg_size2; | 179 | u32 cbm_cfg_size1, cbm_cfg_size2; |
180 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
181 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
181 | 182 | ||
182 | gk20a_dbg_fn(""); | 183 | gk20a_dbg_fn(""); |
183 | 184 | ||
@@ -198,7 +199,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
198 | gr->tpc_count * gr->attrib_cb_size; | 199 | gr->tpc_count * gr->attrib_cb_size; |
199 | 200 | ||
200 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 201 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
201 | temp = proj_gpc_stride_v() * gpc_index; | 202 | temp = gpc_stride * gpc_index; |
202 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 203 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
203 | ppc_index++) { | 204 | ppc_index++) { |
204 | cbm_cfg_size1 = gr->attrib_cb_default_size * | 205 | cbm_cfg_size1 = gr->attrib_cb_default_size * |
@@ -208,12 +209,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
208 | 209 | ||
209 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 210 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
210 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + | 211 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + |
211 | proj_ppc_in_gpc_stride_v() * ppc_index, | 212 | ppc_in_gpc_stride * ppc_index, |
212 | cbm_cfg_size1, patch); | 213 | cbm_cfg_size1, patch); |
213 | 214 | ||
214 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 215 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
215 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + | 216 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + |
216 | proj_ppc_in_gpc_stride_v() * ppc_index, | 217 | ppc_in_gpc_stride * ppc_index, |
217 | attrib_offset_in_chunk, patch); | 218 | attrib_offset_in_chunk, patch); |
218 | 219 | ||
219 | attrib_offset_in_chunk += gr->attrib_cb_size * | 220 | attrib_offset_in_chunk += gr->attrib_cb_size * |
@@ -221,12 +222,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
221 | 222 | ||
222 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 223 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
223 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + | 224 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + |
224 | proj_ppc_in_gpc_stride_v() * ppc_index, | 225 | ppc_in_gpc_stride * ppc_index, |
225 | cbm_cfg_size2, patch); | 226 | cbm_cfg_size2, patch); |
226 | 227 | ||
227 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 228 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
228 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + | 229 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + |
229 | proj_ppc_in_gpc_stride_v() * ppc_index, | 230 | ppc_in_gpc_stride * ppc_index, |
230 | alpha_offset_in_chunk, patch); | 231 | alpha_offset_in_chunk, patch); |
231 | 232 | ||
232 | alpha_offset_in_chunk += gr->alpha_cb_size * | 233 | alpha_offset_in_chunk += gr->alpha_cb_size * |
@@ -297,6 +298,8 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
297 | u32 gpc_index, ppc_index, stride, val; | 298 | u32 gpc_index, ppc_index, stride, val; |
298 | u32 pd_ab_max_output; | 299 | u32 pd_ab_max_output; |
299 | u32 alpha_cb_size = data * 4; | 300 | u32 alpha_cb_size = data * 4; |
301 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
302 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
300 | 303 | ||
301 | gk20a_dbg_fn(""); | 304 | gk20a_dbg_fn(""); |
302 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | 305 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) |
@@ -319,14 +322,14 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
319 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | 322 | gr_pd_ab_dist_cfg1_max_batches_init_f()); |
320 | 323 | ||
321 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 324 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
322 | stride = proj_gpc_stride_v() * gpc_index; | 325 | stride = gpc_stride * gpc_index; |
323 | 326 | ||
324 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 327 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
325 | ppc_index++) { | 328 | ppc_index++) { |
326 | 329 | ||
327 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | 330 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + |
328 | stride + | 331 | stride + |
329 | proj_ppc_in_gpc_stride_v() * ppc_index); | 332 | ppc_in_gpc_stride * ppc_index); |
330 | 333 | ||
331 | val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), | 334 | val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), |
332 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * | 335 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * |
@@ -334,7 +337,7 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
334 | 337 | ||
335 | gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | 338 | gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + |
336 | stride + | 339 | stride + |
337 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 340 | ppc_in_gpc_stride * ppc_index, val); |
338 | } | 341 | } |
339 | } | 342 | } |
340 | } | 343 | } |
@@ -344,6 +347,8 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
344 | struct gr_gk20a *gr = &g->gr; | 347 | struct gr_gk20a *gr = &g->gr; |
345 | u32 gpc_index, ppc_index, stride, val; | 348 | u32 gpc_index, ppc_index, stride, val; |
346 | u32 cb_size = data * 4; | 349 | u32 cb_size = data * 4; |
350 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
351 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
347 | 352 | ||
348 | gk20a_dbg_fn(""); | 353 | gk20a_dbg_fn(""); |
349 | 354 | ||
@@ -356,14 +361,14 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
356 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | 361 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); |
357 | 362 | ||
358 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 363 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
359 | stride = proj_gpc_stride_v() * gpc_index; | 364 | stride = gpc_stride * gpc_index; |
360 | 365 | ||
361 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 366 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
362 | ppc_index++) { | 367 | ppc_index++) { |
363 | 368 | ||
364 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | 369 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + |
365 | stride + | 370 | stride + |
366 | proj_ppc_in_gpc_stride_v() * ppc_index); | 371 | ppc_in_gpc_stride * ppc_index); |
367 | 372 | ||
368 | val = set_field(val, | 373 | val = set_field(val, |
369 | gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), | 374 | gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), |
@@ -372,7 +377,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
372 | 377 | ||
373 | gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | 378 | gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + |
374 | stride + | 379 | stride + |
375 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 380 | ppc_in_gpc_stride * ppc_index, val); |
376 | 381 | ||
377 | val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( | 382 | val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( |
378 | ppc_index + gpc_index)); | 383 | ppc_index + gpc_index)); |
@@ -527,14 +532,16 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
527 | u32 tpc_per_gpc = 0; | 532 | u32 tpc_per_gpc = 0; |
528 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; | 533 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; |
529 | u32 pes_tpc_mask = 0, pes_index; | 534 | u32 pes_tpc_mask = 0, pes_index; |
535 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
536 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
530 | 537 | ||
531 | gk20a_dbg_fn(""); | 538 | gk20a_dbg_fn(""); |
532 | 539 | ||
533 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 540 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
534 | gpc_offset = proj_gpc_stride_v() * gpc_index; | 541 | gpc_offset = gpc_stride * gpc_index; |
535 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; | 542 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; |
536 | tpc_index++) { | 543 | tpc_index++) { |
537 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; | 544 | tpc_offset = tpc_in_gpc_stride * tpc_index; |
538 | 545 | ||
539 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() | 546 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() |
540 | + gpc_offset + tpc_offset, | 547 | + gpc_offset + tpc_offset, |
@@ -640,32 +647,37 @@ static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | |||
640 | return 0; | 647 | return 0; |
641 | } | 648 | } |
642 | 649 | ||
643 | static bool gr_gm20b_is_tpc_addr_shared(u32 addr) | 650 | static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr) |
644 | { | 651 | { |
645 | return (addr >= proj_tpc_in_gpc_shared_base_v()) && | 652 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
646 | (addr < (proj_tpc_in_gpc_shared_base_v() + | 653 | u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE); |
647 | proj_tpc_in_gpc_stride_v())); | 654 | return (addr >= tpc_in_gpc_shared_base) && |
655 | (addr < (tpc_in_gpc_shared_base + | ||
656 | tpc_in_gpc_stride)); | ||
648 | } | 657 | } |
649 | 658 | ||
650 | static bool gr_gm20b_is_tpc_addr(u32 addr) | 659 | static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) |
651 | { | 660 | { |
652 | return ((addr >= proj_tpc_in_gpc_base_v()) && | 661 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); |
653 | (addr < proj_tpc_in_gpc_base_v() + | 662 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
654 | (proj_scal_litter_num_tpc_per_gpc_v() * | 663 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
655 | proj_tpc_in_gpc_stride_v()))) | 664 | return ((addr >= tpc_in_gpc_base) && |
656 | || gr_gm20b_is_tpc_addr_shared(addr); | 665 | (addr < tpc_in_gpc_base + |
666 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
667 | || gr_gm20b_is_tpc_addr_shared(g, addr); | ||
657 | } | 668 | } |
658 | 669 | ||
659 | static u32 gr_gm20b_get_tpc_num(u32 addr) | 670 | static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) |
660 | { | 671 | { |
661 | u32 i, start; | 672 | u32 i, start; |
662 | u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v(); | 673 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
674 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
675 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
663 | 676 | ||
664 | for (i = 0; i < num_tpcs; i++) { | 677 | for (i = 0; i < num_tpcs; i++) { |
665 | start = proj_tpc_in_gpc_base_v() + | 678 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); |
666 | (i * proj_tpc_in_gpc_stride_v()); | ||
667 | if ((addr >= start) && | 679 | if ((addr >= start) && |
668 | (addr < (start + proj_tpc_in_gpc_stride_v()))) | 680 | (addr < (start + tpc_in_gpc_stride))) |
669 | return i; | 681 | return i; |
670 | } | 682 | } |
671 | return 0; | 683 | return 0; |
@@ -1066,6 +1078,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | |||
1066 | u32 gpc, tpc, sm_id; | 1078 | u32 gpc, tpc, sm_id; |
1067 | u32 tpc_offset, gpc_offset, reg_offset; | 1079 | u32 tpc_offset, gpc_offset, reg_offset; |
1068 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | 1080 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; |
1081 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1082 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1069 | 1083 | ||
1070 | /* for maxwell & kepler */ | 1084 | /* for maxwell & kepler */ |
1071 | u32 numSmPerTpc = 1; | 1085 | u32 numSmPerTpc = 1; |
@@ -1075,8 +1089,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | |||
1075 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 1089 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
1076 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 1090 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
1077 | 1091 | ||
1078 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 1092 | tpc_offset = tpc_in_gpc_stride * tpc; |
1079 | gpc_offset = proj_gpc_stride_v() * gpc; | 1093 | gpc_offset = gpc_stride * gpc; |
1080 | reg_offset = tpc_offset + gpc_offset; | 1094 | reg_offset = tpc_offset + gpc_offset; |
1081 | 1095 | ||
1082 | /* 64 bit read */ | 1096 | /* 64 bit read */ |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 559fee61..df25be5e 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "debug_gm20b.h" | 34 | #include "debug_gm20b.h" |
35 | #include "cde_gm20b.h" | 35 | #include "cde_gm20b.h" |
36 | #include "therm_gm20b.h" | 36 | #include "therm_gm20b.h" |
37 | #include "hw_proj_gm20b.h" | ||
37 | 38 | ||
38 | #define FUSE_OPT_PRIV_SEC_DIS_0 0x264 | 39 | #define FUSE_OPT_PRIV_SEC_DIS_0 0x264 |
39 | #define PRIV_SECURITY_DISABLE 0x01 | 40 | #define PRIV_SECURITY_DISABLE 0x01 |
@@ -89,6 +90,78 @@ static struct gpu_ops gm20b_ops = { | |||
89 | }, | 90 | }, |
90 | }; | 91 | }; |
91 | 92 | ||
93 | static int gm20b_get_litter_value(struct gk20a *g, | ||
94 | enum nvgpu_litter_value value) | ||
95 | { | ||
96 | int ret = EINVAL; | ||
97 | switch (value) { | ||
98 | case GPU_LIT_NUM_GPCS: | ||
99 | ret = proj_scal_litter_num_gpcs_v(); | ||
100 | break; | ||
101 | case GPU_LIT_NUM_PES_PER_GPC: | ||
102 | ret = proj_scal_litter_num_pes_per_gpc_v(); | ||
103 | break; | ||
104 | case GPU_LIT_NUM_ZCULL_BANKS: | ||
105 | ret = proj_scal_litter_num_zcull_banks_v(); | ||
106 | break; | ||
107 | case GPU_LIT_NUM_TPC_PER_GPC: | ||
108 | ret = proj_scal_litter_num_tpc_per_gpc_v(); | ||
109 | break; | ||
110 | case GPU_LIT_NUM_FBPS: | ||
111 | ret = proj_scal_litter_num_fbps_v(); | ||
112 | break; | ||
113 | case GPU_LIT_GPC_BASE: | ||
114 | ret = proj_gpc_base_v(); | ||
115 | break; | ||
116 | case GPU_LIT_GPC_STRIDE: | ||
117 | ret = proj_gpc_stride_v(); | ||
118 | break; | ||
119 | case GPU_LIT_GPC_SHARED_BASE: | ||
120 | ret = proj_gpc_shared_base_v(); | ||
121 | break; | ||
122 | case GPU_LIT_TPC_IN_GPC_BASE: | ||
123 | ret = proj_tpc_in_gpc_base_v(); | ||
124 | break; | ||
125 | case GPU_LIT_TPC_IN_GPC_STRIDE: | ||
126 | ret = proj_tpc_in_gpc_stride_v(); | ||
127 | break; | ||
128 | case GPU_LIT_TPC_IN_GPC_SHARED_BASE: | ||
129 | ret = proj_tpc_in_gpc_shared_base_v(); | ||
130 | break; | ||
131 | case GPU_LIT_PPC_IN_GPC_STRIDE: | ||
132 | ret = proj_ppc_in_gpc_stride_v(); | ||
133 | break; | ||
134 | case GPU_LIT_ROP_BASE: | ||
135 | ret = proj_rop_base_v(); | ||
136 | break; | ||
137 | case GPU_LIT_ROP_STRIDE: | ||
138 | ret = proj_rop_stride_v(); | ||
139 | break; | ||
140 | case GPU_LIT_ROP_SHARED_BASE: | ||
141 | ret = proj_rop_shared_base_v(); | ||
142 | break; | ||
143 | case GPU_LIT_HOST_NUM_PBDMA: | ||
144 | ret = proj_host_num_pbdma_v(); | ||
145 | break; | ||
146 | case GPU_LIT_LTC_STRIDE: | ||
147 | ret = proj_ltc_stride_v(); | ||
148 | break; | ||
149 | case GPU_LIT_LTS_STRIDE: | ||
150 | ret = proj_lts_stride_v(); | ||
151 | break; | ||
152 | case GPU_LIT_NUM_FBPAS: | ||
153 | ret = proj_scal_litter_num_fbpas_v(); | ||
154 | break; | ||
155 | case GPU_LIT_FBPA_STRIDE: | ||
156 | ret = proj_fbpa_stride_v(); | ||
157 | break; | ||
158 | default: | ||
159 | break; | ||
160 | } | ||
161 | |||
162 | return ret; | ||
163 | } | ||
164 | |||
92 | int gm20b_init_hal(struct gk20a *g) | 165 | int gm20b_init_hal(struct gk20a *g) |
93 | { | 166 | { |
94 | struct gpu_ops *gops = &g->ops; | 167 | struct gpu_ops *gops = &g->ops; |
@@ -140,6 +213,7 @@ int gm20b_init_hal(struct gk20a *g) | |||
140 | gm20b_init_therm_ops(gops); | 213 | gm20b_init_therm_ops(gops); |
141 | gops->name = "gm20b"; | 214 | gops->name = "gm20b"; |
142 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; | 215 | gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; |
216 | gops->get_litter_value = gm20b_get_litter_value; | ||
143 | 217 | ||
144 | c->twod_class = FERMI_TWOD_A; | 218 | c->twod_class = FERMI_TWOD_A; |
145 | c->threed_class = MAXWELL_B; | 219 | c->threed_class = MAXWELL_B; |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index e4e27764..4fc9d51b 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include "hw_mc_gm20b.h" | 20 | #include "hw_mc_gm20b.h" |
21 | #include "hw_ltc_gm20b.h" | 21 | #include "hw_ltc_gm20b.h" |
22 | #include "hw_top_gm20b.h" | 22 | #include "hw_top_gm20b.h" |
23 | #include "hw_proj_gm20b.h" | ||
24 | #include "hw_pri_ringmaster_gm20b.h" | 23 | #include "hw_pri_ringmaster_gm20b.h" |
25 | 24 | ||
26 | #include "gk20a/ltc_common.c" | 25 | #include "gk20a/ltc_common.c" |
@@ -109,6 +108,8 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
109 | s32 retry = 200; | 108 | s32 retry = 200; |
110 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( | 109 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( |
111 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | 110 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); |
111 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
112 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
112 | 113 | ||
113 | gk20a_dbg_fn(""); | 114 | gk20a_dbg_fn(""); |
114 | 115 | ||
@@ -139,8 +140,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
139 | for (slice = 0; slice < slices_per_ltc; slice++) { | 140 | for (slice = 0; slice < slices_per_ltc; slice++) { |
140 | 141 | ||
141 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | 142 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + |
142 | ltc * proj_ltc_stride_v() + | 143 | ltc * ltc_stride + slice * lts_stride; |
143 | slice * proj_lts_stride_v(); | ||
144 | 144 | ||
145 | retry = 200; | 145 | retry = 200; |
146 | do { | 146 | do { |
@@ -198,6 +198,8 @@ void gm20b_ltc_isr(struct gk20a *g) | |||
198 | { | 198 | { |
199 | u32 mc_intr, ltc_intr; | 199 | u32 mc_intr, ltc_intr; |
200 | int ltc, slice; | 200 | int ltc, slice; |
201 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
202 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
201 | 203 | ||
202 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); | 204 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); |
203 | gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x", | 205 | gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x", |
@@ -207,13 +209,13 @@ void gm20b_ltc_isr(struct gk20a *g) | |||
207 | continue; | 209 | continue; |
208 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { | 210 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { |
209 | ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + | 211 | ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + |
210 | proj_ltc_stride_v() * ltc + | 212 | ltc_stride * ltc + |
211 | proj_lts_stride_v() * slice); | 213 | lts_stride * slice); |
212 | gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x", | 214 | gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x", |
213 | ltc, slice, ltc_intr); | 215 | ltc, slice, ltc_intr); |
214 | gk20a_writel(g, ltc_ltc0_lts0_intr_r() + | 216 | gk20a_writel(g, ltc_ltc0_lts0_intr_r() + |
215 | proj_ltc_stride_v() * ltc + | 217 | ltc_stride * ltc + |
216 | proj_lts_stride_v() * slice, | 218 | lts_stride * slice, |
217 | ltc_intr); | 219 | ltc_intr); |
218 | } | 220 | } |
219 | } | 221 | } |
@@ -287,6 +289,7 @@ void gm20b_flush_ltc(struct gk20a *g) | |||
287 | { | 289 | { |
288 | unsigned long timeout; | 290 | unsigned long timeout; |
289 | int ltc; | 291 | int ltc; |
292 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
290 | 293 | ||
291 | #define __timeout_init() \ | 294 | #define __timeout_init() \ |
292 | do { \ | 295 | do { \ |
@@ -317,7 +320,7 @@ void gm20b_flush_ltc(struct gk20a *g) | |||
317 | __timeout_init(); | 320 | __timeout_init(); |
318 | do { | 321 | do { |
319 | int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + | 322 | int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + |
320 | ltc * proj_ltc_stride_v(); | 323 | ltc * ltc_stride; |
321 | op_pending = gk20a_readl(g, cmgmt1); | 324 | op_pending = gk20a_readl(g, cmgmt1); |
322 | __timeout_check(); | 325 | __timeout_check(); |
323 | } while (op_pending & | 326 | } while (op_pending & |
@@ -338,7 +341,7 @@ void gm20b_flush_ltc(struct gk20a *g) | |||
338 | __timeout_init(); | 341 | __timeout_init(); |
339 | do { | 342 | do { |
340 | int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + | 343 | int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + |
341 | ltc * proj_ltc_stride_v(); | 344 | ltc * ltc_stride; |
342 | op_pending = gk20a_readl(g, cmgmt0); | 345 | op_pending = gk20a_readl(g, cmgmt0); |
343 | __timeout_check(); | 346 | __timeout_check(); |
344 | } while (op_pending & | 347 | } while (op_pending & |