author	Terje Bergstrom <tbergstrom@nvidia.com>	2016-04-06 16:10:32 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-04-15 11:48:20 -0400
commit	6839341bf8ffafa115cfc0427bba694ee1d131f3 (patch)
tree	1f9369a3bacf0f1a2cc23371f5de988efdc07c31 /drivers
parent	61e009c0f8874898335e6c47a610233c3382be47 (diff)

gpu: nvgpu: Add litter values HAL

Move per-chip constants to be returned by a chip specific function.
Implement get_litter_value() for each chip.

Change-Id: I2a2730fce14010924d2507f6fa15cc2ea0795113
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1121383
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.c     |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c    |  13
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    |   8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h         |  31
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c      | 303
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h  | 101
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal_gk20a.c     |  74
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.c     |   7
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c      |  76
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.c     |  74
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c     |  21
11 files changed, 486 insertions, 223 deletions
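The per-chip callbacks that back the new nvgpu_get_litter_value() macro live in hal_gk20a.c and hal_gm20b.c, whose hunks are not included in this excerpt. As a rough sketch only (the function name, the selection of cases and the error handling here are assumptions), the gk20a implementation would map each enum nvgpu_litter_value to the corresponding proj_*_v() constant from hw_proj_gk20a.h, which the common code below no longer includes:

static int gk20a_get_litter_value(struct gk20a *g,
				  enum nvgpu_litter_value value)
{
	int ret = -EINVAL;

	/* Translate the chip-independent litter enum to gk20a hw_proj constants. */
	switch (value) {
	case GPU_LIT_NUM_GPCS:
		ret = proj_scal_litter_num_gpcs_v();
		break;
	case GPU_LIT_NUM_PES_PER_GPC:
		ret = proj_scal_litter_num_pes_per_gpc_v();
		break;
	case GPU_LIT_GPC_BASE:
		ret = proj_gpc_base_v();
		break;
	case GPU_LIT_GPC_STRIDE:
		ret = proj_gpc_stride_v();
		break;
	case GPU_LIT_TPC_IN_GPC_BASE:
		ret = proj_tpc_in_gpc_base_v();
		break;
	case GPU_LIT_TPC_IN_GPC_STRIDE:
		ret = proj_tpc_in_gpc_stride_v();
		break;
	case GPU_LIT_PPC_IN_GPC_STRIDE:
		ret = proj_ppc_in_gpc_stride_v();
		break;
	case GPU_LIT_HOST_NUM_PBDMA:
		ret = proj_host_num_pbdma_v();
		break;
	/* ... one case per remaining enum nvgpu_litter_value entry ... */
	default:
		gk20a_err(dev_from_gk20a(g), "missing litter value %d", value);
		break;
	}

	return ret;
}

The chip HAL init would then install such a function as g->ops.get_litter_value, and common code reads the constants only through the nvgpu_get_litter_value(g, ...) macro added to gk20a.h below.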
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index a3b02481..00635c4d 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -32,7 +32,6 @@
32#include "hw_pbdma_gk20a.h" 32#include "hw_pbdma_gk20a.h"
33#include "hw_ccsr_gk20a.h" 33#include "hw_ccsr_gk20a.h"
34#include "hw_ram_gk20a.h" 34#include "hw_ram_gk20a.h"
35#include "hw_proj_gk20a.h"
36#include "hw_top_gk20a.h" 35#include "hw_top_gk20a.h"
37#include "hw_mc_gk20a.h" 36#include "hw_mc_gk20a.h"
38#include "hw_gr_gk20a.h" 37#include "hw_gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 87f0683f..b1d35141 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -28,7 +28,6 @@
28#include "regops_gk20a.h" 28#include "regops_gk20a.h"
29#include "hw_gr_gk20a.h" 29#include "hw_gr_gk20a.h"
30#include "hw_fb_gk20a.h" 30#include "hw_fb_gk20a.h"
31#include "hw_proj_gk20a.h"
32#include "hw_timer_gk20a.h" 31#include "hw_timer_gk20a.h"
33 32
34int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp) 33int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
@@ -451,15 +450,17 @@ static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g)
451 u32 gpc_offset, tpc_offset, gpc, tpc; 450 u32 gpc_offset, tpc_offset, gpc, tpc;
452 struct gr_gk20a *gr = &g->gr; 451 struct gr_gk20a *gr = &g->gr;
453 u32 global_esr; 452 u32 global_esr;
453 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
454 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
454 455
455 for (gpc = 0; gpc < gr->gpc_count; gpc++) { 456 for (gpc = 0; gpc < gr->gpc_count; gpc++) {
456 457
457 gpc_offset = proj_gpc_stride_v() * gpc; 458 gpc_offset = gpc_stride * gpc;
458 459
459 /* check if any tpc has an exception */ 460 /* check if any tpc has an exception */
460 for (tpc = 0; tpc < gr->tpc_count; tpc++) { 461 for (tpc = 0; tpc < gr->tpc_count; tpc++) {
461 462
462 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; 463 tpc_offset = tpc_in_gpc_stride * tpc;
463 464
464 global_esr = gk20a_readl(g, 465 global_esr = gk20a_readl(g,
465 gr_gpc0_tpc0_sm_hww_global_esr_r() + 466 gr_gpc0_tpc0_sm_hww_global_esr_r() +
@@ -482,13 +483,15 @@ static int nvgpu_gpu_ioctl_has_any_exception(
482 struct gr_gk20a *gr = &g->gr; 483 struct gr_gk20a *gr = &g->gr;
483 u32 sm_id, tpc_exception_en = 0; 484 u32 sm_id, tpc_exception_en = 0;
484 u32 offset, regval, tpc_offset, gpc_offset; 485 u32 offset, regval, tpc_offset, gpc_offset;
486 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
487 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
485 488
486 mutex_lock(&g->dbg_sessions_lock); 489 mutex_lock(&g->dbg_sessions_lock);
487 490
488 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { 491 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
489 492
490 tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index; 493 tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
491 gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index; 494 gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
492 offset = tpc_offset + gpc_offset; 495 offset = tpc_offset + gpc_offset;
493 496
494 regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + 497 regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 44329a53..33ed9a04 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -31,7 +31,6 @@
31#include "hw_pbdma_gk20a.h" 31#include "hw_pbdma_gk20a.h"
32#include "hw_ccsr_gk20a.h" 32#include "hw_ccsr_gk20a.h"
33#include "hw_ram_gk20a.h" 33#include "hw_ram_gk20a.h"
34#include "hw_proj_gk20a.h"
35#include "hw_top_gk20a.h" 34#include "hw_top_gk20a.h"
36#include "hw_mc_gk20a.h" 35#include "hw_mc_gk20a.h"
37#include "hw_gr_gk20a.h" 36#include "hw_gr_gk20a.h"
@@ -349,6 +348,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
349 u32 timeout; 348 u32 timeout;
350 int i; 349 int i;
351 struct gk20a_platform *platform = dev_get_drvdata(g->dev); 350 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
351 u32 host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
352 352
353 gk20a_dbg_fn(""); 353 gk20a_dbg_fn("");
354 /* enable pmc pfifo */ 354 /* enable pmc pfifo */
@@ -367,7 +367,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
367 367
368 /* enable pbdma */ 368 /* enable pbdma */
369 mask = 0; 369 mask = 0;
370 for (i = 0; i < proj_host_num_pbdma_v(); ++i) 370 for (i = 0; i < host_num_pbdma; ++i)
371 mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i); 371 mask |= mc_enable_pb_sel_f(mc_enable_pb_0_enabled_v(), i);
372 gk20a_writel(g, mc_enable_pb_r(), mask); 372 gk20a_writel(g, mc_enable_pb_r(), mask);
373 373
@@ -378,7 +378,7 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
378 378
379 /* enable pbdma interrupt */ 379 /* enable pbdma interrupt */
380 mask = 0; 380 mask = 0;
381 for (i = 0; i < proj_host_num_pbdma_v(); i++) { 381 for (i = 0; i < host_num_pbdma; i++) {
382 intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i)); 382 intr_stall = gk20a_readl(g, pbdma_intr_stall_r(i));
383 intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f(); 383 intr_stall &= ~pbdma_intr_stall_lbreq_enabled_f();
384 gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall); 384 gk20a_writel(g, pbdma_intr_stall_r(i), intr_stall);
@@ -487,7 +487,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
487 487
488 f->num_channels = g->ops.fifo.get_num_fifos(g); 488 f->num_channels = g->ops.fifo.get_num_fifos(g);
489 f->num_runlist_entries = fifo_eng_runlist_length_max_v(); 489 f->num_runlist_entries = fifo_eng_runlist_length_max_v();
490 f->num_pbdma = proj_host_num_pbdma_v(); 490 f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
491 f->max_engines = ENGINE_INVAL_GK20A; 491 f->max_engines = ENGINE_INVAL_GK20A;
492 492
493 f->userd_entry_size = 1 << ram_userd_base_shift_v(); 493 f->userd_entry_size = 1 << ram_userd_base_shift_v();
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 71271a2c..e17392d0 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -72,6 +72,32 @@ enum gk20a_cbc_op {
 #define MC_INTR_UNIT_DISABLE false
 #define MC_INTR_UNIT_ENABLE true
 
+enum nvgpu_litter_value {
+	GPU_LIT_NUM_GPCS,
+	GPU_LIT_NUM_PES_PER_GPC,
+	GPU_LIT_NUM_ZCULL_BANKS,
+	GPU_LIT_NUM_TPC_PER_GPC,
+	GPU_LIT_NUM_FBPS,
+	GPU_LIT_GPC_BASE,
+	GPU_LIT_GPC_STRIDE,
+	GPU_LIT_GPC_SHARED_BASE,
+	GPU_LIT_TPC_IN_GPC_BASE,
+	GPU_LIT_TPC_IN_GPC_STRIDE,
+	GPU_LIT_TPC_IN_GPC_SHARED_BASE,
+	GPU_LIT_PPC_IN_GPC_BASE,
+	GPU_LIT_PPC_IN_GPC_STRIDE,
+	GPU_LIT_ROP_BASE,
+	GPU_LIT_ROP_STRIDE,
+	GPU_LIT_ROP_SHARED_BASE,
+	GPU_LIT_HOST_NUM_PBDMA,
+	GPU_LIT_LTC_STRIDE,
+	GPU_LIT_LTS_STRIDE,
+	GPU_LIT_NUM_FBPAS,
+	GPU_LIT_FBPA_STRIDE,
+};
+
+#define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), v)
+
 struct gpu_ops {
 	struct {
 		int (*determine_L2_size_bytes)(struct gk20a *gk20a);
@@ -151,8 +177,8 @@ struct gpu_ops {
 				    u32 mode);
 		int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
 				    struct gr_zcull_info *zcull_params);
-		bool (*is_tpc_addr)(u32 addr);
-		u32 (*get_tpc_num)(u32 addr);
+		bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
+		u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
 		void (*detect_sm_arch)(struct gk20a *g);
 		int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr,
 				  struct zbc_entry *color_val, u32 index);
@@ -526,6 +552,7 @@ struct gpu_ops {
 			size_t scatter_buffer_size);
 	} cde;
 
+	int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value);
 	int (*chip_init_gpu_characteristics)(struct gk20a *g);
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index aa42e1dd..51a61de3 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -46,7 +46,6 @@
46#include "hw_pri_ringstation_sys_gk20a.h" 46#include "hw_pri_ringstation_sys_gk20a.h"
47#include "hw_pri_ringstation_gpc_gk20a.h" 47#include "hw_pri_ringstation_gpc_gk20a.h"
48#include "hw_pri_ringstation_fbp_gk20a.h" 48#include "hw_pri_ringstation_fbp_gk20a.h"
49#include "hw_proj_gk20a.h"
50#include "hw_top_gk20a.h" 49#include "hw_top_gk20a.h"
51#include "hw_ltc_gk20a.h" 50#include "hw_ltc_gk20a.h"
52#include "hw_fb_gk20a.h" 51#include "hw_fb_gk20a.h"
@@ -815,6 +814,8 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
815 u32 gpc_index, ppc_index; 814 u32 gpc_index, ppc_index;
816 u32 temp; 815 u32 temp;
817 u32 cbm_cfg_size1, cbm_cfg_size2; 816 u32 cbm_cfg_size1, cbm_cfg_size2;
817 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
818 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
818 819
819 gk20a_dbg_fn(""); 820 gk20a_dbg_fn("");
820 821
@@ -835,7 +836,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
835 gr->tpc_count * gr->attrib_cb_size; 836 gr->tpc_count * gr->attrib_cb_size;
836 837
837 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 838 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
838 temp = proj_gpc_stride_v() * gpc_index; 839 temp = gpc_stride * gpc_index;
839 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; 840 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
840 ppc_index++) { 841 ppc_index++) {
841 cbm_cfg_size1 = gr->attrib_cb_default_size * 842 cbm_cfg_size1 = gr->attrib_cb_default_size *
@@ -845,7 +846,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
845 846
846 gr_gk20a_ctx_patch_write(g, ch_ctx, 847 gr_gk20a_ctx_patch_write(g, ch_ctx,
847 gr_gpc0_ppc0_cbm_cfg_r() + temp + 848 gr_gpc0_ppc0_cbm_cfg_r() + temp +
848 proj_ppc_in_gpc_stride_v() * ppc_index, 849 ppc_in_gpc_stride * ppc_index,
849 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) | 850 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
850 gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) | 851 gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) |
851 gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch); 852 gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch);
@@ -855,7 +856,7 @@ static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
855 856
856 gr_gk20a_ctx_patch_write(g, ch_ctx, 857 gr_gk20a_ctx_patch_write(g, ch_ctx,
857 gr_gpc0_ppc0_cbm_cfg2_r() + temp + 858 gr_gpc0_ppc0_cbm_cfg2_r() + temp +
858 proj_ppc_in_gpc_stride_v() * ppc_index, 859 ppc_in_gpc_stride * ppc_index,
859 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) | 860 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) |
860 gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch); 861 gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch);
861 862
@@ -1209,7 +1210,7 @@ static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
1209 u32 gpcs_per_reg = 4; 1210 u32 gpcs_per_reg = 4;
1210 u32 pes_index; 1211 u32 pes_index;
1211 u32 tpc_count_pes; 1212 u32 tpc_count_pes;
1212 u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v(); 1213 u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
1213 1214
1214 u32 alpha_target, beta_target; 1215 u32 alpha_target, beta_target;
1215 u32 alpha_bits, beta_bits; 1216 u32 alpha_bits, beta_bits;
@@ -1309,14 +1310,16 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
1309 u32 tpc_per_gpc; 1310 u32 tpc_per_gpc;
1310 u32 max_ways_evict = INVALID_MAX_WAYS; 1311 u32 max_ways_evict = INVALID_MAX_WAYS;
1311 u32 l1c_dbg_reg_val; 1312 u32 l1c_dbg_reg_val;
1313 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1314 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
1312 1315
1313 gk20a_dbg_fn(""); 1316 gk20a_dbg_fn("");
1314 1317
1315 for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { 1318 for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
1316 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 1319 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1317 gpc_offset = proj_gpc_stride_v() * gpc_index; 1320 gpc_offset = gpc_stride * gpc_index;
1318 if (tpc_index < gr->gpc_tpc_count[gpc_index]) { 1321 if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
1319 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; 1322 tpc_offset = tpc_in_gpc_stride * tpc_index;
1320 1323
1321 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, 1324 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
1322 gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); 1325 gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
@@ -3196,6 +3199,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	u32 pes_heavy_index;
 	u32 gpc_new_skip_mask;
 	u32 tmp;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 
 	tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
 	gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
@@ -3219,8 +3223,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
 	gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
 
-	gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
-	gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();
+	gr->pe_count_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
+	gr->max_zcull_per_gpc_count = nvgpu_get_litter_value(g, GPU_LIT_NUM_ZCULL_BANKS);
 
 	if (!gr->gpc_count) {
 		gk20a_err(dev_from_gk20a(g), "gpc_count==0!");
@@ -3270,7 +3274,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 
 			tmp = gk20a_readl(g,
 				gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
-				gpc_index * proj_gpc_stride_v());
+				gpc_index * gpc_stride);
 
 			pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
 			pes_tpc_count = count_bits(pes_tpc_mask);
@@ -3414,16 +3418,17 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
 	bool delete_map = false;
 	bool gpc_sorted;
 	int ret = 0;
+	int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+	int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
 
-	init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
-	init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
-	run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
+	init_frac = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
+	init_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
+	run_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
 	sorted_num_tpcs =
-		kzalloc(proj_scal_max_gpcs_v() *
-			proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
+		kzalloc(num_gpcs * num_tpc_per_gpc * sizeof(s32),
 			GFP_KERNEL);
 	sorted_to_unsorted_gpc_map =
-		kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
+		kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
 
 	if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
 	      sorted_to_unsorted_gpc_map)) {
@@ -3490,9 +3495,9 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
 	}
 
 	if (gr->map_tiles == NULL) {
-		gr->map_tile_count = proj_scal_max_gpcs_v();
+		gr->map_tile_count = num_gpcs;
 
-		gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
+		gr->map_tiles = kzalloc(num_gpcs * sizeof(u8), GFP_KERNEL);
 		if (gr->map_tiles == NULL) {
 			ret = -ENOMEM;
 			goto clean_up;
@@ -3628,11 +3633,11 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	zcull_params->region_byte_multiplier =
 		gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
 	zcull_params->region_header_size =
-		proj_scal_litter_num_gpcs_v() *
+		nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
 		gr_zcull_save_restore_header_bytes_per_gpc_v();
 
 	zcull_params->subregion_header_size =
-		proj_scal_litter_num_gpcs_v() *
+		nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS) *
 		gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
 
 	zcull_params->subregion_width_align_pixels =
@@ -4082,19 +4087,22 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
 	u32 rcp_conserv;
 	u32 offset;
 	bool floorsweep = false;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
+	int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
 
 	if (!gr->map_tiles)
 		return -1;
 
-	zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
-			proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
+	zcull_map_tiles = kzalloc(num_gpcs *
+			num_tpc_per_gpc * sizeof(u32), GFP_KERNEL);
 	if (!zcull_map_tiles) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to allocate zcull temp buffers");
 		return -ENOMEM;
 	}
-	zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
-			proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
+	zcull_bank_counters = kzalloc(num_gpcs *
+			num_tpc_per_gpc * sizeof(u32), GFP_KERNEL);
 
 	if (!zcull_bank_counters) {
 		gk20a_err(dev_from_gk20a(g),
@@ -4173,7 +4181,7 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
 			gr->gpc_tpc_count[0]);
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		offset = gpc_index * proj_gpc_stride_v();
+		offset = gpc_index * gpc_stride;
 
 		if (floorsweep) {
 			gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
@@ -4836,6 +4844,8 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
 	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_index, ppc_index, stride, val, offset;
 	u32 cb_size = data * 4;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -4848,14 +4858,14 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				ppc_in_gpc_stride * ppc_index);
 
 			offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);
 
@@ -4869,7 +4879,7 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 
 			val = set_field(val,
 				gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
@@ -4877,7 +4887,7 @@ static void gk20a_gr_set_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 		}
 	}
 }
@@ -4888,6 +4898,8 @@ static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 	u32 gpc_index, ppc_index, stride, val;
 	u32 pd_ab_max_output;
 	u32 alpha_cb_size = data * 4;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 	/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
@@ -4910,22 +4922,20 @@ static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_pd_ab_dist_cfg1_max_batches_init_f());
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() +
-				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				stride + ppc_in_gpc_stride * ppc_index);
 
 			val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
 				gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
 					gr->pes_tpc_count[ppc_index][gpc_index]));
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() +
-				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				stride + ppc_in_gpc_stride * ppc_index, val);
 		}
 	}
 }
@@ -5421,8 +5431,9 @@ int gk20a_gr_lock_down_sm(struct gk20a *g,
 		u32 gpc, u32 tpc, u32 global_esr_mask,
 		bool check_errors)
 {
-	u32 offset =
-		proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 	u32 dbgr_control0;
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
@@ -5456,8 +5467,9 @@ bool gk20a_gr_sm_debugger_attached(struct gk20a *g)
 void gk20a_gr_clear_sm_hww(struct gk20a *g,
 			u32 gpc, u32 tpc, u32 global_esr)
 {
-	u32 offset = proj_gpc_stride_v() * gpc +
-		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 
 	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
 			global_esr);
@@ -5477,8 +5489,9 @@ int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
 {
 	int ret = 0;
 	bool do_warp_sync = false, early_exit = false, ignore_debugger = false;
-	u32 offset = proj_gpc_stride_v() * gpc +
-		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 
 	/* these three interrupts don't require locking down the SM. They can
 	 * be handled by usermode clients as they aren't fatal. Additionally,
@@ -5590,8 +5603,9 @@ int gr_gk20a_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event)
 {
 	int ret = 0;
-	u32 offset = proj_gpc_stride_v() * gpc +
-		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 	u32 esr;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
@@ -5611,8 +5625,9 @@ static int gk20a_gr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
 		bool *post_event, struct channel_gk20a *fault_ch)
 {
 	int ret = 0;
-	u32 offset = proj_gpc_stride_v() * gpc +
-		     proj_tpc_in_gpc_stride_v() * tpc;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 	u32 tpc_exception = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_r()
 			+ offset);
 
@@ -5646,6 +5661,8 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 	struct gr_gk20a *gr = &g->gr;
 	u32 exception1 = gk20a_readl(g, gr_exception1_r());
 	u32 gpc_exception, global_esr;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "");
 
@@ -5656,7 +5673,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				"GPC%d exception pending", gpc);
 
-		gpc_offset = proj_gpc_stride_v() * gpc;
+		gpc_offset = gpc_stride * gpc;
 
 		gpc_exception = gk20a_readl(g, gr_gpc0_gpccs_gpc_exception_r()
 				+ gpc_offset);
@@ -5670,7 +5687,7 @@ static int gk20a_gr_handle_gpc_exception(struct gk20a *g, bool *post_event,
 			gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
 				  "GPC%d: TPC%d exception pending", gpc, tpc);
 
-			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+			tpc_offset = tpc_in_gpc_stride * tpc;
 
 			global_esr = gk20a_readl(g,
 					gr_gpc0_tpc0_sm_hww_global_esr_r() +
@@ -6045,31 +6062,31 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
 	*ppc_num = 0;
 	*be_num = 0;
 
-	if (pri_is_gpc_addr(addr)) {
+	if (pri_is_gpc_addr(g, addr)) {
 		*addr_type = CTXSW_ADDR_TYPE_GPC;
 		gpc_addr = pri_gpccs_addr_mask(addr);
-		if (pri_is_gpc_addr_shared(addr)) {
+		if (pri_is_gpc_addr_shared(g, addr)) {
 			*addr_type = CTXSW_ADDR_TYPE_GPC;
 			*broadcast_flags |= PRI_BROADCAST_FLAGS_GPC;
 		} else
-			*gpc_num = pri_get_gpc_num(addr);
+			*gpc_num = pri_get_gpc_num(g, addr);
 
-		if (g->ops.gr.is_tpc_addr(gpc_addr)) {
+		if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
 			*addr_type = CTXSW_ADDR_TYPE_TPC;
-			if (pri_is_tpc_addr_shared(gpc_addr)) {
+			if (pri_is_tpc_addr_shared(g, gpc_addr)) {
 				*broadcast_flags |= PRI_BROADCAST_FLAGS_TPC;
 				return 0;
 			}
-			*tpc_num = g->ops.gr.get_tpc_num(gpc_addr);
+			*tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
 		}
 		return 0;
-	} else if (pri_is_be_addr(addr)) {
+	} else if (pri_is_be_addr(g, addr)) {
 		*addr_type = CTXSW_ADDR_TYPE_BE;
-		if (pri_is_be_addr_shared(addr)) {
+		if (pri_is_be_addr_shared(g, addr)) {
 			*broadcast_flags |= PRI_BROADCAST_FLAGS_BE;
 			return 0;
 		}
-		*be_num = pri_get_be_num(addr);
+		*be_num = pri_get_be_num(g, addr);
 		return 0;
 	} else {
 		*addr_type = CTXSW_ADDR_TYPE_SYS;
@@ -6090,7 +6107,7 @@ static int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr,
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
 
 	for (ppc_num = 0; ppc_num < g->gr.pe_count_per_gpc; ppc_num++)
-		priv_addr_table[(*t)++] = pri_ppc_addr(pri_ppccs_addr_mask(addr),
+		priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr),
 		gpc_num, ppc_num);
 
 	return 0;
@@ -6133,7 +6150,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 	 * so that we can look up the offset. */
 	if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
 	    !(broadcast_flags & PRI_BROADCAST_FLAGS_BE))
-		priv_addr_table[t++] = pri_be_shared_addr(addr);
+		priv_addr_table[t++] = pri_be_shared_addr(g, addr);
 	else
 		priv_addr_table[t++] = addr;
 
@@ -6152,7 +6169,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 				tpc_num < g->gr.gpc_tpc_count[gpc_num];
 				tpc_num++)
 				priv_addr_table[t++] =
-					pri_tpc_addr(pri_tpccs_addr_mask(addr),
+					pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
 						     gpc_num, tpc_num);
 
 		else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
@@ -6162,7 +6179,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 					return err;
 			} else
 				priv_addr_table[t++] =
-					pri_gpc_addr(pri_gpccs_addr_mask(addr),
+					pri_gpc_addr(g, pri_gpccs_addr_mask(addr),
 						     gpc_num);
 		}
 	} else {
@@ -6171,7 +6188,7 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 				tpc_num < g->gr.gpc_tpc_count[gpc_num];
 				tpc_num++)
 				priv_addr_table[t++] =
-					pri_tpc_addr(pri_tpccs_addr_mask(addr),
+					pri_tpc_addr(g, pri_tpccs_addr_mask(addr),
 						     gpc_num, tpc_num);
 		else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
 			err = gr_gk20a_split_ppc_broadcast_addr(g, addr, gpc_num,
@@ -6403,6 +6420,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 	u32 vaddr_lo;
 	u32 vaddr_hi;
 	u32 tmp;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	init_ovr_perf_reg_info();
 	g->ops.gr.init_sm_dsm_reg_info();
@@ -6413,8 +6432,8 @@
 	for (gpc = 0; gpc < num_gpc; gpc++) {
 		num_tpc = g->gr.gpc_tpc_count[gpc];
 		for (tpc = 0; tpc < num_tpc; tpc++) {
-			chk_addr = ((proj_gpc_stride_v() * gpc) +
-				    (proj_tpc_in_gpc_stride_v() * tpc) +
+			chk_addr = ((gpc_stride * gpc) +
+				    (tpc_in_gpc_stride * tpc) +
 				    _ovr_perf_regs[reg]);
 			if (chk_addr != addr)
 				continue;
@@ -6461,18 +6480,19 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
 	u32 tpc, gpc;
 	u32 gpc_tpc_addr;
 	u32 gpc_tpc_stride;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "offset=0x%x", offset);
 
-	gpc = pri_get_gpc_num(offset);
+	gpc = pri_get_gpc_num(g, offset);
 	gpc_tpc_addr = pri_gpccs_addr_mask(offset);
-	tpc = g->ops.gr.get_tpc_num(gpc_tpc_addr);
+	tpc = g->ops.gr.get_tpc_num(g, gpc_tpc_addr);
 
 	quad_ctrl = quad & 0x1; /* first bit tells us quad */
 	half_ctrl = (quad >> 1) & 0x1; /* second bit tells us half */
 
-	gpc_tpc_stride = gpc * proj_gpc_stride_v() +
-		tpc * proj_tpc_in_gpc_stride_v();
+	gpc_tpc_stride = gpc * gpc_stride + tpc * tpc_in_gpc_stride;
 	gpc_tpc_addr = gr_gpc0_tpc0_sm_halfctl_ctrl_r() + gpc_tpc_stride;
 
 	reg = gk20a_readl(g, gpc_tpc_addr);
@@ -6552,7 +6572,6 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
 	u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
 	u32 num_ext_gpccs_ext_buffer_segments;
 	u32 inter_seg_offset;
-	u32 tpc_gpc_mask = (proj_tpc_in_gpc_stride_v() - 1);
 	u32 max_tpc_count;
 	u32 *sm_dsm_perf_ctrl_regs = NULL;
 	u32 num_sm_dsm_perf_ctrl_regs = 0;
@@ -6563,15 +6582,20 @@
 	u32 control_register_stride = 0;
 	u32 perf_register_stride = 0;
 	struct gr_gk20a *gr = &g->gr;
+	u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 tpc_gpc_mask = (tpc_in_gpc_stride - 1);
 
 	/* Only have TPC registers in extended region, so if not a TPC reg,
 	   then return error so caller can look elsewhere. */
-	if (pri_is_gpc_addr(addr)) {
+	if (pri_is_gpc_addr(g, addr)) {
 		u32 gpc_addr = 0;
-		gpc_num = pri_get_gpc_num(addr);
+		gpc_num = pri_get_gpc_num(g, addr);
 		gpc_addr = pri_gpccs_addr_mask(addr);
-		if (g->ops.gr.is_tpc_addr(gpc_addr))
-			tpc_num = g->ops.gr.get_tpc_num(gpc_addr);
+		if (g->ops.gr.is_tpc_addr(g, gpc_addr))
+			tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
 		else
 			return -EINVAL;
 
@@ -6639,11 +6663,10 @@
 			gk20a_dbg_info("register match: 0x%08x",
 					sm_dsm_perf_regs[i]);
 
-			chk_addr = (proj_gpc_base_v() +
-				    (proj_gpc_stride_v() * gpc_num) +
-				    proj_tpc_in_gpc_base_v() +
-				    (proj_tpc_in_gpc_stride_v() * tpc_num) +
-				    (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask));
+			chk_addr = (gpc_base + gpc_stride * gpc_num) +
+				   tpc_in_gpc_base +
+				   (tpc_in_gpc_stride * tpc_num) +
+				   (sm_dsm_perf_regs[sm_dsm_perf_reg_id] & tpc_gpc_mask);
 
 			if (chk_addr != addr) {
 				gk20a_err(dev_from_gk20a(g),
@@ -6670,12 +6693,11 @@
 			gk20a_dbg_info("register match: 0x%08x",
 					sm_dsm_perf_ctrl_regs[i]);
 
-			chk_addr = (proj_gpc_base_v() +
-				    (proj_gpc_stride_v() * gpc_num) +
-				    proj_tpc_in_gpc_base_v() +
-				    (proj_tpc_in_gpc_stride_v() * tpc_num) +
+			chk_addr = (gpc_base + gpc_stride * gpc_num) +
+				   tpc_in_gpc_base +
+				   tpc_in_gpc_stride * tpc_num +
 				   (sm_dsm_perf_ctrl_regs[sm_dsm_perf_ctrl_reg_id] &
-				    tpc_gpc_mask));
+				    tpc_gpc_mask);
 
 			if (chk_addr != addr) {
 				gk20a_err(dev_from_gk20a(g),
@@ -6772,6 +6794,12 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
 	u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
 	u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
 	struct aiv_gk20a *reg;
+	u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);
 
@@ -6800,10 +6828,10 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
 			reg = &g->gr.ctx_vars.ctxsw_regs.tpc.l[i];
 			address = reg->addr;
 			tpc_addr = pri_tpccs_addr_mask(address);
-			base_address = proj_gpc_base_v() +
-				(gpc_num * proj_gpc_stride_v()) +
-				proj_tpc_in_gpc_base_v() +
-				(tpc_num * proj_tpc_in_gpc_stride_v());
+			base_address = gpc_base +
+				(gpc_num * gpc_stride) +
+				tpc_in_gpc_base +
+				(tpc_num * tpc_in_gpc_stride);
 			address = base_address + tpc_addr;
 			/*
 			 * The data for the TPCs is interleaved in the context buffer.
@@ -6828,10 +6856,10 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
 			reg = &g->gr.ctx_vars.ctxsw_regs.ppc.l[i];
 			address = reg->addr;
 			ppc_addr = pri_ppccs_addr_mask(address);
-			base_address = proj_gpc_base_v() +
-				(gpc_num * proj_gpc_stride_v()) +
-				proj_ppc_in_gpc_base_v() +
-				(ppc_num * proj_ppc_in_gpc_stride_v());
+			base_address = gpc_base +
+				(gpc_num * gpc_stride) +
+				ppc_in_gpc_base +
+				(ppc_num * ppc_in_gpc_stride);
 			address = base_address + ppc_addr;
 			/*
 			 * The data for the PPCs is interleaved in the context buffer.
@@ -6859,8 +6887,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
 			gpc_addr = pri_gpccs_addr_mask(address);
 			gpc_offset = reg->index;
 
-			base_address = proj_gpc_base_v() +
-				(gpc_num * proj_gpc_stride_v());
+			base_address = gpc_base + (gpc_num * gpc_stride);
 			address = base_address + gpc_addr;
 
 			if (pri_addr == address) {
@@ -6879,7 +6906,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
 					       u32 *reg_ppc_count)
 {
 	u32 data32;
-	u32 litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
+	u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
 
 	/*
 	 * if there is only 1 PES_PER_GPC, then we put the PES registers
@@ -6887,7 +6914,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
 	 */
 	if ((!g->gr.ctx_vars.valid) ||
 	    ((g->gr.ctx_vars.ctxsw_regs.ppc.count == 0) &&
-	     (litter_num_pes_per_gpc > 1)))
+	     (num_pes_per_gpc > 1)))
 		return -EINVAL;
 
 	data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0);
@@ -7028,9 +7055,9 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 		/* The ucode stores TPC/PPC data before GPC data.
 		 * Advance offset past TPC/PPC data to GPC data. */
 		/* note 1 PES_PER_GPC case */
-		u32 litter_num_pes_per_gpc =
-			proj_scal_litter_num_pes_per_gpc_v();
-		if (litter_num_pes_per_gpc > 1) {
+		u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
+				GPU_LIT_NUM_PES_PER_GPC);
+		if (num_pes_per_gpc > 1) {
 			offset_to_segment +=
 				(((gr->ctx_vars.ctxsw_regs.tpc.count *
 					num_tpcs) << 2) +
@@ -7136,33 +7163,37 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
 {
 	u32 num_gpcs = g->gr.gpc_count;
 	u32 num_ppcs, num_tpcs, gpc_num, base;
+	u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
 		num_tpcs = g->gr.gpc_tpc_count[gpc_num];
-		base = proj_gpc_base_v() +
-			(proj_gpc_stride_v() * gpc_num) + proj_tpc_in_gpc_base_v();
+		base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base;
 		if (add_ctxsw_buffer_map_entries_subunits(map,
 					&g->gr.ctx_vars.ctxsw_regs.pm_tpc,
 					count, offset, max_cnt, base, num_tpcs,
-					proj_tpc_in_gpc_stride_v(),
-					(proj_tpc_in_gpc_stride_v() - 1)))
+					tpc_in_gpc_stride,
+					(tpc_in_gpc_stride - 1)))
 			return -EINVAL;
 
 		num_ppcs = g->gr.gpc_ppc_count[gpc_num];
-		base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num) +
-			proj_ppc_in_gpc_base_v();
+		base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base;
 		if (add_ctxsw_buffer_map_entries_subunits(map,
 					&g->gr.ctx_vars.ctxsw_regs.pm_ppc,
 					count, offset, max_cnt, base, num_ppcs,
-					proj_ppc_in_gpc_stride_v(),
-					(proj_ppc_in_gpc_stride_v() - 1)))
+					ppc_in_gpc_stride,
+					(ppc_in_gpc_stride - 1)))
 			return -EINVAL;
 
-		base = proj_gpc_base_v() + (proj_gpc_stride_v() * gpc_num);
+		base = gpc_base + (gpc_stride * gpc_num);
 		if (add_ctxsw_buffer_map_entries(map,
 					&g->gr.ctx_vars.ctxsw_regs.pm_gpc,
 					count, offset, max_cnt, base,
-					(proj_gpc_stride_v() - 1)))
+					(gpc_stride - 1)))
 			return -EINVAL;
 
 		base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num);
@@ -7242,6 +7273,9 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 	u32 i, count = 0;
 	u32 offset = 0;
 	struct ctxsw_buf_offset_map_entry *map;
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
+	u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
 
 	if (hwpm_ctxsw_buffer_size == 0) {
 		gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
@@ -7289,8 +7323,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 				&g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
 				&count, &offset,
 				hwpm_ctxsw_reg_count_max, 0,
-				proj_scal_litter_num_fbpas_v(),
-				proj_fbpa_stride_v(), ~0))
+				num_fbpas, fbpa_stride, ~0))
 		goto cleanup;
 
 	/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
@@ -7298,7 +7331,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
 				&g->gr.ctx_vars.ctxsw_regs.pm_ltc,
 				&count, &offset,
 				hwpm_ctxsw_reg_count_max, 0,
-				g->ltc_count, proj_ltc_stride_v(), ~0))
+				g->ltc_count, ltc_stride, ~0))
 		goto cleanup;
 
 	offset = ALIGN(offset, 256);
@@ -7737,25 +7770,28 @@ void gk20a_init_gr(struct gk20a *g)
 	init_waitqueue_head(&g->gr.init_wq);
 }
 
-static bool gr_gk20a_is_tpc_addr(u32 addr)
+static bool gr_gk20a_is_tpc_addr(struct gk20a *g, u32 addr)
 {
-	return ((addr >= proj_tpc_in_gpc_base_v()) &&
-		(addr < proj_tpc_in_gpc_base_v() +
-		 (proj_scal_litter_num_tpc_per_gpc_v() *
-		  proj_tpc_in_gpc_stride_v())))
-	       || pri_is_tpc_addr_shared(addr);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	return ((addr >= tpc_in_gpc_base) &&
+		(addr < tpc_in_gpc_base +
+		 (num_tpc_per_gpc * tpc_in_gpc_stride)))
+	       || pri_is_tpc_addr_shared(g, addr);
 }
 
-static u32 gr_gk20a_get_tpc_num(u32 addr)
+static u32 gr_gk20a_get_tpc_num(struct gk20a *g, u32 addr)
 {
 	u32 i, start;
-	u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v();
+	u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	for (i = 0; i < num_tpcs; i++) {
-		start = proj_tpc_in_gpc_base_v() +
-			(i * proj_tpc_in_gpc_stride_v());
+		start = tpc_in_gpc_base + (i * tpc_in_gpc_stride);
 		if ((addr >= start) &&
-		    (addr < (start + proj_tpc_in_gpc_stride_v())))
+		    (addr < (start + tpc_in_gpc_stride)))
 			return i;
 	}
 	return 0;
@@ -7768,8 +7804,10 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
7768 bool no_error_pending; 7804 bool no_error_pending;
7769 u32 delay = GR_IDLE_CHECK_DEFAULT; 7805 u32 delay = GR_IDLE_CHECK_DEFAULT;
7770 bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g); 7806 bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
7807 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7808 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7771 u32 offset = 7809 u32 offset =
7772 proj_gpc_stride_v() * gpc + proj_tpc_in_gpc_stride_v() * tpc; 7810 gpc_stride * gpc + tpc_in_gpc_stride * tpc;
7773 7811
7774 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, 7812 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg,
7775 "GPC%d TPC%d: locking down SM", gpc, tpc); 7813 "GPC%d TPC%d: locking down SM", gpc, tpc);
@@ -7828,9 +7866,9 @@ void gk20a_suspend_single_sm(struct gk20a *g,
7828 u32 offset; 7866 u32 offset;
7829 int err; 7867 int err;
7830 u32 dbgr_control0; 7868 u32 dbgr_control0;
7831 7869 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7832 offset = proj_gpc_stride_v() * gpc + 7870 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7833 proj_tpc_in_gpc_stride_v() * tpc; 7871 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
7834 7872
7835 /* if an SM debugger isn't attached, skip suspend */ 7873 /* if an SM debugger isn't attached, skip suspend */
7836 if (!gk20a_gr_sm_debugger_attached(g)) { 7874 if (!gk20a_gr_sm_debugger_attached(g)) {
@@ -7899,6 +7937,8 @@ void gk20a_resume_single_sm(struct gk20a *g,
7899{ 7937{
7900 u32 dbgr_control0; 7938 u32 dbgr_control0;
7901 u32 offset; 7939 u32 offset;
7940 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
7941 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
7902 /* 7942 /*
7903 * The following requires some clarification. Despite the fact that both 7943 * The following requires some clarification. Despite the fact that both
7904 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their 7944 * RUN_TRIGGER and STOP_TRIGGER have the word "TRIGGER" in their
@@ -7912,8 +7952,7 @@ void gk20a_resume_single_sm(struct gk20a *g,
7912 * effect, before enabling the run trigger. 7952 * effect, before enabling the run trigger.
7913 */ 7953 */
7914 7954
7915 offset = proj_gpc_stride_v() * gpc + 7955 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
7916 proj_tpc_in_gpc_stride_v() * tpc;
7917 7956
7918 /*De-assert stop trigger */ 7957 /*De-assert stop trigger */
7919 dbgr_control0 = 7958 dbgr_control0 =
@@ -8144,6 +8183,8 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
8144{ 8183{
8145 struct nvgpu_dbg_gpu_reg_op *ops; 8184 struct nvgpu_dbg_gpu_reg_op *ops;
8146 int i = 0, sm_id, err; 8185 int i = 0, sm_id, err;
8186 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
8187 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
8147 8188
8148 ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL); 8189 ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL);
8149 if (!ops) 8190 if (!ops)
@@ -8158,8 +8199,8 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
8158 gpc = g->gr.sm_to_cluster[sm_id].gpc_index; 8199 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8159 tpc = g->gr.sm_to_cluster[sm_id].tpc_index; 8200 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8160 8201
8161 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; 8202 tpc_offset = tpc_in_gpc_stride * tpc;
8162 gpc_offset = proj_gpc_stride_v() * gpc; 8203 gpc_offset = gpc_stride * gpc;
8163 reg_offset = tpc_offset + gpc_offset; 8204 reg_offset = tpc_offset + gpc_offset;
8164 8205
8165 ops[i].op = REGOP(WRITE_32); 8206 ops[i].op = REGOP(WRITE_32);
@@ -8199,13 +8240,15 @@ static void gr_gk20a_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
8199 u32 gpc, tpc, sm_id; 8240 u32 gpc, tpc, sm_id;
8200 u32 tpc_offset, gpc_offset, reg_offset; 8241 u32 tpc_offset, gpc_offset, reg_offset;
8201 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; 8242 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
8243 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
8244 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
8202 8245
8203 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { 8246 for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
8204 gpc = g->gr.sm_to_cluster[sm_id].gpc_index; 8247 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
8205 tpc = g->gr.sm_to_cluster[sm_id].tpc_index; 8248 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
8206 8249
8207 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; 8250 tpc_offset = tpc_in_gpc_stride * tpc;
8208 gpc_offset = proj_gpc_stride_v() * gpc; 8251 gpc_offset = gpc_stride * gpc;
8209 reg_offset = tpc_offset + gpc_offset; 8252 reg_offset = tpc_offset + gpc_offset;
8210 8253
8211 /* 64 bit read */ 8254 /* 64 bit read */
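Every gr_gk20a.c hunk above makes the same substitution: per-(GPC, TPC) register offsets are computed from strides queried through nvgpu_get_litter_value() instead of hw_proj_gk20a.h accessors. A minimal sketch of that pattern follows; the helper name is hypothetical and it uses only the GPU_LIT_* enumerants visible in this diff.

/* Sketch only, not part of this change: build a per-(gpc, tpc) register
 * offset from chip-specific strides supplied by the litter-value HAL. */
static u32 example_gpc_tpc_offset(struct gk20a *g, u32 gpc, u32 tpc)
{
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
					GPU_LIT_TPC_IN_GPC_STRIDE);

	return gpc_stride * gpc + tpc_in_gpc_stride * tpc;
}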
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
index 0f70e8aa..248fa291 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
@@ -34,30 +34,37 @@ static inline u32 pri_gpccs_addr_mask(u32 addr)
34{ 34{
35 return addr & ((1 << pri_gpccs_addr_width()) - 1); 35 return addr & ((1 << pri_gpccs_addr_width()) - 1);
36} 36}
37static inline u32 pri_gpc_addr(u32 addr, u32 gpc) 37static inline u32 pri_gpc_addr(struct gk20a *g, u32 addr, u32 gpc)
38{ 38{
39 return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + addr; 39 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
40 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
41 return gpc_base + (gpc * gpc_stride) + addr;
40} 42}
41static inline bool pri_is_gpc_addr_shared(u32 addr) 43static inline bool pri_is_gpc_addr_shared(struct gk20a *g, u32 addr)
42{ 44{
43 return (addr >= proj_gpc_shared_base_v()) && 45 u32 gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_SHARED_BASE);
44 (addr < proj_gpc_shared_base_v() + proj_gpc_stride_v()); 46 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
47 return (addr >= gpc_shared_base) &&
48 (addr < gpc_shared_base + gpc_stride);
45} 49}
46static inline bool pri_is_gpc_addr(u32 addr) 50static inline bool pri_is_gpc_addr(struct gk20a *g, u32 addr)
47{ 51{
48 return ((addr >= proj_gpc_base_v()) && 52 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
49 (addr < proj_gpc_base_v() + 53 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
50 proj_scal_litter_num_gpcs_v() * proj_gpc_stride_v())) || 54 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
51 pri_is_gpc_addr_shared(addr); 55 return ((addr >= gpc_base) &&
 56 (addr < gpc_base + num_gpcs * gpc_stride)) ||
57 pri_is_gpc_addr_shared(g, addr);
52} 58}
53static inline u32 pri_get_gpc_num(u32 addr) 59static inline u32 pri_get_gpc_num(struct gk20a *g, u32 addr)
54{ 60{
55 u32 i, start; 61 u32 i, start;
56 u32 num_gpcs = proj_scal_litter_num_gpcs_v(); 62 u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
57 63 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
64 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
58 for (i = 0; i < num_gpcs; i++) { 65 for (i = 0; i < num_gpcs; i++) {
59 start = proj_gpc_base_v() + (i * proj_gpc_stride_v()); 66 start = gpc_base + (i * gpc_stride);
60 if ((addr >= start) && (addr < (start + proj_gpc_stride_v()))) 67 if ((addr >= start) && (addr < (start + gpc_stride)))
61 return i; 68 return i;
62 } 69 }
63 return 0; 70 return 0;
@@ -73,17 +80,23 @@ static inline u32 pri_tpccs_addr_mask(u32 addr)
73{ 80{
74 return addr & ((1 << pri_tpccs_addr_width()) - 1); 81 return addr & ((1 << pri_tpccs_addr_width()) - 1);
75} 82}
76static inline u32 pri_tpc_addr(u32 addr, u32 gpc, u32 tpc) 83static inline u32 pri_tpc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 tpc)
77{ 84{
78 return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + 85 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
79 proj_tpc_in_gpc_base_v() + (tpc * proj_tpc_in_gpc_stride_v()) + 86 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
87 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
88 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
89 return gpc_base + (gpc * gpc_stride) +
90 tpc_in_gpc_base + (tpc * tpc_in_gpc_stride) +
80 addr; 91 addr;
81} 92}
82static inline bool pri_is_tpc_addr_shared(u32 addr) 93static inline bool pri_is_tpc_addr_shared(struct gk20a *g, u32 addr)
83{ 94{
84 return (addr >= proj_tpc_in_gpc_shared_base_v()) && 95 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
85 (addr < (proj_tpc_in_gpc_shared_base_v() + 96 u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
86 proj_tpc_in_gpc_stride_v())); 97 return (addr >= tpc_in_gpc_shared_base) &&
98 (addr < (tpc_in_gpc_shared_base +
99 tpc_in_gpc_stride));
87} 100}
88 101
89/* 102/*
@@ -97,29 +110,37 @@ static inline u32 pri_becs_addr_mask(u32 addr)
97{ 110{
98 return addr & ((1 << pri_becs_addr_width()) - 1); 111 return addr & ((1 << pri_becs_addr_width()) - 1);
99} 112}
100static inline bool pri_is_be_addr_shared(u32 addr) 113static inline bool pri_is_be_addr_shared(struct gk20a *g, u32 addr)
101{ 114{
102 return (addr >= proj_rop_shared_base_v()) && 115 u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE);
103 (addr < proj_rop_shared_base_v() + proj_rop_stride_v()); 116 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
117 return (addr >= rop_shared_base) &&
118 (addr < rop_shared_base + rop_stride);
104} 119}
105static inline u32 pri_be_shared_addr(u32 addr) 120static inline u32 pri_be_shared_addr(struct gk20a *g, u32 addr)
106{ 121{
107 return proj_rop_shared_base_v() + pri_becs_addr_mask(addr); 122 u32 rop_shared_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_SHARED_BASE);
123 return rop_shared_base + pri_becs_addr_mask(addr);
108} 124}
109static inline bool pri_is_be_addr(u32 addr) 125static inline bool pri_is_be_addr(struct gk20a *g, u32 addr)
110{ 126{
111 return ((addr >= proj_rop_base_v()) && 127 u32 num_fbps = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS);
112 (addr < proj_rop_base_v()+proj_scal_litter_num_fbps_v() * proj_rop_stride_v())) || 128 u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE);
113 pri_is_be_addr_shared(addr); 129 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
130 return ((addr >= rop_base) &&
131 (addr < rop_base + num_fbps * rop_stride)) ||
132 pri_is_be_addr_shared(g, addr);
114} 133}
115 134
116static inline u32 pri_get_be_num(u32 addr) 135static inline u32 pri_get_be_num(struct gk20a *g, u32 addr)
117{ 136{
118 u32 i, start; 137 u32 i, start;
119 u32 num_fbps = proj_scal_litter_num_fbps_v(); 138 u32 num_fbps = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPS);
139 u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE);
140 u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);
120 for (i = 0; i < num_fbps; i++) { 141 for (i = 0; i < num_fbps; i++) {
121 start = proj_rop_base_v() + (i * proj_rop_stride_v()); 142 start = rop_base + (i * rop_stride);
122 if ((addr >= start) && (addr < (start + proj_rop_stride_v()))) 143 if ((addr >= start) && (addr < (start + rop_stride)))
123 return i; 144 return i;
124 } 145 }
125 return 0; 146 return 0;
@@ -136,10 +157,14 @@ static inline u32 pri_ppccs_addr_mask(u32 addr)
136{ 157{
137 return addr & ((1 << pri_ppccs_addr_width()) - 1); 158 return addr & ((1 << pri_ppccs_addr_width()) - 1);
138} 159}
139static inline u32 pri_ppc_addr(u32 addr, u32 gpc, u32 ppc) 160static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc)
140{ 161{
141 return proj_gpc_base_v() + (gpc * proj_gpc_stride_v()) + 162 u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
142 proj_ppc_in_gpc_base_v() + (ppc * proj_ppc_in_gpc_stride_v()) + addr; 163 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
164 u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
165 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
166 return gpc_base + (gpc * gpc_stride) +
167 ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr;
143} 168}
144 169
145enum ctxsw_addr_type { 170enum ctxsw_addr_type {
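The pri_* helpers above now take the device pointer so they can look up chip-specific bases and strides at runtime. A hypothetical call site, shown only to illustrate the new signatures (the wrapper name and the 'offset' argument are illustrative, not from this change):

/* Illustrative only: resolve a TPC-unicast priv address for (gpc, tpc).
 * 'offset' is a hypothetical register offset within TPCCS space. */
static u32 example_tpc_unicast_addr(struct gk20a *g, u32 offset,
					u32 gpc, u32 tpc)
{
	return pri_tpc_addr(g, pri_tpccs_addr_mask(offset), gpc, tpc);
}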
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index 6df8f37c..fb3b3e55 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -29,6 +29,7 @@
29#include "clk_gk20a.h" 29#include "clk_gk20a.h"
30#include "regops_gk20a.h" 30#include "regops_gk20a.h"
31#include "therm_gk20a.h" 31#include "therm_gk20a.h"
32#include "hw_proj_gk20a.h"
32 33
33static struct gpu_ops gk20a_ops = { 34static struct gpu_ops gk20a_ops = {
34 .clock_gating = { 35 .clock_gating = {
@@ -47,6 +48,78 @@ static struct gpu_ops gk20a_ops = {
47 }, 48 },
48}; 49};
49 50
51static int gk20a_get_litter_value(struct gk20a *g,
52 enum nvgpu_litter_value value)
53{
54 int ret = EINVAL;
55 switch (value) {
56 case GPU_LIT_NUM_GPCS:
57 ret = proj_scal_litter_num_gpcs_v();
58 break;
59 case GPU_LIT_NUM_PES_PER_GPC:
60 ret = proj_scal_litter_num_pes_per_gpc_v();
61 break;
62 case GPU_LIT_NUM_ZCULL_BANKS:
63 ret = proj_scal_litter_num_zcull_banks_v();
64 break;
65 case GPU_LIT_NUM_TPC_PER_GPC:
66 ret = proj_scal_litter_num_tpc_per_gpc_v();
67 break;
68 case GPU_LIT_NUM_FBPS:
69 ret = proj_scal_litter_num_fbps_v();
70 break;
71 case GPU_LIT_GPC_BASE:
72 ret = proj_gpc_base_v();
73 break;
74 case GPU_LIT_GPC_STRIDE:
75 ret = proj_gpc_stride_v();
76 break;
77 case GPU_LIT_GPC_SHARED_BASE:
78 ret = proj_gpc_shared_base_v();
79 break;
80 case GPU_LIT_TPC_IN_GPC_BASE:
81 ret = proj_tpc_in_gpc_base_v();
82 break;
83 case GPU_LIT_TPC_IN_GPC_STRIDE:
84 ret = proj_tpc_in_gpc_stride_v();
85 break;
86 case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
87 ret = proj_tpc_in_gpc_shared_base_v();
88 break;
89 case GPU_LIT_PPC_IN_GPC_STRIDE:
90 ret = proj_ppc_in_gpc_stride_v();
91 break;
92 case GPU_LIT_ROP_BASE:
93 ret = proj_rop_base_v();
94 break;
95 case GPU_LIT_ROP_STRIDE:
96 ret = proj_rop_stride_v();
97 break;
98 case GPU_LIT_ROP_SHARED_BASE:
99 ret = proj_rop_shared_base_v();
100 break;
101 case GPU_LIT_HOST_NUM_PBDMA:
102 ret = proj_host_num_pbdma_v();
103 break;
104 case GPU_LIT_LTC_STRIDE:
105 ret = proj_ltc_stride_v();
106 break;
107 case GPU_LIT_LTS_STRIDE:
108 ret = proj_lts_stride_v();
109 break;
110 case GPU_LIT_NUM_FBPAS:
111 ret = proj_scal_litter_num_fbpas_v();
112 break;
113 case GPU_LIT_FBPA_STRIDE:
114 ret = proj_fbpa_stride_v();
115 break;
116 default:
117 break;
118 }
119
120 return ret;
121}
122
50int gk20a_init_hal(struct gk20a *g) 123int gk20a_init_hal(struct gk20a *g)
51{ 124{
52 struct gpu_ops *gops = &g->ops; 125 struct gpu_ops *gops = &g->ops;
@@ -71,6 +144,7 @@ int gk20a_init_hal(struct gk20a *g)
71 gk20a_init_therm_ops(gops); 144 gk20a_init_therm_ops(gops);
72 gops->name = "gk20a"; 145 gops->name = "gk20a";
73 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; 146 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
147 gops->get_litter_value = gk20a_get_litter_value;
74 148
75 c->twod_class = FERMI_TWOD_A; 149 c->twod_class = FERMI_TWOD_A;
76 c->threed_class = KEPLER_C; 150 c->threed_class = KEPLER_C;
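gk20a_get_litter_value() above backs the new gops->get_litter_value hook for gk20a. The nvgpu_get_litter_value() calls used throughout this diff presumably dispatch through that hook via a small wrapper added to gk20a.h (changed by this patch but not shown in this part of the diff); a sketch of the assumed shape:

/* Assumed wrapper, shown as a sketch only: forward the query to the
 * chip-specific HAL implementation installed by *_init_hal(). */
static inline u32 nvgpu_get_litter_value(struct gk20a *g,
		enum nvgpu_litter_value value)
{
	return g->ops.get_litter_value(g, value);
}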
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index 963f6bb7..0ce4f91a 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -20,7 +20,6 @@
20#include <trace/events/gk20a.h> 20#include <trace/events/gk20a.h>
21 21
22#include "hw_ltc_gk20a.h" 22#include "hw_ltc_gk20a.h"
23#include "hw_proj_gk20a.h"
24 23
25#include "ltc_common.c" 24#include "ltc_common.c"
26 25
@@ -108,6 +107,8 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
108 u32 slices_per_fbp = 107 u32 slices_per_fbp =
109 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( 108 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
110 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); 109 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
110 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
111 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
111 112
112 gk20a_dbg_fn(""); 113 gk20a_dbg_fn("");
113 114
@@ -140,8 +141,8 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
140 141
141 142
142 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + 143 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
143 fbp * proj_ltc_stride_v() + 144 fbp * ltc_stride +
144 slice * proj_lts_stride_v(); 145 slice * lts_stride;
145 146
146 retry = 200; 147 retry = 200;
147 do { 148 do {
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 050c2bee..b49f2301 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -27,7 +27,6 @@
27#include "hw_fifo_gm20b.h" 27#include "hw_fifo_gm20b.h"
28#include "hw_fb_gm20b.h" 28#include "hw_fb_gm20b.h"
29#include "hw_top_gm20b.h" 29#include "hw_top_gm20b.h"
30#include "hw_proj_gm20b.h"
31#include "hw_ctxsw_prog_gm20b.h" 30#include "hw_ctxsw_prog_gm20b.h"
32#include "hw_fuse_gm20b.h" 31#include "hw_fuse_gm20b.h"
33#include "pmu_gm20b.h" 32#include "pmu_gm20b.h"
@@ -178,6 +177,8 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
178 u32 gpc_index, ppc_index; 177 u32 gpc_index, ppc_index;
179 u32 temp; 178 u32 temp;
180 u32 cbm_cfg_size1, cbm_cfg_size2; 179 u32 cbm_cfg_size1, cbm_cfg_size2;
180 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
181 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
181 182
182 gk20a_dbg_fn(""); 183 gk20a_dbg_fn("");
183 184
@@ -198,7 +199,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
198 gr->tpc_count * gr->attrib_cb_size; 199 gr->tpc_count * gr->attrib_cb_size;
199 200
200 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 201 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
201 temp = proj_gpc_stride_v() * gpc_index; 202 temp = gpc_stride * gpc_index;
202 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; 203 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
203 ppc_index++) { 204 ppc_index++) {
204 cbm_cfg_size1 = gr->attrib_cb_default_size * 205 cbm_cfg_size1 = gr->attrib_cb_default_size *
@@ -208,12 +209,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
208 209
209 gr_gk20a_ctx_patch_write(g, ch_ctx, 210 gr_gk20a_ctx_patch_write(g, ch_ctx,
210 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + 211 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
211 proj_ppc_in_gpc_stride_v() * ppc_index, 212 ppc_in_gpc_stride * ppc_index,
212 cbm_cfg_size1, patch); 213 cbm_cfg_size1, patch);
213 214
214 gr_gk20a_ctx_patch_write(g, ch_ctx, 215 gr_gk20a_ctx_patch_write(g, ch_ctx,
215 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + 216 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
216 proj_ppc_in_gpc_stride_v() * ppc_index, 217 ppc_in_gpc_stride * ppc_index,
217 attrib_offset_in_chunk, patch); 218 attrib_offset_in_chunk, patch);
218 219
219 attrib_offset_in_chunk += gr->attrib_cb_size * 220 attrib_offset_in_chunk += gr->attrib_cb_size *
@@ -221,12 +222,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
221 222
222 gr_gk20a_ctx_patch_write(g, ch_ctx, 223 gr_gk20a_ctx_patch_write(g, ch_ctx,
223 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + 224 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
224 proj_ppc_in_gpc_stride_v() * ppc_index, 225 ppc_in_gpc_stride * ppc_index,
225 cbm_cfg_size2, patch); 226 cbm_cfg_size2, patch);
226 227
227 gr_gk20a_ctx_patch_write(g, ch_ctx, 228 gr_gk20a_ctx_patch_write(g, ch_ctx,
228 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + 229 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
229 proj_ppc_in_gpc_stride_v() * ppc_index, 230 ppc_in_gpc_stride * ppc_index,
230 alpha_offset_in_chunk, patch); 231 alpha_offset_in_chunk, patch);
231 232
232 alpha_offset_in_chunk += gr->alpha_cb_size * 233 alpha_offset_in_chunk += gr->alpha_cb_size *
@@ -297,6 +298,8 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
297 u32 gpc_index, ppc_index, stride, val; 298 u32 gpc_index, ppc_index, stride, val;
298 u32 pd_ab_max_output; 299 u32 pd_ab_max_output;
299 u32 alpha_cb_size = data * 4; 300 u32 alpha_cb_size = data * 4;
301 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
302 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
300 303
301 gk20a_dbg_fn(""); 304 gk20a_dbg_fn("");
302 /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) 305 /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
@@ -319,14 +322,14 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
319 gr_pd_ab_dist_cfg1_max_batches_init_f()); 322 gr_pd_ab_dist_cfg1_max_batches_init_f());
320 323
321 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 324 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
322 stride = proj_gpc_stride_v() * gpc_index; 325 stride = gpc_stride * gpc_index;
323 326
324 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; 327 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
325 ppc_index++) { 328 ppc_index++) {
326 329
327 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + 330 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
328 stride + 331 stride +
329 proj_ppc_in_gpc_stride_v() * ppc_index); 332 ppc_in_gpc_stride * ppc_index);
330 333
331 val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), 334 val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
332 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * 335 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
@@ -334,7 +337,7 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
334 337
335 gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + 338 gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
336 stride + 339 stride +
337 proj_ppc_in_gpc_stride_v() * ppc_index, val); 340 ppc_in_gpc_stride * ppc_index, val);
338 } 341 }
339 } 342 }
340} 343}
@@ -344,6 +347,8 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
344 struct gr_gk20a *gr = &g->gr; 347 struct gr_gk20a *gr = &g->gr;
345 u32 gpc_index, ppc_index, stride, val; 348 u32 gpc_index, ppc_index, stride, val;
346 u32 cb_size = data * 4; 349 u32 cb_size = data * 4;
350 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
351 u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
347 352
348 gk20a_dbg_fn(""); 353 gk20a_dbg_fn("");
349 354
@@ -356,14 +361,14 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
356 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); 361 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
357 362
358 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 363 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
359 stride = proj_gpc_stride_v() * gpc_index; 364 stride = gpc_stride * gpc_index;
360 365
361 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; 366 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
362 ppc_index++) { 367 ppc_index++) {
363 368
364 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + 369 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
365 stride + 370 stride +
366 proj_ppc_in_gpc_stride_v() * ppc_index); 371 ppc_in_gpc_stride * ppc_index);
367 372
368 val = set_field(val, 373 val = set_field(val,
369 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), 374 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
@@ -372,7 +377,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
372 377
373 gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + 378 gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
374 stride + 379 stride +
375 proj_ppc_in_gpc_stride_v() * ppc_index, val); 380 ppc_in_gpc_stride * ppc_index, val);
376 381
377 val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( 382 val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
378 ppc_index + gpc_index)); 383 ppc_index + gpc_index));
@@ -527,14 +532,16 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
527 u32 tpc_per_gpc = 0; 532 u32 tpc_per_gpc = 0;
528 u32 tpc_sm_id = 0, gpc_tpc_id = 0; 533 u32 tpc_sm_id = 0, gpc_tpc_id = 0;
529 u32 pes_tpc_mask = 0, pes_index; 534 u32 pes_tpc_mask = 0, pes_index;
535 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
536 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
530 537
531 gk20a_dbg_fn(""); 538 gk20a_dbg_fn("");
532 539
533 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 540 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
534 gpc_offset = proj_gpc_stride_v() * gpc_index; 541 gpc_offset = gpc_stride * gpc_index;
535 for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; 542 for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index];
536 tpc_index++) { 543 tpc_index++) {
537 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; 544 tpc_offset = tpc_in_gpc_stride * tpc_index;
538 545
539 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() 546 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r()
540 + gpc_offset + tpc_offset, 547 + gpc_offset + tpc_offset,
@@ -640,32 +647,37 @@ static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
640 return 0; 647 return 0;
641} 648}
642 649
643static bool gr_gm20b_is_tpc_addr_shared(u32 addr) 650static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
644{ 651{
645 return (addr >= proj_tpc_in_gpc_shared_base_v()) && 652 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
646 (addr < (proj_tpc_in_gpc_shared_base_v() + 653 u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
647 proj_tpc_in_gpc_stride_v())); 654 return (addr >= tpc_in_gpc_shared_base) &&
655 (addr < (tpc_in_gpc_shared_base +
656 tpc_in_gpc_stride));
648} 657}
649 658
650static bool gr_gm20b_is_tpc_addr(u32 addr) 659static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
651{ 660{
652 return ((addr >= proj_tpc_in_gpc_base_v()) && 661 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
653 (addr < proj_tpc_in_gpc_base_v() + 662 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
654 (proj_scal_litter_num_tpc_per_gpc_v() * 663 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
655 proj_tpc_in_gpc_stride_v()))) 664 return ((addr >= tpc_in_gpc_base) &&
656 || gr_gm20b_is_tpc_addr_shared(addr); 665 (addr < tpc_in_gpc_base +
666 (num_tpc_per_gpc * tpc_in_gpc_stride)))
667 || gr_gm20b_is_tpc_addr_shared(g, addr);
657} 668}
658 669
659static u32 gr_gm20b_get_tpc_num(u32 addr) 670static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
660{ 671{
661 u32 i, start; 672 u32 i, start;
662 u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v(); 673 u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
674 u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
675 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
663 676
664 for (i = 0; i < num_tpcs; i++) { 677 for (i = 0; i < num_tpcs; i++) {
665 start = proj_tpc_in_gpc_base_v() + 678 start = tpc_in_gpc_base + (i * tpc_in_gpc_stride);
666 (i * proj_tpc_in_gpc_stride_v());
667 if ((addr >= start) && 679 if ((addr >= start) &&
668 (addr < (start + proj_tpc_in_gpc_stride_v()))) 680 (addr < (start + tpc_in_gpc_stride)))
669 return i; 681 return i;
670 } 682 }
671 return 0; 683 return 0;
@@ -1066,6 +1078,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
1066 u32 gpc, tpc, sm_id; 1078 u32 gpc, tpc, sm_id;
1067 u32 tpc_offset, gpc_offset, reg_offset; 1079 u32 tpc_offset, gpc_offset, reg_offset;
1068 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; 1080 u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
1081 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1082 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
1069 1083
1070 /* for maxwell & kepler */ 1084 /* for maxwell & kepler */
1071 u32 numSmPerTpc = 1; 1085 u32 numSmPerTpc = 1;
@@ -1075,8 +1089,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
1075 gpc = g->gr.sm_to_cluster[sm_id].gpc_index; 1089 gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
1076 tpc = g->gr.sm_to_cluster[sm_id].tpc_index; 1090 tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
1077 1091
1078 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; 1092 tpc_offset = tpc_in_gpc_stride * tpc;
1079 gpc_offset = proj_gpc_stride_v() * gpc; 1093 gpc_offset = gpc_stride * gpc;
1080 reg_offset = tpc_offset + gpc_offset; 1094 reg_offset = tpc_offset + gpc_offset;
1081 1095
1082 /* 64 bit read */ 1096 /* 64 bit read */
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 559fee61..df25be5e 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -34,6 +34,7 @@
34#include "debug_gm20b.h" 34#include "debug_gm20b.h"
35#include "cde_gm20b.h" 35#include "cde_gm20b.h"
36#include "therm_gm20b.h" 36#include "therm_gm20b.h"
37#include "hw_proj_gm20b.h"
37 38
38#define FUSE_OPT_PRIV_SEC_DIS_0 0x264 39#define FUSE_OPT_PRIV_SEC_DIS_0 0x264
39#define PRIV_SECURITY_DISABLE 0x01 40#define PRIV_SECURITY_DISABLE 0x01
@@ -89,6 +90,78 @@ static struct gpu_ops gm20b_ops = {
89 }, 90 },
90}; 91};
91 92
93static int gm20b_get_litter_value(struct gk20a *g,
94 enum nvgpu_litter_value value)
95{
96 int ret = EINVAL;
97 switch (value) {
98 case GPU_LIT_NUM_GPCS:
99 ret = proj_scal_litter_num_gpcs_v();
100 break;
101 case GPU_LIT_NUM_PES_PER_GPC:
102 ret = proj_scal_litter_num_pes_per_gpc_v();
103 break;
104 case GPU_LIT_NUM_ZCULL_BANKS:
105 ret = proj_scal_litter_num_zcull_banks_v();
106 break;
107 case GPU_LIT_NUM_TPC_PER_GPC:
108 ret = proj_scal_litter_num_tpc_per_gpc_v();
109 break;
110 case GPU_LIT_NUM_FBPS:
111 ret = proj_scal_litter_num_fbps_v();
112 break;
113 case GPU_LIT_GPC_BASE:
114 ret = proj_gpc_base_v();
115 break;
116 case GPU_LIT_GPC_STRIDE:
117 ret = proj_gpc_stride_v();
118 break;
119 case GPU_LIT_GPC_SHARED_BASE:
120 ret = proj_gpc_shared_base_v();
121 break;
122 case GPU_LIT_TPC_IN_GPC_BASE:
123 ret = proj_tpc_in_gpc_base_v();
124 break;
125 case GPU_LIT_TPC_IN_GPC_STRIDE:
126 ret = proj_tpc_in_gpc_stride_v();
127 break;
128 case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
129 ret = proj_tpc_in_gpc_shared_base_v();
130 break;
131 case GPU_LIT_PPC_IN_GPC_STRIDE:
132 ret = proj_ppc_in_gpc_stride_v();
133 break;
134 case GPU_LIT_ROP_BASE:
135 ret = proj_rop_base_v();
136 break;
137 case GPU_LIT_ROP_STRIDE:
138 ret = proj_rop_stride_v();
139 break;
140 case GPU_LIT_ROP_SHARED_BASE:
141 ret = proj_rop_shared_base_v();
142 break;
143 case GPU_LIT_HOST_NUM_PBDMA:
144 ret = proj_host_num_pbdma_v();
145 break;
146 case GPU_LIT_LTC_STRIDE:
147 ret = proj_ltc_stride_v();
148 break;
149 case GPU_LIT_LTS_STRIDE:
150 ret = proj_lts_stride_v();
151 break;
152 case GPU_LIT_NUM_FBPAS:
153 ret = proj_scal_litter_num_fbpas_v();
154 break;
155 case GPU_LIT_FBPA_STRIDE:
156 ret = proj_fbpa_stride_v();
157 break;
158 default:
159 break;
160 }
161
162 return ret;
163}
164
92int gm20b_init_hal(struct gk20a *g) 165int gm20b_init_hal(struct gk20a *g)
93{ 166{
94 struct gpu_ops *gops = &g->ops; 167 struct gpu_ops *gops = &g->ops;
@@ -140,6 +213,7 @@ int gm20b_init_hal(struct gk20a *g)
140 gm20b_init_therm_ops(gops); 213 gm20b_init_therm_ops(gops);
141 gops->name = "gm20b"; 214 gops->name = "gm20b";
142 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics; 215 gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
216 gops->get_litter_value = gm20b_get_litter_value;
143 217
144 c->twod_class = FERMI_TWOD_A; 218 c->twod_class = FERMI_TWOD_A;
145 c->threed_class = MAXWELL_B; 219 c->threed_class = MAXWELL_B;
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index e4e27764..4fc9d51b 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -20,7 +20,6 @@
20#include "hw_mc_gm20b.h" 20#include "hw_mc_gm20b.h"
21#include "hw_ltc_gm20b.h" 21#include "hw_ltc_gm20b.h"
22#include "hw_top_gm20b.h" 22#include "hw_top_gm20b.h"
23#include "hw_proj_gm20b.h"
24#include "hw_pri_ringmaster_gm20b.h" 23#include "hw_pri_ringmaster_gm20b.h"
25 24
26#include "gk20a/ltc_common.c" 25#include "gk20a/ltc_common.c"
@@ -109,6 +108,8 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
109 s32 retry = 200; 108 s32 retry = 200;
110 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( 109 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
111 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); 110 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
111 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
112 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
112 113
113 gk20a_dbg_fn(""); 114 gk20a_dbg_fn("");
114 115
@@ -139,8 +140,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
139 for (slice = 0; slice < slices_per_ltc; slice++) { 140 for (slice = 0; slice < slices_per_ltc; slice++) {
140 141
141 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + 142 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
142 ltc * proj_ltc_stride_v() + 143 ltc * ltc_stride + slice * lts_stride;
143 slice * proj_lts_stride_v();
144 144
145 retry = 200; 145 retry = 200;
146 do { 146 do {
@@ -198,6 +198,8 @@ void gm20b_ltc_isr(struct gk20a *g)
198{ 198{
199 u32 mc_intr, ltc_intr; 199 u32 mc_intr, ltc_intr;
200 int ltc, slice; 200 int ltc, slice;
201 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
202 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
201 203
202 mc_intr = gk20a_readl(g, mc_intr_ltc_r()); 204 mc_intr = gk20a_readl(g, mc_intr_ltc_r());
203 gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x", 205 gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
@@ -207,13 +209,13 @@ void gm20b_ltc_isr(struct gk20a *g)
207 continue; 209 continue;
208 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { 210 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
209 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + 211 ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
210 proj_ltc_stride_v() * ltc + 212 ltc_stride * ltc +
211 proj_lts_stride_v() * slice); 213 lts_stride * slice);
212 gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x", 214 gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
213 ltc, slice, ltc_intr); 215 ltc, slice, ltc_intr);
214 gk20a_writel(g, ltc_ltc0_lts0_intr_r() + 216 gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
215 proj_ltc_stride_v() * ltc + 217 ltc_stride * ltc +
216 proj_lts_stride_v() * slice, 218 lts_stride * slice,
217 ltc_intr); 219 ltc_intr);
218 } 220 }
219 } 221 }
@@ -287,6 +289,7 @@ void gm20b_flush_ltc(struct gk20a *g)
287{ 289{
288 unsigned long timeout; 290 unsigned long timeout;
289 int ltc; 291 int ltc;
292 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
290 293
291#define __timeout_init() \ 294#define __timeout_init() \
292 do { \ 295 do { \
@@ -317,7 +320,7 @@ void gm20b_flush_ltc(struct gk20a *g)
317 __timeout_init(); 320 __timeout_init();
318 do { 321 do {
319 int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + 322 int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
320 ltc * proj_ltc_stride_v(); 323 ltc * ltc_stride;
321 op_pending = gk20a_readl(g, cmgmt1); 324 op_pending = gk20a_readl(g, cmgmt1);
322 __timeout_check(); 325 __timeout_check();
323 } while (op_pending & 326 } while (op_pending &
@@ -338,7 +341,7 @@ void gm20b_flush_ltc(struct gk20a *g)
338 __timeout_init(); 341 __timeout_init();
339 do { 342 do {
340 int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + 343 int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
341 ltc * proj_ltc_stride_v(); 344 ltc * ltc_stride;
342 op_pending = gk20a_readl(g, cmgmt0); 345 op_pending = gk20a_readl(g, cmgmt0);
343 __timeout_check(); 346 __timeout_check();
344 } while (op_pending & 347 } while (op_pending &