author    Terje Bergstrom <tbergstrom@nvidia.com>  2016-04-06 16:10:32 -0400
committer Terje Bergstrom <tbergstrom@nvidia.com>  2016-04-15 11:48:20 -0400
commit    6839341bf8ffafa115cfc0427bba694ee1d131f3 (patch)
tree      1f9369a3bacf0f1a2cc23371f5de988efdc07c31 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent    61e009c0f8874898335e6c47a610233c3382be47 (diff)
gpu: nvgpu: Add litter values HAL

Move per-chip constants to be returned by a chip-specific function.
Implement get_litter_value() for each chip.

Change-Id: I2a2730fce14010924d2507f6fa15cc2ea0795113
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1121383
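The pattern this change introduces: instead of calling the proj_*_v() constant accessors from hw_proj_gm20b.h directly, common code asks the chip for each constant through a HAL callback. A minimal sketch of that shape follows; the enum members match the GPU_LIT_* keys used in the diff below, but the struct layout, the helper's exact signature, and the type names are simplifying assumptions, not the real nvgpu headers.

#include <linux/types.h>	/* u32 */

/* Chip-independent keys for per-chip ("litter") constants; the members
 * mirror the GPU_LIT_* names used in the hunks below. */
enum nvgpu_litter_value {
	GPU_LIT_NUM_TPC_PER_GPC,
	GPU_LIT_GPC_STRIDE,
	GPU_LIT_TPC_IN_GPC_BASE,
	GPU_LIT_TPC_IN_GPC_STRIDE,
	GPU_LIT_TPC_IN_GPC_SHARED_BASE,
	GPU_LIT_PPC_IN_GPC_STRIDE,
};

struct gk20a;

/* Assumed shape of the HAL ops table: each chip installs its own
 * callback at probe time. */
struct gpu_ops {
	u32 (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value v);
};

struct gk20a {
	struct gpu_ops ops;
};

/* Common code calls this instead of a chip-specific proj_*_v() macro. */
static inline u32 nvgpu_get_litter_value(struct gk20a *g,
					 enum nvgpu_litter_value v)
{
	return g->ops.get_litter_value(g, v);
}

The payoff is visible throughout the diff: every function below replaces compile-time proj_*_v() calls with locals fetched once via nvgpu_get_litter_value(), so the same gr code can serve chips with different strides.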
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')

 drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 76
 1 file changed, 45 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 050c2bee..b49f2301 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -27,7 +27,6 @@
 #include "hw_fifo_gm20b.h"
 #include "hw_fb_gm20b.h"
 #include "hw_top_gm20b.h"
-#include "hw_proj_gm20b.h"
 #include "hw_ctxsw_prog_gm20b.h"
 #include "hw_fuse_gm20b.h"
 #include "pmu_gm20b.h"
@@ -178,6 +177,8 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 	u32 gpc_index, ppc_index;
 	u32 temp;
 	u32 cbm_cfg_size1, cbm_cfg_size2;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -198,7 +199,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 		gr->tpc_count * gr->attrib_cb_size;
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		temp = proj_gpc_stride_v() * gpc_index;
+		temp = gpc_stride * gpc_index;
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 			cbm_cfg_size1 = gr->attrib_cb_default_size *
@@ -208,12 +209,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size1, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				attrib_offset_in_chunk, patch);
 
 			attrib_offset_in_chunk += gr->attrib_cb_size *
@@ -221,12 +222,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size2, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				alpha_offset_in_chunk, patch);
 
 			alpha_offset_in_chunk += gr->alpha_cb_size *
@@ -297,6 +298,8 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 	u32 gpc_index, ppc_index, stride, val;
 	u32 pd_ab_max_output;
 	u32 alpha_cb_size = data * 4;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 	/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
@@ -319,14 +322,14 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_pd_ab_dist_cfg1_max_batches_init_f());
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				ppc_in_gpc_stride * ppc_index);
 
 			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
 				gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
@@ -334,7 +337,7 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 		}
 	}
 }
@@ -344,6 +347,8 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_index, ppc_index, stride, val;
 	u32 cb_size = data * 4;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -356,14 +361,14 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				ppc_in_gpc_stride * ppc_index);
 
 			val = set_field(val,
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
@@ -372,7 +377,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 
 			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
 				ppc_index + gpc_index));
@@ -527,14 +532,16 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 	u32 tpc_per_gpc = 0;
 	u32 tpc_sm_id = 0, gpc_tpc_id = 0;
 	u32 pes_tpc_mask = 0, pes_index;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		gpc_offset = proj_gpc_stride_v() * gpc_index;
+		gpc_offset = gpc_stride * gpc_index;
 		for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index];
 			tpc_index++) {
-			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
+			tpc_offset = tpc_in_gpc_stride * tpc_index;
 
 			gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r()
 				+ gpc_offset + tpc_offset,
@@ -640,32 +647,37 @@ static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
 	return 0;
 }
 
-static bool gr_gm20b_is_tpc_addr_shared(u32 addr)
+static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
 {
-	return (addr >= proj_tpc_in_gpc_shared_base_v()) &&
-		(addr < (proj_tpc_in_gpc_shared_base_v() +
-		proj_tpc_in_gpc_stride_v()));
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
+	return (addr >= tpc_in_gpc_shared_base) &&
+		(addr < (tpc_in_gpc_shared_base +
+		tpc_in_gpc_stride));
 }
 
-static bool gr_gm20b_is_tpc_addr(u32 addr)
+static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
 {
-	return ((addr >= proj_tpc_in_gpc_base_v()) &&
-		(addr < proj_tpc_in_gpc_base_v() +
-		(proj_scal_litter_num_tpc_per_gpc_v() *
-		proj_tpc_in_gpc_stride_v())))
-		|| gr_gm20b_is_tpc_addr_shared(addr);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	return ((addr >= tpc_in_gpc_base) &&
+		(addr < tpc_in_gpc_base +
+		(num_tpc_per_gpc * tpc_in_gpc_stride)))
+		|| gr_gm20b_is_tpc_addr_shared(g, addr);
 }
 
-static u32 gr_gm20b_get_tpc_num(u32 addr)
+static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
 {
 	u32 i, start;
-	u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v();
+	u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	for (i = 0; i < num_tpcs; i++) {
-		start = proj_tpc_in_gpc_base_v() +
-			(i * proj_tpc_in_gpc_stride_v());
+		start = tpc_in_gpc_base + (i * tpc_in_gpc_stride);
 		if ((addr >= start) &&
-			(addr < (start + proj_tpc_in_gpc_stride_v())))
+			(addr < (start + tpc_in_gpc_stride)))
 			return i;
 	}
 	return 0;
@@ -1066,6 +1078,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
 	u32 gpc, tpc, sm_id;
 	u32 tpc_offset, gpc_offset, reg_offset;
 	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	/* for maxwell & kepler */
 	u32 numSmPerTpc = 1;
@@ -1075,8 +1089,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
 
-		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
-		gpc_offset = proj_gpc_stride_v() * gpc;
+		tpc_offset = tpc_in_gpc_stride * tpc;
+		gpc_offset = gpc_stride * gpc;
 		reg_offset = tpc_offset + gpc_offset;
 
 		/* 64 bit read */
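The gm20b implementation of this HAL lives outside this file and is not part of this diff. A hedged sketch of what such a per-chip get_litter_value() plausibly looks like, reusing the types from the sketch above: every proj_*_v() accessor named here appears on the removed side of the hunks above, while the switch structure and the WARN-on-unknown-key fallback are assumptions.

#include <linux/bug.h>		/* WARN */
#include "hw_proj_gm20b.h"	/* the proj_*_v() accessors removed from gr_gm20b.c */

/* Sketch: translate chip-independent GPU_LIT_* keys into gm20b's project
 * constants, so common gr code no longer includes hw_proj_gm20b.h. */
static u32 gm20b_get_litter_value(struct gk20a *g, enum nvgpu_litter_value v)
{
	u32 ret = 0;

	switch (v) {
	case GPU_LIT_NUM_TPC_PER_GPC:
		ret = proj_scal_litter_num_tpc_per_gpc_v();
		break;
	case GPU_LIT_GPC_STRIDE:
		ret = proj_gpc_stride_v();
		break;
	case GPU_LIT_TPC_IN_GPC_BASE:
		ret = proj_tpc_in_gpc_base_v();
		break;
	case GPU_LIT_TPC_IN_GPC_STRIDE:
		ret = proj_tpc_in_gpc_stride_v();
		break;
	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
		ret = proj_tpc_in_gpc_shared_base_v();
		break;
	case GPU_LIT_PPC_IN_GPC_STRIDE:
		ret = proj_ppc_in_gpc_stride_v();
		break;
	default:
		WARN(1, "unhandled litter value %d\n", v);
		break;
	}
	return ret;
}

Installing this callback in g->ops.get_litter_value during gm20b HAL init is what lets gr_gm20b.c drop its direct hw_proj_gm20b.h include, as the first hunk of this diff shows.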