diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-06 16:10:32 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-04-15 11:48:20 -0400 |
commit | 6839341bf8ffafa115cfc0427bba694ee1d131f3 (patch) | |
tree | 1f9369a3bacf0f1a2cc23371f5de988efdc07c31 /drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |
parent | 61e009c0f8874898335e6c47a610233c3382be47 (diff) |
gpu: nvgpu: Add litter values HAL
Move per-chip constants to be returned by a chip specific function.
Implement get_litter_value() for each chip.
Change-Id: I2a2730fce14010924d2507f6fa15cc2ea0795113
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1121383
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 76 |
1 file changed, 45 insertions, 31 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 050c2bee..b49f2301 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include "hw_fifo_gm20b.h" | 27 | #include "hw_fifo_gm20b.h" |
28 | #include "hw_fb_gm20b.h" | 28 | #include "hw_fb_gm20b.h" |
29 | #include "hw_top_gm20b.h" | 29 | #include "hw_top_gm20b.h" |
30 | #include "hw_proj_gm20b.h" | ||
31 | #include "hw_ctxsw_prog_gm20b.h" | 30 | #include "hw_ctxsw_prog_gm20b.h" |
32 | #include "hw_fuse_gm20b.h" | 31 | #include "hw_fuse_gm20b.h" |
33 | #include "pmu_gm20b.h" | 32 | #include "pmu_gm20b.h" |
@@ -178,6 +177,8 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
178 | u32 gpc_index, ppc_index; | 177 | u32 gpc_index, ppc_index; |
179 | u32 temp; | 178 | u32 temp; |
180 | u32 cbm_cfg_size1, cbm_cfg_size2; | 179 | u32 cbm_cfg_size1, cbm_cfg_size2; |
180 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
181 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
181 | 182 | ||
182 | gk20a_dbg_fn(""); | 183 | gk20a_dbg_fn(""); |
183 | 184 | ||
@@ -198,7 +199,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
198 | gr->tpc_count * gr->attrib_cb_size; | 199 | gr->tpc_count * gr->attrib_cb_size; |
199 | 200 | ||
200 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 201 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
201 | temp = proj_gpc_stride_v() * gpc_index; | 202 | temp = gpc_stride * gpc_index; |
202 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 203 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
203 | ppc_index++) { | 204 | ppc_index++) { |
204 | cbm_cfg_size1 = gr->attrib_cb_default_size * | 205 | cbm_cfg_size1 = gr->attrib_cb_default_size * |
@@ -208,12 +209,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
208 | 209 | ||
209 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 210 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
210 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + | 211 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + |
211 | proj_ppc_in_gpc_stride_v() * ppc_index, | 212 | ppc_in_gpc_stride * ppc_index, |
212 | cbm_cfg_size1, patch); | 213 | cbm_cfg_size1, patch); |
213 | 214 | ||
214 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 215 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
215 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + | 216 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + |
216 | proj_ppc_in_gpc_stride_v() * ppc_index, | 217 | ppc_in_gpc_stride * ppc_index, |
217 | attrib_offset_in_chunk, patch); | 218 | attrib_offset_in_chunk, patch); |
218 | 219 | ||
219 | attrib_offset_in_chunk += gr->attrib_cb_size * | 220 | attrib_offset_in_chunk += gr->attrib_cb_size * |
@@ -221,12 +222,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
221 | 222 | ||
222 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 223 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
223 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + | 224 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + |
224 | proj_ppc_in_gpc_stride_v() * ppc_index, | 225 | ppc_in_gpc_stride * ppc_index, |
225 | cbm_cfg_size2, patch); | 226 | cbm_cfg_size2, patch); |
226 | 227 | ||
227 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 228 | gr_gk20a_ctx_patch_write(g, ch_ctx, |
228 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + | 229 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + |
229 | proj_ppc_in_gpc_stride_v() * ppc_index, | 230 | ppc_in_gpc_stride * ppc_index, |
230 | alpha_offset_in_chunk, patch); | 231 | alpha_offset_in_chunk, patch); |
231 | 232 | ||
232 | alpha_offset_in_chunk += gr->alpha_cb_size * | 233 | alpha_offset_in_chunk += gr->alpha_cb_size * |
@@ -297,6 +298,8 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
297 | u32 gpc_index, ppc_index, stride, val; | 298 | u32 gpc_index, ppc_index, stride, val; |
298 | u32 pd_ab_max_output; | 299 | u32 pd_ab_max_output; |
299 | u32 alpha_cb_size = data * 4; | 300 | u32 alpha_cb_size = data * 4; |
301 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
302 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
300 | 303 | ||
301 | gk20a_dbg_fn(""); | 304 | gk20a_dbg_fn(""); |
302 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) | 305 | /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF) |
@@ -319,14 +322,14 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
319 | gr_pd_ab_dist_cfg1_max_batches_init_f()); | 322 | gr_pd_ab_dist_cfg1_max_batches_init_f()); |
320 | 323 | ||
321 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 324 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
322 | stride = proj_gpc_stride_v() * gpc_index; | 325 | stride = gpc_stride * gpc_index; |
323 | 326 | ||
324 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 327 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
325 | ppc_index++) { | 328 | ppc_index++) { |
326 | 329 | ||
327 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | 330 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + |
328 | stride + | 331 | stride + |
329 | proj_ppc_in_gpc_stride_v() * ppc_index); | 332 | ppc_in_gpc_stride * ppc_index); |
330 | 333 | ||
331 | val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), | 334 | val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(), |
332 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * | 335 | gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size * |
@@ -334,7 +337,7 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) | |||
334 | 337 | ||
335 | gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + | 338 | gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() + |
336 | stride + | 339 | stride + |
337 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 340 | ppc_in_gpc_stride * ppc_index, val); |
338 | } | 341 | } |
339 | } | 342 | } |
340 | } | 343 | } |
@@ -344,6 +347,8 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
344 | struct gr_gk20a *gr = &g->gr; | 347 | struct gr_gk20a *gr = &g->gr; |
345 | u32 gpc_index, ppc_index, stride, val; | 348 | u32 gpc_index, ppc_index, stride, val; |
346 | u32 cb_size = data * 4; | 349 | u32 cb_size = data * 4; |
350 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
351 | u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); | ||
347 | 352 | ||
348 | gk20a_dbg_fn(""); | 353 | gk20a_dbg_fn(""); |
349 | 354 | ||
@@ -356,14 +361,14 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
356 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); | 361 | gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size)); |
357 | 362 | ||
358 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 363 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
359 | stride = proj_gpc_stride_v() * gpc_index; | 364 | stride = gpc_stride * gpc_index; |
360 | 365 | ||
361 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; | 366 | for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index]; |
362 | ppc_index++) { | 367 | ppc_index++) { |
363 | 368 | ||
364 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | 369 | val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + |
365 | stride + | 370 | stride + |
366 | proj_ppc_in_gpc_stride_v() * ppc_index); | 371 | ppc_in_gpc_stride * ppc_index); |
367 | 372 | ||
368 | val = set_field(val, | 373 | val = set_field(val, |
369 | gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), | 374 | gr_gpc0_ppc0_cbm_beta_cb_size_v_m(), |
@@ -372,7 +377,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) | |||
372 | 377 | ||
373 | gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + | 378 | gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() + |
374 | stride + | 379 | stride + |
375 | proj_ppc_in_gpc_stride_v() * ppc_index, val); | 380 | ppc_in_gpc_stride * ppc_index, val); |
376 | 381 | ||
377 | val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( | 382 | val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r( |
378 | ppc_index + gpc_index)); | 383 | ppc_index + gpc_index)); |
@@ -527,14 +532,16 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
527 | u32 tpc_per_gpc = 0; | 532 | u32 tpc_per_gpc = 0; |
528 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; | 533 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; |
529 | u32 pes_tpc_mask = 0, pes_index; | 534 | u32 pes_tpc_mask = 0, pes_index; |
535 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
536 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
530 | 537 | ||
531 | gk20a_dbg_fn(""); | 538 | gk20a_dbg_fn(""); |
532 | 539 | ||
533 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 540 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
534 | gpc_offset = proj_gpc_stride_v() * gpc_index; | 541 | gpc_offset = gpc_stride * gpc_index; |
535 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; | 542 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; |
536 | tpc_index++) { | 543 | tpc_index++) { |
537 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index; | 544 | tpc_offset = tpc_in_gpc_stride * tpc_index; |
538 | 545 | ||
539 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() | 546 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() |
540 | + gpc_offset + tpc_offset, | 547 | + gpc_offset + tpc_offset, |
@@ -640,32 +647,37 @@ static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | |||
640 | return 0; | 647 | return 0; |
641 | } | 648 | } |
642 | 649 | ||
643 | static bool gr_gm20b_is_tpc_addr_shared(u32 addr) | 650 | static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr) |
644 | { | 651 | { |
645 | return (addr >= proj_tpc_in_gpc_shared_base_v()) && | 652 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
646 | (addr < (proj_tpc_in_gpc_shared_base_v() + | 653 | u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE); |
647 | proj_tpc_in_gpc_stride_v())); | 654 | return (addr >= tpc_in_gpc_shared_base) && |
655 | (addr < (tpc_in_gpc_shared_base + | ||
656 | tpc_in_gpc_stride)); | ||
648 | } | 657 | } |
649 | 658 | ||
650 | static bool gr_gm20b_is_tpc_addr(u32 addr) | 659 | static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr) |
651 | { | 660 | { |
652 | return ((addr >= proj_tpc_in_gpc_base_v()) && | 661 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); |
653 | (addr < proj_tpc_in_gpc_base_v() + | 662 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
654 | (proj_scal_litter_num_tpc_per_gpc_v() * | 663 | u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
655 | proj_tpc_in_gpc_stride_v()))) | 664 | return ((addr >= tpc_in_gpc_base) && |
656 | || gr_gm20b_is_tpc_addr_shared(addr); | 665 | (addr < tpc_in_gpc_base + |
666 | (num_tpc_per_gpc * tpc_in_gpc_stride))) | ||
667 | || gr_gm20b_is_tpc_addr_shared(g, addr); | ||
657 | } | 668 | } |
658 | 669 | ||
659 | static u32 gr_gm20b_get_tpc_num(u32 addr) | 670 | static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) |
660 | { | 671 | { |
661 | u32 i, start; | 672 | u32 i, start; |
662 | u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v(); | 673 | u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); |
674 | u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); | ||
675 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
663 | 676 | ||
664 | for (i = 0; i < num_tpcs; i++) { | 677 | for (i = 0; i < num_tpcs; i++) { |
665 | start = proj_tpc_in_gpc_base_v() + | 678 | start = tpc_in_gpc_base + (i * tpc_in_gpc_stride); |
666 | (i * proj_tpc_in_gpc_stride_v()); | ||
667 | if ((addr >= start) && | 679 | if ((addr >= start) && |
668 | (addr < (start + proj_tpc_in_gpc_stride_v()))) | 680 | (addr < (start + tpc_in_gpc_stride))) |
669 | return i; | 681 | return i; |
670 | } | 682 | } |
671 | return 0; | 683 | return 0; |
@@ -1066,6 +1078,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | |||
1066 | u32 gpc, tpc, sm_id; | 1078 | u32 gpc, tpc, sm_id; |
1067 | u32 tpc_offset, gpc_offset, reg_offset; | 1079 | u32 tpc_offset, gpc_offset, reg_offset; |
1068 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; | 1080 | u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; |
1081 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1082 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1069 | 1083 | ||
1070 | /* for maxwell & kepler */ | 1084 | /* for maxwell & kepler */ |
1071 | u32 numSmPerTpc = 1; | 1085 | u32 numSmPerTpc = 1; |
@@ -1075,8 +1089,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | |||
1075 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | 1089 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; |
1076 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | 1090 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; |
1077 | 1091 | ||
1078 | tpc_offset = proj_tpc_in_gpc_stride_v() * tpc; | 1092 | tpc_offset = tpc_in_gpc_stride * tpc; |
1079 | gpc_offset = proj_gpc_stride_v() * gpc; | 1093 | gpc_offset = gpc_stride * gpc; |
1080 | reg_offset = tpc_offset + gpc_offset; | 1094 | reg_offset = tpc_offset + gpc_offset; |
1081 | 1095 | ||
1082 | /* 64 bit read */ | 1096 | /* 64 bit read */ |