Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r--   drivers/gpu/nvgpu/gp10b/gr_gp10b.c   51
1 file changed, 30 insertions, 21 deletions
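
The change below drops the compile-time hardware literals from hw_proj_gp10b.h (proj_gpc_stride_v(), proj_tpc_in_gpc_stride_v(), proj_ppc_in_gpc_stride_v(), proj_scal_litter_num_pes_per_gpc_v()) in favour of per-chip lookups through nvgpu_get_litter_value(). As a minimal sketch of the pattern, taken from the gr_gp10b_handle_sm_exception() hunk below, a register offset that used to be computed as

    u32 offset = proj_gpc_stride_v() * gpc +
            proj_tpc_in_gpc_stride_v() * tpc;

is now derived from litter values queried from the device:

    /* per-chip stride values replace the hw_proj_gp10b.h constants */
    u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
    u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
    u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
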
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 1aba35f3..6ab8e923 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -25,7 +25,6 @@
 #include "gp10b/gr_gp10b.h"
 #include "hw_gr_gp10b.h"
 #include "hw_fifo_gp10b.h"
-#include "hw_proj_gp10b.h"
 #include "hw_ctxsw_prog_gp10b.h"
 #include "hw_mc_gp10b.h"
 #include "gp10b_sysfs.h"
@@ -61,8 +60,9 @@ static int gr_gp10b_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
                 bool *post_event, struct channel_gk20a *fault_ch)
 {
         int ret = 0;
-        u32 offset = proj_gpc_stride_v() * gpc +
-                proj_tpc_in_gpc_stride_v() * tpc;
+        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+        u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
         u32 lrf_ecc_status, shm_ecc_status;
 
         gr_gk20a_handle_sm_exception(g, gpc, tpc, post_event, fault_ch);
@@ -170,8 +170,9 @@ static int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
                 bool *post_event)
 {
         int ret = 0;
-        u32 offset = proj_gpc_stride_v() * gpc +
-                proj_tpc_in_gpc_stride_v() * tpc;
+        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+        u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
         u32 esr;
         u32 ecc_stats_reg_val;
 
@@ -314,6 +315,9 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
         u32 temp, temp2;
         u32 cbm_cfg_size_beta, cbm_cfg_size_alpha, cbm_cfg_size_steadystate;
         u32 attrib_size_in_chunk, cb_attrib_cache_size_init;
+        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+        u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
+        u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
 
         gk20a_dbg_fn("");
 
@@ -346,8 +350,8 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
                 gr->tpc_count * gr->alpha_cb_size;
 
         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-                temp = proj_gpc_stride_v() * gpc_index;
-                temp2 = proj_scal_litter_num_pes_per_gpc_v() * gpc_index;
+                temp = gpc_stride * gpc_index;
+                temp2 = num_pes_per_gpc * gpc_index;
                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
                         ppc_index++) {
                         cbm_cfg_size_beta = cb_attrib_cache_size_init *
@@ -359,17 +363,17 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 
                         gr_gk20a_ctx_patch_write(g, ch_ctx,
                                 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
-                                proj_ppc_in_gpc_stride_v() * ppc_index,
+                                ppc_in_gpc_stride * ppc_index,
                                 cbm_cfg_size_beta, patch);
 
                         gr_gk20a_ctx_patch_write(g, ch_ctx,
                                 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
-                                proj_ppc_in_gpc_stride_v() * ppc_index,
+                                ppc_in_gpc_stride * ppc_index,
                                 attrib_offset_in_chunk, patch);
 
                         gr_gk20a_ctx_patch_write(g, ch_ctx,
                                 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
-                                proj_ppc_in_gpc_stride_v() * ppc_index,
+                                ppc_in_gpc_stride * ppc_index,
                                 cbm_cfg_size_steadystate,
                                 patch);
 
@@ -378,12 +382,12 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 
                         gr_gk20a_ctx_patch_write(g, ch_ctx,
                                 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
-                                proj_ppc_in_gpc_stride_v() * ppc_index,
+                                ppc_in_gpc_stride * ppc_index,
                                 cbm_cfg_size_alpha, patch);
 
                         gr_gk20a_ctx_patch_write(g, ch_ctx,
                                 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
-                                proj_ppc_in_gpc_stride_v() * ppc_index,
+                                ppc_in_gpc_stride * ppc_index,
                                 alpha_offset_in_chunk, patch);
 
                         alpha_offset_in_chunk += gr->alpha_cb_size *
@@ -618,6 +622,8 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
         u32 gpc_index, ppc_index, stride, val;
         u32 pd_ab_max_output;
         u32 alpha_cb_size = data * 4;
+        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+        u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
         gk20a_dbg_fn("");
 
@@ -638,14 +644,14 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
                 gr_pd_ab_dist_cfg1_max_batches_init_f());
 
         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-                stride = proj_gpc_stride_v() * gpc_index;
+                stride = gpc_stride * gpc_index;
 
                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
                         ppc_index++) {
 
                         val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
                                 stride +
-                                proj_ppc_in_gpc_stride_v() * ppc_index);
+                                ppc_in_gpc_stride * ppc_index);
 
                         val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
                                 gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
@@ -653,7 +659,7 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 
                         gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
                                 stride +
-                                proj_ppc_in_gpc_stride_v() * ppc_index, val);
+                                ppc_in_gpc_stride * ppc_index, val);
                 }
         }
 }
@@ -663,6 +669,8 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
         struct gr_gk20a *gr = &g->gr;
         u32 gpc_index, ppc_index, stride, val;
         u32 cb_size_steady = data * 4, cb_size;
+        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+        u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
         gk20a_dbg_fn("");
 
@@ -684,14 +692,14 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
                 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));
 
         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-                stride = proj_gpc_stride_v() * gpc_index;
+                stride = gpc_stride * gpc_index;
 
                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
                         ppc_index++) {
 
                         val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
                                 stride +
-                                proj_ppc_in_gpc_stride_v() * ppc_index);
+                                ppc_in_gpc_stride * ppc_index);
 
                         val = set_field(val,
                                 gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
@@ -700,9 +708,9 @@ static void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data)
 
                         gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
                                 stride +
-                                proj_ppc_in_gpc_stride_v() * ppc_index, val);
+                                ppc_in_gpc_stride * ppc_index, val);
 
-                        gk20a_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index +
+                        gk20a_writel(g, ppc_in_gpc_stride * ppc_index +
                                 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
                                 stride,
                                 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
@@ -1542,8 +1550,9 @@ static int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
         bool cilp_enabled = (fault_ch->ch_ctx.gr_ctx->preempt_mode ==
                 NVGPU_GR_PREEMPTION_MODE_CILP) ;
         u32 global_mask = 0, dbgr_control0, global_esr_copy;
-        u32 offset = proj_gpc_stride_v() * gpc +
-                proj_tpc_in_gpc_stride_v() * tpc;
+        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+        u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
 
         *early_exit = false;
         *ignore_debugger = false;