summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
diff options
context:
space:
mode:
author: Kevin Huang <kevinh@nvidia.com> 2014-04-09 21:47:29 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:10:51 -0400
commit: 67887cc61ae18c0278eec8ff8ae758297765f3b8 (patch)
tree: 72f943b78dc52eea21e63c9e11aa54af690abb0c /drivers/gpu/nvgpu/gm20b/gr_gm20b.c
parent: 932377f12a4d928ea1e4728f242abe3a1f07eaa6 (diff)
gpu: nvgpu: gm20b: support TPC floorsweeping
Bug 1450798

Change-Id: I371537d086ce1088c6d007676c1fe1e2770dd4e3
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Reviewed-on: http://git-master/r/403877
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/gr_gm20b.c')
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c 46
1 files changed, 32 insertions, 14 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 25d03736..72500b0e 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -23,6 +23,7 @@
23#include "hw_fb_gm20b.h" 23#include "hw_fb_gm20b.h"
24#include "hw_proj_gm20b.h" 24#include "hw_proj_gm20b.h"
25#include "hw_ctxsw_prog_gm20b.h" 25#include "hw_ctxsw_prog_gm20b.h"
26#include "hw_fuse_gm20b.h"
26 27
27static void gr_gm20b_init_gpc_mmu(struct gk20a *g) 28static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
28{ 29{
@@ -478,6 +479,17 @@ static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
478 *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); 479 *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
479} 480}
480 481
482static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
483{
484 u32 val;
485 struct gr_gk20a *gr = &g->gr;
486
487 /* Toggle the bits of NV_FUSE_STATUS_OPT_TPC_GPC */
488 val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index));
489
490 return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1);
491}
492
481static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) 493static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
482{ 494{
483 struct gr_gk20a *gr = &g->gr; 495 struct gr_gk20a *gr = &g->gr;
@@ -486,6 +498,7 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
486 u32 sm_id = 0, gpc_id = 0; 498 u32 sm_id = 0, gpc_id = 0;
487 u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; 499 u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
488 u32 tpc_per_gpc; 500 u32 tpc_per_gpc;
501 u32 tpc_fs_mask = 0, tpc_sm_id, gpc_tpc_id;
489 502
490 gk20a_dbg_fn(""); 503 gk20a_dbg_fn("");
491 504
@@ -556,22 +569,26 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
556 gk20a_readl(g, gr_be0_crop_debug3_r()) | 569 gk20a_readl(g, gr_be0_crop_debug3_r()) |
557 gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); 570 gr_bes_crop_debug3_comp_vdc_4to2_disable_m());
558 571
559 if (tegra_platform_is_silicon()) { 572 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
560 gk20a_writel(g, gr_fe_tpc_fs_r(), gr->pes_tpc_mask[0][0]); 573 tpc_fs_mask |= gr->gpc_tpc_mask[gpc_index] <<
561 574 (gr->max_tpc_per_gpc_count * gpc_index);
562 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0) | 575 }
563 gr_cwd_gpc_tpc_id_tpc1_f(1)); 576 gk20a_writel(g, gr_fe_tpc_fs_r(), tpc_fs_mask);
564
565 gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0) |
566 gr_cwd_sm_id_tpc1_f(1));
567 } else {
568 gk20a_writel(g, gr_fe_tpc_fs_r(), 1);
569
570 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0));
571
572 gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0));
573 577
578 if (tpc_fs_mask & (0x1 << 0)) {
579 tpc_sm_id |= gr_cwd_sm_id_tpc0_f(0);
580 gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(0);
581 }
582 if (tpc_fs_mask & (0x1 << 1)) {
583 gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(1);
584 tpc_sm_id |= gr_cwd_sm_id_tpc1_f(1);
574 } 585 }
586 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.
587 * Since we know TPC number is less than 5. We select
588 * index 0 directly. */
589 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gpc_tpc_id);
590
591 gk20a_writel(g, gr_cwd_sm_id_r(0), tpc_sm_id);
575 592
576 return 0; 593 return 0;
577} 594}
@@ -733,4 +750,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
733#else 750#else
734 gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; 751 gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
735#endif 752#endif
753 gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask;
736} 754}