diff options
author | Kevin Huang <kevinh@nvidia.com> | 2014-04-09 21:47:29 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:10:51 -0400 |
commit | 67887cc61ae18c0278eec8ff8ae758297765f3b8 (patch) | |
tree | 72f943b78dc52eea21e63c9e11aa54af690abb0c /drivers/gpu/nvgpu/gm20b | |
parent | 932377f12a4d928ea1e4728f242abe3a1f07eaa6 (diff) |
gpu: nvgpu: gm20b: support TPC floorsweeping
Bug 1450798
Change-Id: I371537d086ce1088c6d007676c1fe1e2770dd4e3
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Reviewed-on: http://git-master/r/403877
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 46 |
1 files changed, 32 insertions, 14 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 25d03736..72500b0e 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "hw_fb_gm20b.h" | 23 | #include "hw_fb_gm20b.h" |
24 | #include "hw_proj_gm20b.h" | 24 | #include "hw_proj_gm20b.h" |
25 | #include "hw_ctxsw_prog_gm20b.h" | 25 | #include "hw_ctxsw_prog_gm20b.h" |
26 | #include "hw_fuse_gm20b.h" | ||
26 | 27 | ||
27 | static void gr_gm20b_init_gpc_mmu(struct gk20a *g) | 28 | static void gr_gm20b_init_gpc_mmu(struct gk20a *g) |
28 | { | 29 | { |
@@ -478,6 +479,17 @@ static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, | |||
478 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); | 479 | *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); |
479 | } | 480 | } |
480 | 481 | ||
482 | static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | ||
483 | { | ||
484 | u32 val; | ||
485 | struct gr_gk20a *gr = &g->gr; | ||
486 | |||
487 | /* Read NV_FUSE_STATUS_OPT_TPC_GPC for gpc_index, invert it, and keep only the low max_tpc_per_gpc_count bits (presumably set fuse bits mark floorswept TPCs -- TODO confirm) */ | ||
488 | val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index)); | ||
489 | |||
490 | return (~val) & ((0x1 << gr->max_tpc_per_gpc_count) - 1); | ||
491 | } | ||
492 | |||
481 | static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | 493 | static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) |
482 | { | 494 | { |
483 | struct gr_gk20a *gr = &g->gr; | 495 | struct gr_gk20a *gr = &g->gr; |
@@ -486,6 +498,7 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
486 | u32 sm_id = 0, gpc_id = 0; | 498 | u32 sm_id = 0, gpc_id = 0; |
487 | u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; | 499 | u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()]; |
488 | u32 tpc_per_gpc; | 500 | u32 tpc_per_gpc; |
501 | u32 tpc_fs_mask = 0, tpc_sm_id, gpc_tpc_id; | ||
489 | 502 | ||
490 | gk20a_dbg_fn(""); | 503 | gk20a_dbg_fn(""); |
491 | 504 | ||
@@ -556,22 +569,26 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
556 | gk20a_readl(g, gr_be0_crop_debug3_r()) | | 569 | gk20a_readl(g, gr_be0_crop_debug3_r()) | |
557 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); | 570 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); |
558 | 571 | ||
559 | if (tegra_platform_is_silicon()) { | 572 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { |
560 | gk20a_writel(g, gr_fe_tpc_fs_r(), gr->pes_tpc_mask[0][0]); | 573 | tpc_fs_mask |= gr->gpc_tpc_mask[gpc_index] << |
561 | 574 | (gr->max_tpc_per_gpc_count * gpc_index); | |
562 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0) | | 575 | } |
563 | gr_cwd_gpc_tpc_id_tpc1_f(1)); | 576 | gk20a_writel(g, gr_fe_tpc_fs_r(), tpc_fs_mask); |
564 | |||
565 | gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0) | | ||
566 | gr_cwd_sm_id_tpc1_f(1)); | ||
567 | } else { | ||
568 | gk20a_writel(g, gr_fe_tpc_fs_r(), 1); | ||
569 | |||
570 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gr_cwd_gpc_tpc_id_tpc0_f(0)); | ||
571 | |||
572 | gk20a_writel(g, gr_cwd_sm_id_r(0), gr_cwd_sm_id_tpc0_f(0)); | ||
573 | 577 | ||
578 | if (tpc_fs_mask & (0x1 << 0)) { | ||
579 | tpc_sm_id |= gr_cwd_sm_id_tpc0_f(0); | ||
580 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(0); | ||
581 | } | ||
582 | if (tpc_fs_mask & (0x1 << 1)) { | ||
583 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(1); | ||
584 | tpc_sm_id |= gr_cwd_sm_id_tpc1_f(1); | ||
574 | } | 585 | } |
586 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. | ||
587 | * Since we know the TPC count is less than 5, we select | ||
588 | * index 0 directly. */ | ||
589 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gpc_tpc_id); | ||
590 | |||
591 | gk20a_writel(g, gr_cwd_sm_id_r(0), tpc_sm_id); | ||
575 | 592 | ||
576 | return 0; | 593 | return 0; |
577 | } | 594 | } |
@@ -733,4 +750,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
733 | #else | 750 | #else |
734 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; | 751 | gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; |
735 | #endif | 752 | #endif |
753 | gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask; | ||
736 | } | 754 | } |