From 10c3d4447d4206302f5d51695bf1f193255dd889 Mon Sep 17 00:00:00 2001 From: Ashish Srivastava Date: Tue, 20 Feb 2018 17:10:27 +0530 Subject: gpu: nvgpu: gv11b: enable RMW for gpu atomics Separate HAL added in gv11b and gv100 for init_gpc_mmu function. In gv11b HAL, RMW is enabled for gpu atomics as default. In gv100 HAL, GPC atomic capability mode will get set based on the FB MMU capability. If GPU is connected through NVLINK then mmu will be set to RMW mode, else it will be in L2 mode. Bug 200390336 Change-Id: I224934f83d1762ec864ef8da7265dd01d86893a0 Signed-off-by: Ashish Srivastava Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/1735137 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv100/gr_gv100.c | 38 +++++++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gv100/gr_gv100.h | 1 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 +- 3 files changed, 40 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/nvgpu/gv100') diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 98e61eb0..2180fa1c 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -34,6 +34,7 @@ #include "gv11b/subctx_gv11b.h" #include +#include #include #include @@ -280,6 +281,43 @@ exit_build_table: return err; } +/* Program the GPC MMU registers from the FB MMU: write 0xffffffff to fb_priv_mmu_phy_secure_r() when NVGPU_SEC_PRIVSECURITY is not enabled, copy the fb_mmu_ctrl_r() value restricted to the masked fields below (including the atomic capability mode field) into gr_gpcs_pri_mmu_ctrl_r(), zero the PM unit and request masks, and copy the FB MMU debug ctrl, wr and rd registers into their gr_gpcs_pri_mmu_debug_* counterparts. */ void gr_gv100_init_gpc_mmu(struct gk20a *g) +{ + u32 temp; + + nvgpu_log_info(g, "initialize gpc mmu"); + + if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + /* Bypass MMU check for non-secure boot.
For + * secure-boot, this register write has no-effect */ + gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); + } + temp = gk20a_readl(g, fb_mmu_ctrl_r()); + temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | + gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | + gr_gpcs_pri_mmu_ctrl_vol_fault_m() | + gr_gpcs_pri_mmu_ctrl_comp_fault_m() | + gr_gpcs_pri_mmu_ctrl_miss_gran_m() | + gr_gpcs_pri_mmu_ctrl_cache_mode_m() | + gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | + gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | + gr_gpcs_pri_mmu_ctrl_mmu_disable_m()| + gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(); + + gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); + nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp); + + gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); + gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); + + gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), + gk20a_readl(g, fb_mmu_debug_ctrl_r())); + gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), + gk20a_readl(g, fb_mmu_debug_wr_r())); + gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), + gk20a_readl(g, fb_mmu_debug_rd_r())); +} + u32 gr_gv100_get_patch_slots(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index ccc73e28..821659aa 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -46,4 +46,5 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, u32 num_fbpas, u32 *priv_addr_table, u32 *t); +void gr_gv100_init_gpc_mmu(struct gk20a *g); #endif diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index ae6c3d22..852fc1b7 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -290,7 +290,7 @@ static const struct gpu_ops gv100_ops = { }, .gr = { .get_patch_slots = gr_gv100_get_patch_slots, - .init_gpc_mmu = gr_gv11b_init_gpc_mmu, + .init_gpc_mmu = 
gr_gv100_init_gpc_mmu, .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, .cb_size_default = gr_gv100_cb_size_default, .calc_global_ctx_buffer_size = -- cgit v1.2.2