From f18f06f1043cd049d76520f2230ec40e3f3c1210 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Mon, 6 Aug 2018 17:06:14 -0700 Subject: Revert "gpu: nvgpu: gv11b: enable RMW for gpu atomics" The original change caused cuda atomic perf regression. Bug 2310618 This reverts commit 10c3d4447d4206302f5d51695bf1f193255dd889. Change-Id: Iea5391a89fdfadfb9a79cda57e71f1c9e87ca882 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1793880 (cherry picked from commit d0e51ddcb8139de70916335f124a80b8b588308b) Reviewed-on: https://git-master.nvidia.com/r/1804945 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv100/gr_gv100.c | 38 ------------------------------------- drivers/gpu/nvgpu/gv100/gr_gv100.h | 1 - drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 +- 3 files changed, 1 insertion(+), 40 deletions(-) (limited to 'drivers/gpu/nvgpu/gv100') diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 79526947..d5ace998 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -35,7 +35,6 @@ #include "gv11b/subctx_gv11b.h" #include -#include #include #include #include @@ -286,43 +285,6 @@ exit_build_table: return err; } -void gr_gv100_init_gpc_mmu(struct gk20a *g) -{ - u32 temp; - - nvgpu_log_info(g, "initialize gpc mmu"); - - if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { - /* Bypass MMU check for non-secure boot. For - * secure-boot,this register write has no-effect */ - gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); - } - temp = gk20a_readl(g, fb_mmu_ctrl_r()); - temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | - gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | - gr_gpcs_pri_mmu_ctrl_vol_fault_m() | - gr_gpcs_pri_mmu_ctrl_comp_fault_m() | - gr_gpcs_pri_mmu_ctrl_miss_gran_m() | - gr_gpcs_pri_mmu_ctrl_cache_mode_m() | - gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | - gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | - gr_gpcs_pri_mmu_ctrl_mmu_disable_m()| - gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(); - - gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); - nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp); - - gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); - gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); - - gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), - gk20a_readl(g, fb_mmu_debug_ctrl_r())); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), - gk20a_readl(g, fb_mmu_debug_wr_r())); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), - gk20a_readl(g, fb_mmu_debug_rd_r())); -} - u32 gr_gv100_get_patch_slots(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 81bf7e38..6d6b4170 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, u32 num_fbpas, u32 *priv_addr_table, u32 *t); -void gr_gv100_init_gpc_mmu(struct gk20a *g); u32 gr_gv100_get_hw_accessor_stream_out_mode(void); void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); #endif diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 6f07015d..295e896d 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -302,7 +302,7 @@ static const struct gpu_ops gv100_ops = { }, .gr = { .get_patch_slots = gr_gv100_get_patch_slots, - .init_gpc_mmu = gr_gv100_init_gpc_mmu, + .init_gpc_mmu = gr_gv11b_init_gpc_mmu, .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, .cb_size_default = gr_gv100_cb_size_default, .calc_global_ctx_buffer_size = -- cgit v1.2.2