From f18f06f1043cd049d76520f2230ec40e3f3c1210 Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Mon, 6 Aug 2018 17:06:14 -0700 Subject: Revert "gpu: nvgpu: gv11b: enable RMW for gpu atomics" The original change caused cuda atomic perf regression. Bug 2310618 This reverts commit 10c3d4447d4206302f5d51695bf1f193255dd889. Change-Id: Iea5391a89fdfadfb9a79cda57e71f1c9e87ca882 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1793880 (cherry picked from commit d0e51ddcb8139de70916335f124a80b8b588308b) Reviewed-on: https://git-master.nvidia.com/r/1804945 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv100/gr_gv100.c | 38 ---------------------- drivers/gpu/nvgpu/gv100/gr_gv100.h | 1 - drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 +- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 6 ---- .../gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h | 8 ----- .../gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | 8 ----- 6 files changed, 1 insertion(+), 62 deletions(-) diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 79526947..d5ace998 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -35,7 +35,6 @@ #include "gv11b/subctx_gv11b.h" #include -#include #include #include #include @@ -286,43 +285,6 @@ exit_build_table: return err; } -void gr_gv100_init_gpc_mmu(struct gk20a *g) -{ - u32 temp; - - nvgpu_log_info(g, "initialize gpc mmu"); - - if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { - /* Bypass MMU check for non-secure boot. For - * secure-boot,this register write has no-effect */ - gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); - } - temp = gk20a_readl(g, fb_mmu_ctrl_r()); - temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | - gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | - gr_gpcs_pri_mmu_ctrl_vol_fault_m() | - gr_gpcs_pri_mmu_ctrl_comp_fault_m() | - gr_gpcs_pri_mmu_ctrl_miss_gran_m() | - gr_gpcs_pri_mmu_ctrl_cache_mode_m() | - gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | - gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | - gr_gpcs_pri_mmu_ctrl_mmu_disable_m()| - gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(); - - gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); - nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp); - - gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); - gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); - - gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), - gk20a_readl(g, fb_mmu_debug_ctrl_r())); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), - gk20a_readl(g, fb_mmu_debug_wr_r())); - gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), - gk20a_readl(g, fb_mmu_debug_rd_r())); -} - u32 gr_gv100_get_patch_slots(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 81bf7e38..6d6b4170 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, u32 num_fbpas, u32 *priv_addr_table, u32 *t); -void gr_gv100_init_gpc_mmu(struct gk20a *g); u32 gr_gv100_get_hw_accessor_stream_out_mode(void); void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); #endif diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 6f07015d..295e896d 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -302,7 +302,7 @@ static const struct gpu_ops gv100_ops = { }, .gr = { .get_patch_slots = gr_gv100_get_patch_slots, - .init_gpc_mmu = gr_gv100_init_gpc_mmu, + .init_gpc_mmu = gr_gv11b_init_gpc_mmu, .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, .cb_size_default = gr_gv100_cb_size_default, .calc_global_ctx_buffer_size = diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index aeb49982..0fbba3a0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -4404,13 +4404,7 @@ void gr_gv11b_init_gpc_mmu(struct gk20a *g) gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); - - temp = set_field(temp, gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(), - gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f()); gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); - nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x, atomic_capability_mode_rmw", - temp); - gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h index d2a73286..0f83d6ba 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h @@ -4028,14 +4028,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void) { return 0x1U << 31U; } -static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void) -{ - return 0x3U << 24U; -} -static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void) -{ - return 0x2000000U; -} static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) { return 0x00418890U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h index 473eaff4..588452cd 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h @@ -4972,14 +4972,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void) { return 0x1U << 31U; } -static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void) -{ - return 0x3U << 24U; -} -static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void) -{ - return 0x2000000U; -} static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) { return 0x00418890U; -- cgit v1.2.2