Revert "gpu: nvgpu: gv11b: enable RMW for gpu atomics"

The original change caused cuda atomic perf regression.

Bug 2310618

This reverts commit 10c3d4447d4206302f5d51695bf1f193255dd889.

Change-Id: Iea5391a89fdfadfb9a79cda57e71f1c9e87ca882
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1793880
(cherry picked from commit d0e51ddcb8139de70916335f124a80b8b588308b)
Reviewed-on: https://git-master.nvidia.com/r/1804945
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Richard Zhao <rizhao@nvidia.com> 2018-08-06 20:06:14 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-09-04 19:16:20 -0400
commit: f18f06f1043cd049d76520f2230ec40e3f3c1210 (patch)
tree: 032bbaccb81ea362eb46b7fa0076a55c1b963ecf
parent: a6499fb9ceddd9ea52cf7e67789a64131545295c (diff)
6 files changed, 1 insertions, 62 deletions
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index 79526947..d5ace998 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -35,7 +35,6 @@
 #include "gv11b/subctx_gv11b.h"
 #include <nvgpu/hw/gv100/hw_gr_gv100.h>
-#include <nvgpu/hw/gv100/hw_fb_gv100.h>
 #include <nvgpu/hw/gv100/hw_proj_gv100.h>
 #include <nvgpu/hw/gv100/hw_top_gv100.h>
 #include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
@@ -286,43 +285,6 @@ exit_build_table:
        return err;
 }
-void gr_gv100_init_gpc_mmu(struct gk20a *g)
-{
-        u32 temp;
-        nvgpu_log_info(g, "initialize gpc mmu");
-        if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
-                /* Bypass MMU check for non-secure boot. For
-                 * secure-boot,this register write has no-effect */
-                gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
-        }
-        temp = gk20a_readl(g, fb_mmu_ctrl_r());
-        temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
-                gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
-                gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
-                gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
-                gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
-                gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
-                gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
-                gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
-                gr_gpcs_pri_mmu_ctrl_mmu_disable_m()|
-                gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m();
-        gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
-        nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp);
-        gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
-        gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
-        gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
-                        gk20a_readl(g, fb_mmu_debug_ctrl_r()));
-        gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
-                        gk20a_readl(g, fb_mmu_debug_wr_r()));
-        gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
-                        gk20a_readl(g, fb_mmu_debug_rd_r()));
-}
 u32 gr_gv100_get_patch_slots(struct gk20a *g)
 {
        struct gr_gk20a *gr = &g->gr;
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 81bf7e38..6d6b4170 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
 void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
        u32 num_fbpas,
        u32 *priv_addr_table, u32 *t);
-void gr_gv100_init_gpc_mmu(struct gk20a *g);
 u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
 void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 6f07015d..295e896d 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -302,7 +302,7 @@ static const struct gpu_ops gv100_ops = {
        },
        .gr = {
                .get_patch_slots = gr_gv100_get_patch_slots,
-                .init_gpc_mmu = gr_gv100_init_gpc_mmu,
+                .init_gpc_mmu = gr_gv11b_init_gpc_mmu,
                .bundle_cb_defaults = gr_gv100_bundle_cb_defaults,
                .cb_size_default = gr_gv100_cb_size_default,
                .calc_global_ctx_buffer_size =
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index aeb49982..0fbba3a0 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -4404,13 +4404,7 @@ void gr_gv11b_init_gpc_mmu(struct gk20a *g)
                gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
                gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
                gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
-        temp = set_field(temp, gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(),
-                           gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f());
        gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
-        nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x, atomic_capability_mode_rmw",
-                        temp);
        gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
        gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
index d2a73286..0f83d6ba 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
@@ -4028,14 +4028,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
 {
        return 0x1U << 31U;
 }
-static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void)
-{
-        return 0x3U << 24U;
-}
-static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void)
-{
-        return 0x2000000U;
-}
 static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
 {
        return 0x00418890U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 473eaff4..588452cd 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -4972,14 +4972,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
 {
        return 0x1U << 31U;
 }
-static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void)
-{
-        return 0x3U << 24U;
-}
-static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void)
-{
-        return 0x2000000U;
-}
 static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
 {
        return 0x00418890U;
author	Richard Zhao <rizhao@nvidia.com>	2018-08-06 20:06:14 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-09-04 19:16:20 -0400
commit	f18f06f1043cd049d76520f2230ec40e3f3c1210 (patch)
tree	032bbaccb81ea362eb46b7fa0076a55c1b963ecf
parent	a6499fb9ceddd9ea52cf7e67789a64131545295c (diff)

diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 79526947..d5ace998 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -35,7 +35,6 @@
35	#include "gv11b/subctx_gv11b.h"	35	#include "gv11b/subctx_gv11b.h"
36		36
37	#include <nvgpu/hw/gv100/hw_gr_gv100.h>	37	#include <nvgpu/hw/gv100/hw_gr_gv100.h>
38	#include <nvgpu/hw/gv100/hw_fb_gv100.h>
39	#include <nvgpu/hw/gv100/hw_proj_gv100.h>	38	#include <nvgpu/hw/gv100/hw_proj_gv100.h>
40	#include <nvgpu/hw/gv100/hw_top_gv100.h>	39	#include <nvgpu/hw/gv100/hw_top_gv100.h>
41	#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>	40	#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
@@ -286,43 +285,6 @@ exit_build_table:
286	return err;	285	return err;
287	}	286	}
288		287
289	void gr_gv100_init_gpc_mmu(struct gk20a *g)
290	{
291	u32 temp;
292
293	nvgpu_log_info(g, "initialize gpc mmu");
294
295	if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
296	/* Bypass MMU check for non-secure boot. For
297	* secure-boot,this register write has no-effect */
298	gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
299	}
300	temp = gk20a_readl(g, fb_mmu_ctrl_r());
301	temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
302	gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
303	gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
304	gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
305	gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
306	gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
307	gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
308	gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
309	gr_gpcs_pri_mmu_ctrl_mmu_disable_m()|
310	gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m();
311
312	gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
313	nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp);
314
315	gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
316	gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
317
318	gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
319	gk20a_readl(g, fb_mmu_debug_ctrl_r()));
320	gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
321	gk20a_readl(g, fb_mmu_debug_wr_r()));
322	gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
323	gk20a_readl(g, fb_mmu_debug_rd_r()));
324	}
325
326	u32 gr_gv100_get_patch_slots(struct gk20a *g)	288	u32 gr_gv100_get_patch_slots(struct gk20a *g)
327	{	289	{
328	struct gr_gk20a *gr = &g->gr;	290	struct gr_gk20a *gr = &g->gr;


diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 81bf7e38..6d6b4170 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
46	void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,	46	void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
47	u32 num_fbpas,	47	u32 num_fbpas,
48	u32 *priv_addr_table, u32 *t);	48	u32 *priv_addr_table, u32 *t);
49	void gr_gv100_init_gpc_mmu(struct gk20a *g);
50	u32 gr_gv100_get_hw_accessor_stream_out_mode(void);	49	u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
51	void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);	50	void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
52	#endif	51	#endif


diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 6f07015d..295e896d 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -302,7 +302,7 @@ static const struct gpu_ops gv100_ops = {
302	},	302	},
303	.gr = {	303	.gr = {
304	.get_patch_slots = gr_gv100_get_patch_slots,	304	.get_patch_slots = gr_gv100_get_patch_slots,
305	.init_gpc_mmu = gr_gv100_init_gpc_mmu,	305	.init_gpc_mmu = gr_gv11b_init_gpc_mmu,
306	.bundle_cb_defaults = gr_gv100_bundle_cb_defaults,	306	.bundle_cb_defaults = gr_gv100_bundle_cb_defaults,
307	.cb_size_default = gr_gv100_cb_size_default,	307	.cb_size_default = gr_gv100_cb_size_default,
308	.calc_global_ctx_buffer_size =	308	.calc_global_ctx_buffer_size =


diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index aeb49982..0fbba3a0 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -4404,13 +4404,7 @@ void gr_gv11b_init_gpc_mmu(struct gk20a *g)
4404	gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |	4404	gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
4405	gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |	4405	gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
4406	gr_gpcs_pri_mmu_ctrl_mmu_disable_m();	4406	gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
4407
4408	temp = set_field(temp, gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(),
4409	gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f());
4410	gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);	4407	gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
4411	nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x, atomic_capability_mode_rmw",
4412	temp);
4413
4414	gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);	4408	gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
4415	gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);	4409	gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
4416		4410


diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h index d2a73286..0f83d6ba 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
@@ -4028,14 +4028,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
4028	{	4028	{
4029	return 0x1U << 31U;	4029	return 0x1U << 31U;
4030	}	4030	}
4031	static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void)
4032	{
4033	return 0x3U << 24U;
4034	}
4035	static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void)
4036	{
4037	return 0x2000000U;
4038	}
4039	static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)	4031	static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
4040	{	4032	{
4041	return 0x00418890U;	4033	return 0x00418890U;


diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h index 473eaff4..588452cd 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -4972,14 +4972,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
4972	{	4972	{
4973	return 0x1U << 31U;	4973	return 0x1U << 31U;
4974	}	4974	}
4975	static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void)
4976	{
4977	return 0x3U << 24U;
4978	}
4979	static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void)
4980	{
4981	return 0x2000000U;
4982	}
4983	static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)	4975	static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
4984	{	4976	{
4985	return 0x00418890U;	4977	return 0x00418890U;