summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Richard Zhao <rizhao@nvidia.com> 2018-08-06 20:06:14 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-09-04 19:16:20 -0400
commit: f18f06f1043cd049d76520f2230ec40e3f3c1210 (patch)
tree: 032bbaccb81ea362eb46b7fa0076a55c1b963ecf
parent: a6499fb9ceddd9ea52cf7e67789a64131545295c (diff)
Revert "gpu: nvgpu: gv11b: enable RMW for gpu atomics"
The original change caused cuda atomic perf regression.

Bug 2310618

This reverts commit 10c3d4447d4206302f5d51695bf1f193255dd889.

Change-Id: Iea5391a89fdfadfb9a79cda57e71f1c9e87ca882
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1793880
(cherry picked from commit d0e51ddcb8139de70916335f124a80b8b588308b)
Reviewed-on: https://git-master.nvidia.com/r/1804945
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gv100/gr_gv100.c 38
-rw-r--r-- drivers/gpu/nvgpu/gv100/gr_gv100.h 1
-rw-r--r-- drivers/gpu/nvgpu/gv100/hal_gv100.c 2
-rw-r--r-- drivers/gpu/nvgpu/gv11b/gr_gv11b.c 6
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h 8
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h 8
6 files changed, 1 insertions, 62 deletions
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index 79526947..d5ace998 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -35,7 +35,6 @@
35#include "gv11b/subctx_gv11b.h" 35#include "gv11b/subctx_gv11b.h"
36 36
37#include <nvgpu/hw/gv100/hw_gr_gv100.h> 37#include <nvgpu/hw/gv100/hw_gr_gv100.h>
38#include <nvgpu/hw/gv100/hw_fb_gv100.h>
39#include <nvgpu/hw/gv100/hw_proj_gv100.h> 38#include <nvgpu/hw/gv100/hw_proj_gv100.h>
40#include <nvgpu/hw/gv100/hw_top_gv100.h> 39#include <nvgpu/hw/gv100/hw_top_gv100.h>
41#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h> 40#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
@@ -286,43 +285,6 @@ exit_build_table:
286 return err; 285 return err;
287} 286}
288 287
289void gr_gv100_init_gpc_mmu(struct gk20a *g)
290{
291 u32 temp;
292
293 nvgpu_log_info(g, "initialize gpc mmu");
294
295 if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
296 /* Bypass MMU check for non-secure boot. For
297 * secure-boot,this register write has no-effect */
298 gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
299 }
300 temp = gk20a_readl(g, fb_mmu_ctrl_r());
301 temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
302 gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
303 gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
304 gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
305 gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
306 gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
307 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
308 gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
309 gr_gpcs_pri_mmu_ctrl_mmu_disable_m()|
310 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m();
311
312 gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
313 nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp);
314
315 gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
316 gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
317
318 gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
319 gk20a_readl(g, fb_mmu_debug_ctrl_r()));
320 gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
321 gk20a_readl(g, fb_mmu_debug_wr_r()));
322 gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
323 gk20a_readl(g, fb_mmu_debug_rd_r()));
324}
325
326u32 gr_gv100_get_patch_slots(struct gk20a *g) 288u32 gr_gv100_get_patch_slots(struct gk20a *g)
327{ 289{
328 struct gr_gk20a *gr = &g->gr; 290 struct gr_gk20a *gr = &g->gr;
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 81bf7e38..6d6b4170 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
46void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, 46void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
47 u32 num_fbpas, 47 u32 num_fbpas,
48 u32 *priv_addr_table, u32 *t); 48 u32 *priv_addr_table, u32 *t);
49void gr_gv100_init_gpc_mmu(struct gk20a *g);
50u32 gr_gv100_get_hw_accessor_stream_out_mode(void); 49u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
51void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); 50void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
52#endif 51#endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 6f07015d..295e896d 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -302,7 +302,7 @@ static const struct gpu_ops gv100_ops = {
302 }, 302 },
303 .gr = { 303 .gr = {
304 .get_patch_slots = gr_gv100_get_patch_slots, 304 .get_patch_slots = gr_gv100_get_patch_slots,
305 .init_gpc_mmu = gr_gv100_init_gpc_mmu, 305 .init_gpc_mmu = gr_gv11b_init_gpc_mmu,
306 .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, 306 .bundle_cb_defaults = gr_gv100_bundle_cb_defaults,
307 .cb_size_default = gr_gv100_cb_size_default, 307 .cb_size_default = gr_gv100_cb_size_default,
308 .calc_global_ctx_buffer_size = 308 .calc_global_ctx_buffer_size =
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index aeb49982..0fbba3a0 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -4404,13 +4404,7 @@ void gr_gv11b_init_gpc_mmu(struct gk20a *g)
4404 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | 4404 gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
4405 gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | 4405 gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
4406 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); 4406 gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
4407
4408 temp = set_field(temp, gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(),
4409 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f());
4410 gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); 4407 gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);
4411 nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x, atomic_capability_mode_rmw",
4412 temp);
4413
4414 gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); 4408 gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
4415 gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); 4409 gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);
4416 4410
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
index d2a73286..0f83d6ba 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h
@@ -4028,14 +4028,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
4028{ 4028{
4029 return 0x1U << 31U; 4029 return 0x1U << 31U;
4030} 4030}
4031static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void)
4032{
4033 return 0x3U << 24U;
4034}
4035static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void)
4036{
4037 return 0x2000000U;
4038}
4039static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) 4031static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
4040{ 4032{
4041 return 0x00418890U; 4033 return 0x00418890U;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index 473eaff4..588452cd 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -4972,14 +4972,6 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void)
4972{ 4972{
4973 return 0x1U << 31U; 4973 return 0x1U << 31U;
4974} 4974}
4975static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void)
4976{
4977 return 0x3U << 24U;
4978}
4979static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void)
4980{
4981 return 0x2000000U;
4982}
4983static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) 4975static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void)
4984{ 4976{
4985 return 0x00418890U; 4977 return 0x00418890U;