diff options
author | Ashish Srivastava <assrivastava@nvidia.com> | 2018-02-20 06:40:27 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-06-26 14:17:17 -0400 |
commit | 10c3d4447d4206302f5d51695bf1f193255dd889 (patch) | |
tree | d70139a9c5f0a7476bf7c471bda2c62d5317b64f /drivers/gpu/nvgpu/gv100 | |
parent | 2d397e34a5aafb5feed406a13f3db536eadae5bb (diff) |
gpu: nvgpu: gv11b: enable RMW for gpu atomics
Add separate HAL implementations of the
init_gpc_mmu function for gv11b and gv100.
In the gv11b HAL, RMW is enabled for GPU atomics
by default.
In the gv100 HAL, the GPC atomic capability mode
is set based on the FB MMU capability:
if the GPU is connected through NVLINK, the MMU
is set to RMW mode; otherwise it is in
L2 mode.
Bug 200390336
Change-Id: I224934f83d1762ec864ef8da7265dd01d86893a0
Signed-off-by: Ashish Srivastava <assrivastava@nvidia.com>
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1735137
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv100')
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.c | 38 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 |
3 files changed, 40 insertions, 1 deletion
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 98e61eb0..2180fa1c 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "gv11b/subctx_gv11b.h" | 34 | #include "gv11b/subctx_gv11b.h" |
35 | 35 | ||
36 | #include <nvgpu/hw/gv100/hw_gr_gv100.h> | 36 | #include <nvgpu/hw/gv100/hw_gr_gv100.h> |
37 | #include <nvgpu/hw/gv100/hw_fb_gv100.h> | ||
37 | #include <nvgpu/hw/gv100/hw_proj_gv100.h> | 38 | #include <nvgpu/hw/gv100/hw_proj_gv100.h> |
38 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> | 39 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> |
39 | 40 | ||
@@ -280,6 +281,43 @@ exit_build_table: | |||
280 | return err; | 281 | return err; |
281 | } | 282 | } |
282 | 283 | ||
284 | void gr_gv100_init_gpc_mmu(struct gk20a *g) | ||
285 | { | ||
286 | u32 temp; | ||
287 | |||
288 | nvgpu_log_info(g, "initialize gpc mmu"); | ||
289 | |||
290 | if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
291 | /* Bypass MMU check for non-secure boot. For | ||
292 | * secure-boot,this register write has no-effect */ | ||
293 | gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); | ||
294 | } | ||
295 | temp = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
296 | temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | | ||
297 | gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | | ||
298 | gr_gpcs_pri_mmu_ctrl_vol_fault_m() | | ||
299 | gr_gpcs_pri_mmu_ctrl_comp_fault_m() | | ||
300 | gr_gpcs_pri_mmu_ctrl_miss_gran_m() | | ||
301 | gr_gpcs_pri_mmu_ctrl_cache_mode_m() | | ||
302 | gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | | ||
303 | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | | ||
304 | gr_gpcs_pri_mmu_ctrl_mmu_disable_m()| | ||
305 | gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(); | ||
306 | |||
307 | gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); | ||
308 | nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp); | ||
309 | |||
310 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); | ||
311 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); | ||
312 | |||
313 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), | ||
314 | gk20a_readl(g, fb_mmu_debug_ctrl_r())); | ||
315 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), | ||
316 | gk20a_readl(g, fb_mmu_debug_wr_r())); | ||
317 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), | ||
318 | gk20a_readl(g, fb_mmu_debug_rd_r())); | ||
319 | } | ||
320 | |||
283 | u32 gr_gv100_get_patch_slots(struct gk20a *g) | 321 | u32 gr_gv100_get_patch_slots(struct gk20a *g) |
284 | { | 322 | { |
285 | struct gr_gk20a *gr = &g->gr; | 323 | struct gr_gk20a *gr = &g->gr; |
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index ccc73e28..821659aa 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h | |||
@@ -46,4 +46,5 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, | |||
46 | void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | 46 | void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, |
47 | u32 num_fbpas, | 47 | u32 num_fbpas, |
48 | u32 *priv_addr_table, u32 *t); | 48 | u32 *priv_addr_table, u32 *t); |
49 | void gr_gv100_init_gpc_mmu(struct gk20a *g); | ||
49 | #endif | 50 | #endif |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index ae6c3d22..852fc1b7 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -290,7 +290,7 @@ static const struct gpu_ops gv100_ops = { | |||
290 | }, | 290 | }, |
291 | .gr = { | 291 | .gr = { |
292 | .get_patch_slots = gr_gv100_get_patch_slots, | 292 | .get_patch_slots = gr_gv100_get_patch_slots, |
293 | .init_gpc_mmu = gr_gv11b_init_gpc_mmu, | 293 | .init_gpc_mmu = gr_gv100_init_gpc_mmu, |
294 | .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, | 294 | .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, |
295 | .cb_size_default = gr_gv100_cb_size_default, | 295 | .cb_size_default = gr_gv100_cb_size_default, |
296 | .calc_global_ctx_buffer_size = | 296 | .calc_global_ctx_buffer_size = |