diff options
author | Ashish Srivastava <assrivastava@nvidia.com> | 2018-02-20 06:40:27 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-06-26 14:17:17 -0400 |
commit | 10c3d4447d4206302f5d51695bf1f193255dd889 (patch) | |
tree | d70139a9c5f0a7476bf7c471bda2c62d5317b64f | |
parent | 2d397e34a5aafb5feed406a13f3db536eadae5bb (diff) |
gpu: nvgpu: gv11b: enable RMW for gpu atomics
gv11b and gv100 now each have their own HAL
implementation of the init_gpc_mmu function.
In the gv11b HAL, RMW is enabled for GPU atomics
by default.
In the gv100 HAL, the GPC atomic capability mode
is set based on the FB MMU capability.
If the GPU is connected through NVLINK, the MMU
is set to RMW mode; otherwise it is in
L2 mode.
Bug 200390336
Change-Id: I224934f83d1762ec864ef8da7265dd01d86893a0
Signed-off-by: Ashish Srivastava <assrivastava@nvidia.com>
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1735137
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.c | 38 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/gr_gv100.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | 8 |
6 files changed, 62 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 98e61eb0..2180fa1c 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "gv11b/subctx_gv11b.h" | 34 | #include "gv11b/subctx_gv11b.h" |
35 | 35 | ||
36 | #include <nvgpu/hw/gv100/hw_gr_gv100.h> | 36 | #include <nvgpu/hw/gv100/hw_gr_gv100.h> |
37 | #include <nvgpu/hw/gv100/hw_fb_gv100.h> | ||
37 | #include <nvgpu/hw/gv100/hw_proj_gv100.h> | 38 | #include <nvgpu/hw/gv100/hw_proj_gv100.h> |
38 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> | 39 | #include <nvgpu/hw/gv100/hw_fuse_gv100.h> |
39 | 40 | ||
@@ -280,6 +281,43 @@ exit_build_table: | |||
280 | return err; | 281 | return err; |
281 | } | 282 | } |
282 | 283 | ||
284 | void gr_gv100_init_gpc_mmu(struct gk20a *g) | ||
285 | { | ||
286 | u32 temp; | ||
287 | |||
288 | nvgpu_log_info(g, "initialize gpc mmu"); | ||
289 | |||
290 | if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { | ||
291 | /* Bypass MMU check for non-secure boot. For | ||
292 | * secure-boot,this register write has no-effect */ | ||
293 | gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff); | ||
294 | } | ||
295 | temp = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
296 | temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() | | ||
297 | gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() | | ||
298 | gr_gpcs_pri_mmu_ctrl_vol_fault_m() | | ||
299 | gr_gpcs_pri_mmu_ctrl_comp_fault_m() | | ||
300 | gr_gpcs_pri_mmu_ctrl_miss_gran_m() | | ||
301 | gr_gpcs_pri_mmu_ctrl_cache_mode_m() | | ||
302 | gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | | ||
303 | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | | ||
304 | gr_gpcs_pri_mmu_ctrl_mmu_disable_m()| | ||
305 | gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(); | ||
306 | |||
307 | gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); | ||
308 | nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x", temp); | ||
309 | |||
310 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); | ||
311 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); | ||
312 | |||
313 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), | ||
314 | gk20a_readl(g, fb_mmu_debug_ctrl_r())); | ||
315 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(), | ||
316 | gk20a_readl(g, fb_mmu_debug_wr_r())); | ||
317 | gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), | ||
318 | gk20a_readl(g, fb_mmu_debug_rd_r())); | ||
319 | } | ||
320 | |||
283 | u32 gr_gv100_get_patch_slots(struct gk20a *g) | 321 | u32 gr_gv100_get_patch_slots(struct gk20a *g) |
284 | { | 322 | { |
285 | struct gr_gk20a *gr = &g->gr; | 323 | struct gr_gk20a *gr = &g->gr; |
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index ccc73e28..821659aa 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h | |||
@@ -46,4 +46,5 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, | |||
46 | void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, | 46 | void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, |
47 | u32 num_fbpas, | 47 | u32 num_fbpas, |
48 | u32 *priv_addr_table, u32 *t); | 48 | u32 *priv_addr_table, u32 *t); |
49 | void gr_gv100_init_gpc_mmu(struct gk20a *g); | ||
49 | #endif | 50 | #endif |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index ae6c3d22..852fc1b7 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -290,7 +290,7 @@ static const struct gpu_ops gv100_ops = { | |||
290 | }, | 290 | }, |
291 | .gr = { | 291 | .gr = { |
292 | .get_patch_slots = gr_gv100_get_patch_slots, | 292 | .get_patch_slots = gr_gv100_get_patch_slots, |
293 | .init_gpc_mmu = gr_gv11b_init_gpc_mmu, | 293 | .init_gpc_mmu = gr_gv100_init_gpc_mmu, |
294 | .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, | 294 | .bundle_cb_defaults = gr_gv100_bundle_cb_defaults, |
295 | .cb_size_default = gr_gv100_cb_size_default, | 295 | .cb_size_default = gr_gv100_cb_size_default, |
296 | .calc_global_ctx_buffer_size = | 296 | .calc_global_ctx_buffer_size = |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index aed45ceb..1336557a 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -4263,7 +4263,13 @@ void gr_gv11b_init_gpc_mmu(struct gk20a *g) | |||
4263 | gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | | 4263 | gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() | |
4264 | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | | 4264 | gr_gpcs_pri_mmu_ctrl_mmu_vol_m() | |
4265 | gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); | 4265 | gr_gpcs_pri_mmu_ctrl_mmu_disable_m(); |
4266 | |||
4267 | temp = set_field(temp, gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(), | ||
4268 | gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f()); | ||
4266 | gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); | 4269 | gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp); |
4270 | nvgpu_log_info(g, "mmu_ctrl_r = 0x%08x, atomic_capability_mode_rmw", | ||
4271 | temp); | ||
4272 | |||
4267 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); | 4273 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0); |
4268 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); | 4274 | gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0); |
4269 | 4275 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h index 0f83d6ba..d2a73286 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_gr_gv100.h | |||
@@ -4028,6 +4028,14 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void) | |||
4028 | { | 4028 | { |
4029 | return 0x1U << 31U; | 4029 | return 0x1U << 31U; |
4030 | } | 4030 | } |
4031 | static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void) | ||
4032 | { | ||
4033 | return 0x3U << 24U; | ||
4034 | } | ||
4035 | static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void) | ||
4036 | { | ||
4037 | return 0x2000000U; | ||
4038 | } | ||
4031 | static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) | 4039 | static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) |
4032 | { | 4040 | { |
4033 | return 0x00418890U; | 4041 | return 0x00418890U; |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h index 5de691a2..90994a53 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_gr_gv11b.h | |||
@@ -4936,6 +4936,14 @@ static inline u32 gr_gpcs_pri_mmu_ctrl_mmu_disable_m(void) | |||
4936 | { | 4936 | { |
4937 | return 0x1U << 31U; | 4937 | return 0x1U << 31U; |
4938 | } | 4938 | } |
4939 | static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_m(void) | ||
4940 | { | ||
4941 | return 0x3U << 24U; | ||
4942 | } | ||
4943 | static inline u32 gr_gpcs_pri_mmu_ctrl_atomic_capability_mode_rmw_f(void) | ||
4944 | { | ||
4945 | return 0x2000000U; | ||
4946 | } | ||
4939 | static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) | 4947 | static inline u32 gr_gpcs_pri_mmu_pm_unit_mask_r(void) |
4940 | { | 4948 | { |
4941 | return 0x00418890U; | 4949 | return 0x00418890U; |