summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2018-02-09 14:57:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-02-27 19:03:43 -0500
commit1170687c33f7506f39aaf47acee5430233e3d1a8 (patch)
treeb41146a14a78eba99b9f326ef63efbe8ba77caab /drivers/gpu/nvgpu/gk20a
parent71f53272b28b1086b3f34e5e255815c37504ac2c (diff)
gpu: nvgpu: Use coherent aperture flag
When using a coherent DMA API wee must make sure to program any aperture fields with the coherent aperture setting. To do this the nvgpu_aperture_mask() function was modified to take a third aperture mask argument, a coherent setting, so that code can use this function to generate coherent aperture settings. The aperture choice is some what tricky: the default version of this function uses the state of the DMA API to determine what aperture to use for SYSMEM: either coherent or non-coherent internally. Thus a kernel user need only specify the normal nvgpu_mem struct and the correct mask should be chosen. Due to many uses of nvgpu_mem structs not created directly from the DMA API wrapper it's easier to translate SYSMEM to SYSMEM_COH after creation. However, the GMMU mapping code, will encounter buffers from userspace with difference coerency attributes than the DMA API. Thus the __nvgpu_aperture_mask() really respects the aperture setting passed in regardless of the DMA API state. This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT since this is either passed in from userspace or set by the kernel when using coherent DMA. The aperture field in attrs is upgraded to coh if this flag is set. This change also adds a coherent sysmem mask everywhere that it can. There's a couple places that do not have a coherent register field defined yet. These need to eventually be defined and added. Lastly the aperture mask code has been mvoed from the Linux vm.c code to the general vm.c code since this function has no Linux dependencies. Note: depends on https://git-master.nvidia.com/r/1664536 for new register fields. JIRA EVLR-2333 Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1655220 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--drivers/gpu/nvgpu/gk20a/bus_gk20a.c6
-rw-r--r--drivers/gpu/nvgpu/gk20a/fb_gk20a.c5
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c1
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c10
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c27
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c24
-rw-r--r--drivers/gpu/nvgpu/gk20a/pramin_gk20a.c1
7 files changed, 45 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index 7f0cfe58..b2800772 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -21,6 +21,7 @@
21 */ 21 */
22 22
23#include <nvgpu/page_allocator.h> 23#include <nvgpu/page_allocator.h>
24#include <nvgpu/enabled.h>
24#include <nvgpu/log.h> 25#include <nvgpu/log.h>
25#include <nvgpu/soc.h> 26#include <nvgpu/soc.h>
26#include <nvgpu/bus.h> 27#include <nvgpu/bus.h>
@@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
155 156
156 gk20a_writel(g, bus_bar1_block_r(), 157 gk20a_writel(g, bus_bar1_block_r(),
157 nvgpu_aperture_mask(g, bar1_inst, 158 nvgpu_aperture_mask(g, bar1_inst,
158 bus_bar1_block_target_sys_mem_ncoh_f(), 159 bus_bar1_block_target_sys_mem_ncoh_f(),
159 bus_bar1_block_target_vid_mem_f()) | 160 bus_bar1_block_target_sys_mem_coh_f(),
161 bus_bar1_block_target_vid_mem_f()) |
160 bus_bar1_block_mode_virtual_f() | 162 bus_bar1_block_mode_virtual_f() |
161 bus_bar1_block_ptr_f(ptr_v)); 163 bus_bar1_block_ptr_f(ptr_v));
162 164
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index a5a2cb51..e3052701 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
98 gk20a_writel(g, fb_mmu_invalidate_pdb_r(), 98 gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
99 fb_mmu_invalidate_pdb_addr_f(addr_lo) | 99 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
100 nvgpu_aperture_mask(g, pdb, 100 nvgpu_aperture_mask(g, pdb,
101 fb_mmu_invalidate_pdb_aperture_sys_mem_f(), 101 fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
102 fb_mmu_invalidate_pdb_aperture_vid_mem_f())); 102 fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
103 fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
103 104
104 gk20a_writel(g, fb_mmu_invalidate_r(), 105 gk20a_writel(g, fb_mmu_invalidate_r(),
105 fb_mmu_invalidate_all_va_true_f() | 106 fb_mmu_invalidate_all_va_true_f() |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 409661fc..4fda0d2e 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
653 return -ENOMEM; 653 return -ENOMEM;
654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf, 654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), 655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); 657 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
657 658
658 if (nvgpu_mem_begin(g, mem)) 659 if (nvgpu_mem_begin(g, mem))
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index dd0b78c0..247557aa 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3240,8 +3240,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
3240 gk20a_writel(g, fifo_runlist_base_r(), 3240 gk20a_writel(g, fifo_runlist_base_r(),
3241 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | 3241 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
3242 nvgpu_aperture_mask(g, &runlist->mem[new_buf], 3242 nvgpu_aperture_mask(g, &runlist->mem[new_buf],
3243 fifo_runlist_base_target_sys_mem_ncoh_f(), 3243 fifo_runlist_base_target_sys_mem_ncoh_f(),
3244 fifo_runlist_base_target_vid_mem_f())); 3244 fifo_runlist_base_target_sys_mem_coh_f(),
3245 fifo_runlist_base_target_vid_mem_f()));
3245 } 3246 }
3246 3247
3247 gk20a_writel(g, fifo_runlist_r(), 3248 gk20a_writel(g, fifo_runlist_r(),
@@ -3763,8 +3764,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
3763 nvgpu_mem_wr32(g, &c->inst_block, 3764 nvgpu_mem_wr32(g, &c->inst_block,
3764 ram_in_ramfc_w() + ram_fc_userd_w(), 3765 ram_in_ramfc_w() + ram_fc_userd_w(),
3765 nvgpu_aperture_mask(g, &g->fifo.userd, 3766 nvgpu_aperture_mask(g, &g->fifo.userd,
3766 pbdma_userd_target_sys_mem_ncoh_f(), 3767 pbdma_userd_target_sys_mem_ncoh_f(),
3767 pbdma_userd_target_vid_mem_f()) | 3768 pbdma_userd_target_sys_mem_coh_f(),
3769 pbdma_userd_target_vid_mem_f()) |
3768 pbdma_userd_addr_f(addr_lo)); 3770 pbdma_userd_addr_f(addr_lo));
3769 3771
3770 nvgpu_mem_wr32(g, &c->inst_block, 3772 nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 6ae743ef..2cde10ec 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -745,8 +745,9 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> 745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
746 ram_in_base_shift_v(); 746 ram_in_base_shift_v();
747 u32 aperture = nvgpu_aperture_mask(g, inst_block, 747 u32 aperture = nvgpu_aperture_mask(g, inst_block,
748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(), 748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
749 gr_fecs_current_ctx_target_vid_mem_f()); 749 gr_fecs_current_ctx_target_sys_mem_coh_f(),
750 gr_fecs_current_ctx_target_vid_mem_f());
750 751
751 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | 752 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
752 gr_fecs_current_ctx_valid_f(1); 753 gr_fecs_current_ctx_valid_f(1);
@@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
2171 2172
2172 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); 2173 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
2173 gk20a_writel(g, gr_fecs_new_ctx_r(), 2174 gk20a_writel(g, gr_fecs_new_ctx_r(),
2174 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | 2175 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2175 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2176 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2176 gr_fecs_new_ctx_target_sys_mem_ncoh_f(), 2177 gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
2178 gr_fecs_new_ctx_target_sys_mem_coh_f(),
2177 gr_fecs_new_ctx_target_vid_mem_f()) | 2179 gr_fecs_new_ctx_target_vid_mem_f()) |
2178 gr_fecs_new_ctx_valid_m()); 2180 gr_fecs_new_ctx_valid_m());
2179 2181
2180 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), 2182 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
2181 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | 2183 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
2182 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2184 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2183 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), 2185 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
2186 gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
2184 gr_fecs_arb_ctx_ptr_target_vid_mem_f())); 2187 gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
2185 2188
2186 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); 2189 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
@@ -4379,8 +4382,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4379 4382
4380 gk20a_writel(g, fb_mmu_debug_wr_r(), 4383 gk20a_writel(g, fb_mmu_debug_wr_r(),
4381 nvgpu_aperture_mask(g, &gr->mmu_wr_mem, 4384 nvgpu_aperture_mask(g, &gr->mmu_wr_mem,
4382 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4385 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4383 fb_mmu_debug_wr_aperture_vid_mem_f()) | 4386 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4387 fb_mmu_debug_wr_aperture_vid_mem_f()) |
4384 fb_mmu_debug_wr_vol_false_f() | 4388 fb_mmu_debug_wr_vol_false_f() |
4385 fb_mmu_debug_wr_addr_f(addr)); 4389 fb_mmu_debug_wr_addr_f(addr));
4386 4390
@@ -4389,8 +4393,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4389 4393
4390 gk20a_writel(g, fb_mmu_debug_rd_r(), 4394 gk20a_writel(g, fb_mmu_debug_rd_r(),
4391 nvgpu_aperture_mask(g, &gr->mmu_rd_mem, 4395 nvgpu_aperture_mask(g, &gr->mmu_rd_mem,
4392 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4396 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4393 fb_mmu_debug_rd_aperture_vid_mem_f()) | 4397 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4398 fb_mmu_debug_rd_aperture_vid_mem_f()) |
4394 fb_mmu_debug_rd_vol_false_f() | 4399 fb_mmu_debug_rd_vol_false_f() |
4395 fb_mmu_debug_rd_addr_f(addr)); 4400 fb_mmu_debug_rd_addr_f(addr));
4396 4401
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b27d1109..4ff6125b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
122{ 122{
123 u32 pde0_bits = 123 u32 pde0_bits =
124 nvgpu_aperture_mask(g, pd->mem, 124 nvgpu_aperture_mask(g, pd->mem,
125 gmmu_pde_aperture_big_sys_mem_ncoh_f(), 125 gmmu_pde_aperture_big_sys_mem_ncoh_f(),
126 gmmu_pde_aperture_big_video_memory_f()) | 126 gmmu_pde_aperture_big_sys_mem_coh_f(),
127 gmmu_pde_aperture_big_video_memory_f()) |
127 gmmu_pde_address_big_sys_f( 128 gmmu_pde_address_big_sys_f(
128 (u32)(addr >> gmmu_pde_address_shift_v())); 129 (u32)(addr >> gmmu_pde_address_shift_v()));
129 130
@@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g,
135{ 136{
136 u32 pde1_bits = 137 u32 pde1_bits =
137 nvgpu_aperture_mask(g, pd->mem, 138 nvgpu_aperture_mask(g, pd->mem,
138 gmmu_pde_aperture_small_sys_mem_ncoh_f(), 139 gmmu_pde_aperture_small_sys_mem_ncoh_f(),
139 gmmu_pde_aperture_small_video_memory_f()) | 140 gmmu_pde_aperture_small_sys_mem_coh_f(),
141 gmmu_pde_aperture_small_video_memory_f()) |
140 gmmu_pde_vol_small_true_f() | /* tbd: why? */ 142 gmmu_pde_vol_small_true_f() | /* tbd: why? */
141 gmmu_pde_address_small_sys_f( 143 gmmu_pde_address_small_sys_f(
142 (u32)(addr >> gmmu_pde_address_shift_v())); 144 (u32)(addr >> gmmu_pde_address_shift_v()));
@@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm,
215 217
216 pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, 218 pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
217 gmmu_pte_aperture_sys_mem_ncoh_f(), 219 gmmu_pte_aperture_sys_mem_ncoh_f(),
220 gmmu_pte_aperture_sys_mem_coh_f(),
218 gmmu_pte_aperture_video_memory_f()) | 221 gmmu_pte_aperture_video_memory_f()) |
219 gmmu_pte_kind_f(attrs->kind_v) | 222 gmmu_pte_kind_f(attrs->kind_v) |
220 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); 223 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
@@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
268 page_size >> 10, 271 page_size >> 10,
269 nvgpu_gmmu_perm_str(attrs->rw_flag), 272 nvgpu_gmmu_perm_str(attrs->rw_flag),
270 attrs->kind_v, 273 attrs->kind_v,
271 nvgpu_aperture_str(attrs->aperture), 274 nvgpu_aperture_str(g, attrs->aperture),
272 attrs->cacheable ? 'C' : '-', 275 attrs->cacheable ? 'C' : '-',
273 attrs->sparse ? 'S' : '-', 276 attrs->sparse ? 'S' : '-',
274 attrs->priv ? 'P' : '-', 277 attrs->priv ? 'P' : '-',
@@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
363 gk20a_dbg_info("pde pa=0x%llx", pdb_addr); 366 gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
364 367
365 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), 368 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
366 nvgpu_aperture_mask(g, vm->pdb.mem, 369 nvgpu_aperture_mask(g, vm->pdb.mem,
367 ram_in_page_dir_base_target_sys_mem_ncoh_f(), 370 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
368 ram_in_page_dir_base_target_vid_mem_f()) | 371 ram_in_page_dir_base_target_sys_mem_coh_f(),
369 ram_in_page_dir_base_vol_true_f() | 372 ram_in_page_dir_base_target_vid_mem_f()) |
370 ram_in_page_dir_base_lo_f(pdb_addr_lo)); 373 ram_in_page_dir_base_vol_true_f() |
374 ram_in_page_dir_base_lo_f(pdb_addr_lo));
371 375
372 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), 376 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
373 ram_in_page_dir_base_hi_f(pdb_addr_hi)); 377 ram_in_page_dir_base_hi_f(pdb_addr_hi));
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 05d0473e..711aeb0d 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
41 u32 lo = (u32)(addr & 0xfffff); 41 u32 lo = (u32)(addr & 0xfffff);
42 u32 win = nvgpu_aperture_mask(g, mem, 42 u32 win = nvgpu_aperture_mask(g, mem,
43 bus_bar0_window_target_sys_mem_noncoherent_f(), 43 bus_bar0_window_target_sys_mem_noncoherent_f(),
44 bus_bar0_window_target_sys_mem_coherent_f(),
44 bus_bar0_window_target_vid_mem_f()) | 45 bus_bar0_window_target_vid_mem_f()) |
45 bus_bar0_window_base_f(hi); 46 bus_bar0_window_base_f(hi);
46 47