summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gp10b
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2018-02-09 14:57:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-02-27 19:03:43 -0500
commit1170687c33f7506f39aaf47acee5430233e3d1a8 (patch)
treeb41146a14a78eba99b9f326ef63efbe8ba77caab /drivers/gpu/nvgpu/gp10b
parent71f53272b28b1086b3f34e5e255815c37504ac2c (diff)
gpu: nvgpu: Use coherent aperture flag
When using a coherent DMA API we must make sure to program any aperture fields with the coherent aperture setting. To do this the nvgpu_aperture_mask() function was modified to take a third aperture mask argument, a coherent setting, so that code can use this function to generate coherent aperture settings. The aperture choice is somewhat tricky: the default version of this function uses the state of the DMA API to determine what aperture to use for SYSMEM: either coherent or non-coherent internally. Thus a kernel user need only specify the normal nvgpu_mem struct and the correct mask should be chosen. Due to many uses of nvgpu_mem structs not created directly from the DMA API wrapper it's easier to translate SYSMEM to SYSMEM_COH after creation. However, the GMMU mapping code will encounter buffers from userspace with different coherency attributes than the DMA API. Thus the __nvgpu_aperture_mask() really respects the aperture setting passed in regardless of the DMA API state. This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT since this is either passed in from userspace or set by the kernel when using coherent DMA. The aperture field in attrs is upgraded to coh if this flag is set. This change also adds a coherent sysmem mask everywhere that it can. There are a couple of places that do not have a coherent register field defined yet. These need to eventually be defined and added. Lastly, the aperture mask code has been moved from the Linux vm.c code to the general vm.c code since this function has no Linux dependencies. Note: depends on https://git-master.nvidia.com/r/1664536 for new register fields. JIRA EVLR-2333 Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1655220 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b')
-rw-r--r--drivers/gpu/nvgpu/gp10b/fifo_gp10b.c6
-rw-r--r--drivers/gpu/nvgpu/gp10b/mm_gp10b.c28
2 files changed, 20 insertions, 14 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index c82fb1cc..1436a260 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,6 +25,7 @@
25#include <nvgpu/dma.h> 25#include <nvgpu/dma.h>
26#include <nvgpu/bug.h> 26#include <nvgpu/bug.h>
27#include <nvgpu/log2.h> 27#include <nvgpu/log2.h>
28#include <nvgpu/enabled.h>
28 29
29#include "fifo_gp10b.h" 30#include "fifo_gp10b.h"
30 31
@@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c)
78 nvgpu_mem_wr32(g, &c->inst_block, 79 nvgpu_mem_wr32(g, &c->inst_block,
79 ram_in_ramfc_w() + ram_fc_userd_w(), 80 ram_in_ramfc_w() + ram_fc_userd_w(),
80 nvgpu_aperture_mask(g, &g->fifo.userd, 81 nvgpu_aperture_mask(g, &g->fifo.userd,
81 pbdma_userd_target_sys_mem_ncoh_f(), 82 pbdma_userd_target_sys_mem_ncoh_f(),
82 pbdma_userd_target_vid_mem_f()) | 83 pbdma_userd_target_sys_mem_coh_f(),
84 pbdma_userd_target_vid_mem_f()) |
83 pbdma_userd_addr_f(addr_lo)); 85 pbdma_userd_addr_f(addr_lo));
84 86
85 nvgpu_mem_wr32(g, &c->inst_block, 87 nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0439dda9..7ff5f6a6 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g)
124 124
125 gk20a_writel(g, bus_bar2_block_r(), 125 gk20a_writel(g, bus_bar2_block_r(),
126 nvgpu_aperture_mask(g, inst_block, 126 nvgpu_aperture_mask(g, inst_block,
127 bus_bar2_block_target_sys_mem_ncoh_f(), 127 bus_bar2_block_target_sys_mem_ncoh_f(),
128 bus_bar2_block_target_vid_mem_f()) | 128 bus_bar2_block_target_sys_mem_coh_f(),
129 bus_bar2_block_target_vid_mem_f()) |
129 bus_bar2_block_mode_virtual_f() | 130 bus_bar2_block_mode_virtual_f() |
130 bus_bar2_block_ptr_f(inst_pa)); 131 bus_bar2_block_ptr_f(inst_pa));
131 132
@@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
148 phys_addr >>= gmmu_new_pde_address_shift_v(); 149 phys_addr >>= gmmu_new_pde_address_shift_v();
149 150
150 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, 151 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
151 gmmu_new_pde_aperture_sys_mem_ncoh_f(), 152 gmmu_new_pde_aperture_sys_mem_ncoh_f(),
152 gmmu_new_pde_aperture_video_memory_f()); 153 gmmu_new_pde_aperture_sys_mem_coh_f(),
154 gmmu_new_pde_aperture_video_memory_f());
153 pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); 155 pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
154 pde_v[0] |= gmmu_new_pde_vol_true_f(); 156 pde_v[0] |= gmmu_new_pde_vol_true_f();
155 pde_v[1] |= phys_addr >> 24; 157 pde_v[1] |= phys_addr >> 24;
@@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
194 gmmu_new_dual_pde_address_small_sys_f(small_addr); 196 gmmu_new_dual_pde_address_small_sys_f(small_addr);
195 pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, 197 pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
196 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), 198 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
199 gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(),
197 gmmu_new_dual_pde_aperture_small_video_memory_f()); 200 gmmu_new_dual_pde_aperture_small_video_memory_f());
198 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); 201 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
199 pde_v[3] |= small_addr >> 24; 202 pde_v[3] |= small_addr >> 24;
@@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
204 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); 207 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
205 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, 208 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
206 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), 209 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
210 gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(),
207 gmmu_new_dual_pde_aperture_big_video_memory_f()); 211 gmmu_new_dual_pde_aperture_big_video_memory_f());
208 pde_v[1] |= big_addr >> 28; 212 pde_v[1] |= big_addr >> 28;
209 } 213 }
@@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm,
240 gmmu_new_pte_address_sys_f(phys_shifted) : 244 gmmu_new_pte_address_sys_f(phys_shifted) :
241 gmmu_new_pte_address_vid_f(phys_shifted); 245 gmmu_new_pte_address_vid_f(phys_shifted);
242 u32 pte_tgt = __nvgpu_aperture_mask(g, 246 u32 pte_tgt = __nvgpu_aperture_mask(g,
243 attrs->aperture, 247 attrs->aperture,
244 attrs->coherent ? 248 gmmu_new_pte_aperture_sys_mem_ncoh_f(),
245 gmmu_new_pte_aperture_sys_mem_coh_f() : 249 gmmu_new_pte_aperture_sys_mem_coh_f(),
246 gmmu_new_pte_aperture_sys_mem_ncoh_f(), 250 gmmu_new_pte_aperture_video_memory_f());
247 gmmu_new_pte_aperture_video_memory_f());
248 251
249 pte_w[0] = pte_valid | pte_addr | pte_tgt; 252 pte_w[0] = pte_valid | pte_addr | pte_tgt;
250 253
@@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
306 page_size >> 10, 309 page_size >> 10,
307 nvgpu_gmmu_perm_str(attrs->rw_flag), 310 nvgpu_gmmu_perm_str(attrs->rw_flag),
308 attrs->kind_v, 311 attrs->kind_v,
309 nvgpu_aperture_str(attrs->aperture), 312 nvgpu_aperture_str(g, attrs->aperture),
310 attrs->cacheable ? 'C' : '-', 313 attrs->cacheable ? 'C' : '-',
311 attrs->sparse ? 'S' : '-', 314 attrs->sparse ? 'S' : '-',
312 attrs->priv ? 'P' : '-', 315 attrs->priv ? 'P' : '-',
@@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
428 431
429 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), 432 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
430 nvgpu_aperture_mask(g, vm->pdb.mem, 433 nvgpu_aperture_mask(g, vm->pdb.mem,
431 ram_in_page_dir_base_target_sys_mem_ncoh_f(), 434 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
432 ram_in_page_dir_base_target_vid_mem_f()) | 435 ram_in_page_dir_base_target_sys_mem_coh_f(),
436 ram_in_page_dir_base_target_vid_mem_f()) |
433 ram_in_page_dir_base_vol_true_f() | 437 ram_in_page_dir_base_vol_true_f() |
434 ram_in_big_page_size_64kb_f() | 438 ram_in_big_page_size_64kb_f() |
435 ram_in_page_dir_base_lo_f(pdb_addr_lo) | 439 ram_in_page_dir_base_lo_f(pdb_addr_lo) |