summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gv11b
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2018-02-09 14:57:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-02-27 19:03:43 -0500
commit1170687c33f7506f39aaf47acee5430233e3d1a8 (patch)
treeb41146a14a78eba99b9f326ef63efbe8ba77caab /drivers/gpu/nvgpu/gv11b
parent71f53272b28b1086b3f34e5e255815c37504ac2c (diff)
gpu: nvgpu: Use coherent aperture flag
When using a coherent DMA API we must make sure to program any aperture fields with the coherent aperture setting. To do this the nvgpu_aperture_mask() function was modified to take a third aperture mask argument, a coherent setting, so that code can use this function to generate coherent aperture settings. The aperture choice is somewhat tricky: the default version of this function uses the state of the DMA API to determine what aperture to use for SYSMEM: either coherent or non-coherent internally. Thus a kernel user need only specify the normal nvgpu_mem struct and the correct mask should be chosen. Due to many uses of nvgpu_mem structs not created directly from the DMA API wrapper it's easier to translate SYSMEM to SYSMEM_COH after creation. However, the GMMU mapping code will encounter buffers from userspace with different coherency attributes than the DMA API. Thus the __nvgpu_aperture_mask() really respects the aperture setting passed in regardless of the DMA API state. This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT since this is either passed in from userspace or set by the kernel when using coherent DMA. The aperture field in attrs is upgraded to coh if this flag is set. This change also adds a coherent sysmem mask everywhere that it can. There are a couple of places that do not have a coherent register field defined yet. These need to eventually be defined and added. Lastly the aperture mask code has been moved from the Linux vm.c code to the general vm.c code since this function has no Linux dependencies. Note: depends on https://git-master.nvidia.com/r/1664536 for new register fields. JIRA EVLR-2333 Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1655220 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gv11b')
-rw-r--r--drivers/gpu/nvgpu/gv11b/acr_gv11b.c7
-rw-r--r--drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c7
-rw-r--r--drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c11
-rw-r--r--drivers/gpu/nvgpu/gv11b/fifo_gv11b.c10
-rw-r--r--drivers/gpu/nvgpu/gv11b/mm_gv11b.c6
-rw-r--r--drivers/gpu/nvgpu/gv11b/pmu_gv11b.c8
-rw-r--r--drivers/gpu/nvgpu/gv11b/subctx_gv11b.c5
7 files changed, 33 insertions, 21 deletions
diff --git a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
index 799b2db4..4fa3f324 100644
--- a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
@@ -27,9 +27,10 @@
27#include <nvgpu/nvgpu_common.h> 27#include <nvgpu/nvgpu_common.h>
28#include <nvgpu/kmem.h> 28#include <nvgpu/kmem.h>
29#include <nvgpu/nvgpu_mem.h> 29#include <nvgpu/nvgpu_mem.h>
30#include <nvgpu/acr/nvgpu_acr.h>
31#include <nvgpu/firmware.h> 30#include <nvgpu/firmware.h>
32#include <nvgpu/mm.h> 31#include <nvgpu/mm.h>
32#include <nvgpu/enabled.h>
33#include <nvgpu/acr/nvgpu_acr.h>
33 34
34#include "gk20a/gk20a.h" 35#include "gk20a/gk20a.h"
35#include "acr_gv11b.h" 36#include "acr_gv11b.h"
@@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
220 pwr_pmu_new_instblk_ptr_f( 221 pwr_pmu_new_instblk_ptr_f(
221 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 222 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
222 pwr_pmu_new_instblk_valid_f(1) | 223 pwr_pmu_new_instblk_valid_f(1) |
223 pwr_pmu_new_instblk_target_sys_ncoh_f()); 224 (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
225 pwr_pmu_new_instblk_target_sys_coh_f() :
226 pwr_pmu_new_instblk_target_sys_ncoh_f())) ;
224 227
225 /*copy bootloader interface structure to dmem*/ 228 /*copy bootloader interface structure to dmem*/
226 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, 229 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
index 617ea61d..86977bb3 100644
--- a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
@@ -31,14 +31,14 @@
31#include <nvgpu/dma.h> 31#include <nvgpu/dma.h>
32#include <nvgpu/mm.h> 32#include <nvgpu/mm.h>
33#include <nvgpu/sizes.h> 33#include <nvgpu/sizes.h>
34#include <nvgpu/enabled.h>
35#include <nvgpu/log.h>
36#include <nvgpu/bug.h>
34 37
35#include "gk20a/gk20a.h" 38#include "gk20a/gk20a.h"
36#include "gk20a/css_gr_gk20a.h" 39#include "gk20a/css_gr_gk20a.h"
37#include "css_gr_gv11b.h" 40#include "css_gr_gv11b.h"
38 41
39#include <nvgpu/log.h>
40#include <nvgpu/bug.h>
41
42#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> 42#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
43#include <nvgpu/hw/gv11b/hw_mc_gv11b.h> 43#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
44 44
@@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
144 perf_pmasys_mem_block_valid_true_f() | 144 perf_pmasys_mem_block_valid_true_f() |
145 nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, 145 nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
146 perf_pmasys_mem_block_target_sys_ncoh_f(), 146 perf_pmasys_mem_block_target_sys_ncoh_f(),
147 perf_pmasys_mem_block_target_sys_coh_f(),
147 perf_pmasys_mem_block_target_lfb_f())); 148 perf_pmasys_mem_block_target_lfb_f()));
148 149
149 150
diff --git a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
index e5d88e8c..562476ca 100644
--- a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
@@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
59 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; 59 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
60 60
61 gk20a_writel(g, perf_pmasys_mem_block_r(), 61 gk20a_writel(g, perf_pmasys_mem_block_r(),
62 perf_pmasys_mem_block_base_f(inst_pa_page) | 62 perf_pmasys_mem_block_base_f(inst_pa_page) |
63 perf_pmasys_mem_block_valid_true_f() | 63 perf_pmasys_mem_block_valid_true_f() |
64 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, 64 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
65+ perf_pmasys_mem_block_target_sys_ncoh_f(), 65 perf_pmasys_mem_block_target_sys_ncoh_f(),
66+ perf_pmasys_mem_block_target_lfb_f())); 66 perf_pmasys_mem_block_target_sys_coh_f(),
67 perf_pmasys_mem_block_target_lfb_f()));
67 68
68 gk20a_idle(g); 69 gk20a_idle(g);
69 return 0; 70 return 0;
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 97ab7aab..6b4b07a6 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist)
101 c->runqueue_sel) | 101 c->runqueue_sel) |
102 ram_rl_entry_chan_userd_target_f( 102 ram_rl_entry_chan_userd_target_f(
103 nvgpu_aperture_mask(g, &g->fifo.userd, 103 nvgpu_aperture_mask(g, &g->fifo.userd,
104 ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), 104 ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(),
105 ram_rl_entry_chan_userd_target_vid_mem_v())) | 105 ram_rl_entry_chan_userd_target_sys_mem_coh_v(),
106 ram_rl_entry_chan_userd_target_vid_mem_v())) |
106 ram_rl_entry_chan_inst_target_f( 107 ram_rl_entry_chan_inst_target_f(
107 nvgpu_aperture_mask(g, &c->inst_block, 108 nvgpu_aperture_mask(g, &c->inst_block,
108 ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), 109 ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(),
109 ram_rl_entry_chan_inst_target_vid_mem_v())); 110 ram_rl_entry_chan_inst_target_sys_mem_coh_v(),
111 ram_rl_entry_chan_inst_target_vid_mem_v()));
110 112
111 addr_lo = u64_lo32(c->userd_iova) >> 113 addr_lo = u64_lo32(c->userd_iova) >>
112 ram_rl_entry_chan_userd_ptr_align_shift_v(); 114 ram_rl_entry_chan_userd_ptr_align_shift_v();
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
index ade1d9fe..b46ecb0a 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
@@ -26,6 +26,7 @@
26#include <nvgpu/dma.h> 26#include <nvgpu/dma.h>
27#include <nvgpu/log.h> 27#include <nvgpu/log.h>
28#include <nvgpu/mm.h> 28#include <nvgpu/mm.h>
29#include <nvgpu/enabled.h>
29 30
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
31#include "gk20a/mm_gk20a.h" 32#include "gk20a/mm_gk20a.h"
@@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g)
292 293
293 gk20a_writel(g, bus_bar2_block_r(), 294 gk20a_writel(g, bus_bar2_block_r(),
294 nvgpu_aperture_mask(g, inst_block, 295 nvgpu_aperture_mask(g, inst_block,
295 bus_bar2_block_target_sys_mem_ncoh_f(), 296 bus_bar2_block_target_sys_mem_ncoh_f(),
296 bus_bar2_block_target_vid_mem_f()) | 297 bus_bar2_block_target_sys_mem_coh_f(),
298 bus_bar2_block_target_vid_mem_f()) |
297 bus_bar2_block_mode_virtual_f() | 299 bus_bar2_block_mode_virtual_f() |
298 bus_bar2_block_ptr_f(inst_pa)); 300 bus_bar2_block_ptr_f(inst_pa));
299 301
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
index 7dd4f8f4..13e70eca 100644
--- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
@@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
195 195
196 gk20a_writel(g, pwr_pmu_new_instblk_r(), 196 gk20a_writel(g, pwr_pmu_new_instblk_r(),
197 pwr_pmu_new_instblk_ptr_f( 197 pwr_pmu_new_instblk_ptr_f(
198 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) 198 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) |
199 | pwr_pmu_new_instblk_valid_f(1) 199 pwr_pmu_new_instblk_valid_f(1) |
200 | pwr_pmu_new_instblk_target_sys_ncoh_f()); 200 (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
201 pwr_pmu_new_instblk_target_sys_coh_f() :
202 pwr_pmu_new_instblk_target_sys_ncoh_f()));
201 203
202 /* TBD: load all other surfaces */ 204 /* TBD: load all other surfaces */
203 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( 205 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index 05d7dee0..bda4c8e4 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -177,8 +177,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm,
177 u32 pdb_addr_lo, pdb_addr_hi; 177 u32 pdb_addr_lo, pdb_addr_hi;
178 u64 pdb_addr; 178 u64 pdb_addr;
179 u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, 179 u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem,
180 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 180 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
181 ram_in_sc_page_dir_base_target_vid_mem_v()); 181 ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
182 ram_in_sc_page_dir_base_target_vid_mem_v());
182 183
183 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); 184 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
184 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); 185 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());