summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2018-02-09 14:57:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-02-27 19:03:43 -0500
commit1170687c33f7506f39aaf47acee5430233e3d1a8 (patch)
treeb41146a14a78eba99b9f326ef63efbe8ba77caab
parent71f53272b28b1086b3f34e5e255815c37504ac2c (diff)
gpu: nvgpu: Use coherent aperture flag
When using a coherent DMA API we must make sure to program any aperture fields with the coherent aperture setting. To do this the nvgpu_aperture_mask() function was modified to take a third aperture mask argument, a coherent setting, so that code can use this function to generate coherent aperture settings. The aperture choice is somewhat tricky: the default version of this function uses the state of the DMA API to determine internally what aperture to use for SYSMEM: either coherent or non-coherent. Thus a kernel user need only specify the normal nvgpu_mem struct and the correct mask should be chosen. Because many nvgpu_mem structs are not created directly by the DMA API wrapper, it's easier to translate SYSMEM to SYSMEM_COH after creation. However, the GMMU mapping code will encounter buffers from userspace with different coherency attributes than the DMA API. Thus __nvgpu_aperture_mask() strictly respects the aperture setting passed in, regardless of the DMA API state. This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT since this is either passed in from userspace or set by the kernel when using coherent DMA. The aperture field in attrs is upgraded to coh if this flag is set. This change also adds a coherent sysmem mask everywhere that it can. There are a couple of places that do not have a coherent register field defined yet. These need to eventually be defined and added. Lastly, the aperture mask code has been moved from the Linux nvgpu_mem.c code to the common nvgpu_mem.c code since this function has no Linux dependencies. Note: depends on https://git-master.nvidia.com/r/1664536 for new register fields. JIRA EVLR-2333 Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1655220 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/common/linux/nvgpu_mem.c24
-rw-r--r--drivers/gpu/nvgpu/common/linux/vm.c3
-rw-r--r--drivers/gpu/nvgpu/common/mm/gmmu.c9
-rw-r--r--drivers/gpu/nvgpu/common/mm/nvgpu_mem.c46
-rw-r--r--drivers/gpu/nvgpu/gk20a/bus_gk20a.c6
-rw-r--r--drivers/gpu/nvgpu/gk20a/fb_gk20a.c5
-rw-r--r--drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c1
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c10
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c27
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c24
-rw-r--r--drivers/gpu/nvgpu/gk20a/pramin_gk20a.c1
-rw-r--r--drivers/gpu/nvgpu/gm20b/bus_gm20b.c6
-rw-r--r--drivers/gpu/nvgpu/gm20b/fifo_gm20b.c12
-rw-r--r--drivers/gpu/nvgpu/gp106/sec2_gp106.c2
-rw-r--r--drivers/gpu/nvgpu/gp10b/fifo_gp10b.c6
-rw-r--r--drivers/gpu/nvgpu/gp10b/mm_gp10b.c28
-rw-r--r--drivers/gpu/nvgpu/gv11b/acr_gv11b.c7
-rw-r--r--drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c7
-rw-r--r--drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c11
-rw-r--r--drivers/gpu/nvgpu/gv11b/fifo_gv11b.c10
-rw-r--r--drivers/gpu/nvgpu/gv11b/mm_gv11b.c6
-rw-r--r--drivers/gpu/nvgpu/gv11b/pmu_gv11b.c8
-rw-r--r--drivers/gpu/nvgpu/gv11b/subctx_gv11b.c5
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h23
24 files changed, 184 insertions, 103 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e441ec76..c859520d 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -34,30 +34,6 @@
34#include "gk20a/gk20a.h" 34#include "gk20a/gk20a.h"
35#include "gk20a/mm_gk20a.h" 35#include "gk20a/mm_gk20a.h"
36 36
37u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
38 u32 sysmem_mask, u32 vidmem_mask)
39{
40 switch (aperture) {
41 case APERTURE_SYSMEM:
42 /* some igpus consider system memory vidmem */
43 return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
44 ? sysmem_mask : vidmem_mask;
45 case APERTURE_VIDMEM:
46 /* for dgpus only */
47 return vidmem_mask;
48 case APERTURE_INVALID:
49 WARN_ON("Bad aperture");
50 }
51 return 0;
52}
53
54u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
55 u32 sysmem_mask, u32 vidmem_mask)
56{
57 return __nvgpu_aperture_mask(g, mem->aperture,
58 sysmem_mask, vidmem_mask);
59}
60
61int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) 37int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
62{ 38{
63 void *cpu_va; 39 void *cpu_va;
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index e3ca4eda..52b2f30c 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
166 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, 166 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
167 vm_aspace_id(vm), 167 vm_aspace_id(vm),
168 mapped_buffer->flags, 168 mapped_buffer->flags,
169 nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); 169 nvgpu_aperture_str(g,
170 gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
170 171
171 return mapped_buffer; 172 return mapped_buffer;
172} 173}
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 5abf5951..41343718 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -634,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
634 page_size >> 10, 634 page_size >> 10,
635 nvgpu_gmmu_perm_str(attrs->rw_flag), 635 nvgpu_gmmu_perm_str(attrs->rw_flag),
636 attrs->kind_v, 636 attrs->kind_v,
637 nvgpu_aperture_str(attrs->aperture), 637 nvgpu_aperture_str(g, attrs->aperture),
638 attrs->cacheable ? 'C' : '-', 638 attrs->cacheable ? 'C' : '-',
639 attrs->sparse ? 'S' : '-', 639 attrs->sparse ? 'S' : '-',
640 attrs->priv ? 'P' : '-', 640 attrs->priv ? 'P' : '-',
@@ -712,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
712 attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); 712 attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
713 713
714 /* 714 /*
715 * Handle the IO coherency aperture: make sure the .aperture field is
716 * correct based on the IO coherency flag.
717 */
718 if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
719 attrs.aperture = __APERTURE_SYSMEM_COH;
720
721 /*
715 * Only allocate a new GPU VA range if we haven't already been passed a 722 * Only allocate a new GPU VA range if we haven't already been passed a
716 * GPU VA range. This facilitates fixed mappings. 723 * GPU VA range. This facilitates fixed mappings.
717 */ 724 */
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 73b6b2a7..78a57b4e 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,6 +28,52 @@
28 28
29#include "gk20a/gk20a.h" 29#include "gk20a/gk20a.h"
30 30
31/*
32 * Make sure to use the right coherency aperture if you use this function! This
33 * will not add any checks. If you want to simply use the default coherency then
34 * use nvgpu_aperture_mask().
35 */
36u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
37 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
38{
39 /*
40 * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
41 * "sysmem" aperture should really be translated to VIDMEM.
42 */
43 if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
44 aperture = APERTURE_VIDMEM;
45
46 switch (aperture) {
47 case __APERTURE_SYSMEM_COH:
48 return sysmem_coh_mask;
49 case APERTURE_SYSMEM:
50 return sysmem_mask;
51 case APERTURE_VIDMEM:
52 return vidmem_mask;
53 case APERTURE_INVALID:
54 WARN_ON("Bad aperture");
55 }
56 return 0;
57}
58
59u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
60 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
61{
62 enum nvgpu_aperture ap = mem->aperture;
63
64 /*
65 * Handle the coherent aperture: ideally most of the driver is not
66 * aware of the difference between coherent and non-coherent sysmem so
67 * we add this translation step here.
68 */
69 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
70 ap == APERTURE_SYSMEM)
71 ap = __APERTURE_SYSMEM_COH;
72
73 return __nvgpu_aperture_mask(g, ap,
74 sysmem_mask, sysmem_coh_mask, vidmem_mask);
75}
76
31void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) 77void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
32{ 78{
33 return sgt->ops->sgl_next(sgl); 79 return sgt->ops->sgl_next(sgl);
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index 7f0cfe58..b2800772 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -21,6 +21,7 @@
21 */ 21 */
22 22
23#include <nvgpu/page_allocator.h> 23#include <nvgpu/page_allocator.h>
24#include <nvgpu/enabled.h>
24#include <nvgpu/log.h> 25#include <nvgpu/log.h>
25#include <nvgpu/soc.h> 26#include <nvgpu/soc.h>
26#include <nvgpu/bus.h> 27#include <nvgpu/bus.h>
@@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
155 156
156 gk20a_writel(g, bus_bar1_block_r(), 157 gk20a_writel(g, bus_bar1_block_r(),
157 nvgpu_aperture_mask(g, bar1_inst, 158 nvgpu_aperture_mask(g, bar1_inst,
158 bus_bar1_block_target_sys_mem_ncoh_f(), 159 bus_bar1_block_target_sys_mem_ncoh_f(),
159 bus_bar1_block_target_vid_mem_f()) | 160 bus_bar1_block_target_sys_mem_coh_f(),
161 bus_bar1_block_target_vid_mem_f()) |
160 bus_bar1_block_mode_virtual_f() | 162 bus_bar1_block_mode_virtual_f() |
161 bus_bar1_block_ptr_f(ptr_v)); 163 bus_bar1_block_ptr_f(ptr_v));
162 164
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index a5a2cb51..e3052701 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
98 gk20a_writel(g, fb_mmu_invalidate_pdb_r(), 98 gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
99 fb_mmu_invalidate_pdb_addr_f(addr_lo) | 99 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
100 nvgpu_aperture_mask(g, pdb, 100 nvgpu_aperture_mask(g, pdb,
101 fb_mmu_invalidate_pdb_aperture_sys_mem_f(), 101 fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
102 fb_mmu_invalidate_pdb_aperture_vid_mem_f())); 102 fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
103 fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
103 104
104 gk20a_writel(g, fb_mmu_invalidate_r(), 105 gk20a_writel(g, fb_mmu_invalidate_r(),
105 fb_mmu_invalidate_all_va_true_f() | 106 fb_mmu_invalidate_all_va_true_f() |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 409661fc..4fda0d2e 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
653 return -ENOMEM; 653 return -ENOMEM;
654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf, 654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), 655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); 657 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
657 658
658 if (nvgpu_mem_begin(g, mem)) 659 if (nvgpu_mem_begin(g, mem))
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index dd0b78c0..247557aa 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3240,8 +3240,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
3240 gk20a_writel(g, fifo_runlist_base_r(), 3240 gk20a_writel(g, fifo_runlist_base_r(),
3241 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | 3241 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
3242 nvgpu_aperture_mask(g, &runlist->mem[new_buf], 3242 nvgpu_aperture_mask(g, &runlist->mem[new_buf],
3243 fifo_runlist_base_target_sys_mem_ncoh_f(), 3243 fifo_runlist_base_target_sys_mem_ncoh_f(),
3244 fifo_runlist_base_target_vid_mem_f())); 3244 fifo_runlist_base_target_sys_mem_coh_f(),
3245 fifo_runlist_base_target_vid_mem_f()));
3245 } 3246 }
3246 3247
3247 gk20a_writel(g, fifo_runlist_r(), 3248 gk20a_writel(g, fifo_runlist_r(),
@@ -3763,8 +3764,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
3763 nvgpu_mem_wr32(g, &c->inst_block, 3764 nvgpu_mem_wr32(g, &c->inst_block,
3764 ram_in_ramfc_w() + ram_fc_userd_w(), 3765 ram_in_ramfc_w() + ram_fc_userd_w(),
3765 nvgpu_aperture_mask(g, &g->fifo.userd, 3766 nvgpu_aperture_mask(g, &g->fifo.userd,
3766 pbdma_userd_target_sys_mem_ncoh_f(), 3767 pbdma_userd_target_sys_mem_ncoh_f(),
3767 pbdma_userd_target_vid_mem_f()) | 3768 pbdma_userd_target_sys_mem_coh_f(),
3769 pbdma_userd_target_vid_mem_f()) |
3768 pbdma_userd_addr_f(addr_lo)); 3770 pbdma_userd_addr_f(addr_lo));
3769 3771
3770 nvgpu_mem_wr32(g, &c->inst_block, 3772 nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 6ae743ef..2cde10ec 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -745,8 +745,9 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> 745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
746 ram_in_base_shift_v(); 746 ram_in_base_shift_v();
747 u32 aperture = nvgpu_aperture_mask(g, inst_block, 747 u32 aperture = nvgpu_aperture_mask(g, inst_block,
748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(), 748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
749 gr_fecs_current_ctx_target_vid_mem_f()); 749 gr_fecs_current_ctx_target_sys_mem_coh_f(),
750 gr_fecs_current_ctx_target_vid_mem_f());
750 751
751 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | 752 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
752 gr_fecs_current_ctx_valid_f(1); 753 gr_fecs_current_ctx_valid_f(1);
@@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
2171 2172
2172 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); 2173 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
2173 gk20a_writel(g, gr_fecs_new_ctx_r(), 2174 gk20a_writel(g, gr_fecs_new_ctx_r(),
2174 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | 2175 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2175 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2176 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2176 gr_fecs_new_ctx_target_sys_mem_ncoh_f(), 2177 gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
2178 gr_fecs_new_ctx_target_sys_mem_coh_f(),
2177 gr_fecs_new_ctx_target_vid_mem_f()) | 2179 gr_fecs_new_ctx_target_vid_mem_f()) |
2178 gr_fecs_new_ctx_valid_m()); 2180 gr_fecs_new_ctx_valid_m());
2179 2181
2180 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), 2182 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
2181 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | 2183 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
2182 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2184 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2183 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), 2185 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
2186 gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
2184 gr_fecs_arb_ctx_ptr_target_vid_mem_f())); 2187 gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
2185 2188
2186 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); 2189 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
@@ -4379,8 +4382,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4379 4382
4380 gk20a_writel(g, fb_mmu_debug_wr_r(), 4383 gk20a_writel(g, fb_mmu_debug_wr_r(),
4381 nvgpu_aperture_mask(g, &gr->mmu_wr_mem, 4384 nvgpu_aperture_mask(g, &gr->mmu_wr_mem,
4382 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4385 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4383 fb_mmu_debug_wr_aperture_vid_mem_f()) | 4386 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4387 fb_mmu_debug_wr_aperture_vid_mem_f()) |
4384 fb_mmu_debug_wr_vol_false_f() | 4388 fb_mmu_debug_wr_vol_false_f() |
4385 fb_mmu_debug_wr_addr_f(addr)); 4389 fb_mmu_debug_wr_addr_f(addr));
4386 4390
@@ -4389,8 +4393,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4389 4393
4390 gk20a_writel(g, fb_mmu_debug_rd_r(), 4394 gk20a_writel(g, fb_mmu_debug_rd_r(),
4391 nvgpu_aperture_mask(g, &gr->mmu_rd_mem, 4395 nvgpu_aperture_mask(g, &gr->mmu_rd_mem,
4392 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4396 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4393 fb_mmu_debug_rd_aperture_vid_mem_f()) | 4397 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4398 fb_mmu_debug_rd_aperture_vid_mem_f()) |
4394 fb_mmu_debug_rd_vol_false_f() | 4399 fb_mmu_debug_rd_vol_false_f() |
4395 fb_mmu_debug_rd_addr_f(addr)); 4400 fb_mmu_debug_rd_addr_f(addr));
4396 4401
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b27d1109..4ff6125b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
122{ 122{
123 u32 pde0_bits = 123 u32 pde0_bits =
124 nvgpu_aperture_mask(g, pd->mem, 124 nvgpu_aperture_mask(g, pd->mem,
125 gmmu_pde_aperture_big_sys_mem_ncoh_f(), 125 gmmu_pde_aperture_big_sys_mem_ncoh_f(),
126 gmmu_pde_aperture_big_video_memory_f()) | 126 gmmu_pde_aperture_big_sys_mem_coh_f(),
127 gmmu_pde_aperture_big_video_memory_f()) |
127 gmmu_pde_address_big_sys_f( 128 gmmu_pde_address_big_sys_f(
128 (u32)(addr >> gmmu_pde_address_shift_v())); 129 (u32)(addr >> gmmu_pde_address_shift_v()));
129 130
@@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g,
135{ 136{
136 u32 pde1_bits = 137 u32 pde1_bits =
137 nvgpu_aperture_mask(g, pd->mem, 138 nvgpu_aperture_mask(g, pd->mem,
138 gmmu_pde_aperture_small_sys_mem_ncoh_f(), 139 gmmu_pde_aperture_small_sys_mem_ncoh_f(),
139 gmmu_pde_aperture_small_video_memory_f()) | 140 gmmu_pde_aperture_small_sys_mem_coh_f(),
141 gmmu_pde_aperture_small_video_memory_f()) |
140 gmmu_pde_vol_small_true_f() | /* tbd: why? */ 142 gmmu_pde_vol_small_true_f() | /* tbd: why? */
141 gmmu_pde_address_small_sys_f( 143 gmmu_pde_address_small_sys_f(
142 (u32)(addr >> gmmu_pde_address_shift_v())); 144 (u32)(addr >> gmmu_pde_address_shift_v()));
@@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm,
215 217
216 pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, 218 pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
217 gmmu_pte_aperture_sys_mem_ncoh_f(), 219 gmmu_pte_aperture_sys_mem_ncoh_f(),
220 gmmu_pte_aperture_sys_mem_coh_f(),
218 gmmu_pte_aperture_video_memory_f()) | 221 gmmu_pte_aperture_video_memory_f()) |
219 gmmu_pte_kind_f(attrs->kind_v) | 222 gmmu_pte_kind_f(attrs->kind_v) |
220 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); 223 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
@@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
268 page_size >> 10, 271 page_size >> 10,
269 nvgpu_gmmu_perm_str(attrs->rw_flag), 272 nvgpu_gmmu_perm_str(attrs->rw_flag),
270 attrs->kind_v, 273 attrs->kind_v,
271 nvgpu_aperture_str(attrs->aperture), 274 nvgpu_aperture_str(g, attrs->aperture),
272 attrs->cacheable ? 'C' : '-', 275 attrs->cacheable ? 'C' : '-',
273 attrs->sparse ? 'S' : '-', 276 attrs->sparse ? 'S' : '-',
274 attrs->priv ? 'P' : '-', 277 attrs->priv ? 'P' : '-',
@@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
363 gk20a_dbg_info("pde pa=0x%llx", pdb_addr); 366 gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
364 367
365 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), 368 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
366 nvgpu_aperture_mask(g, vm->pdb.mem, 369 nvgpu_aperture_mask(g, vm->pdb.mem,
367 ram_in_page_dir_base_target_sys_mem_ncoh_f(), 370 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
368 ram_in_page_dir_base_target_vid_mem_f()) | 371 ram_in_page_dir_base_target_sys_mem_coh_f(),
369 ram_in_page_dir_base_vol_true_f() | 372 ram_in_page_dir_base_target_vid_mem_f()) |
370 ram_in_page_dir_base_lo_f(pdb_addr_lo)); 373 ram_in_page_dir_base_vol_true_f() |
374 ram_in_page_dir_base_lo_f(pdb_addr_lo));
371 375
372 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), 376 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
373 ram_in_page_dir_base_hi_f(pdb_addr_hi)); 377 ram_in_page_dir_base_hi_f(pdb_addr_hi));
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 05d0473e..711aeb0d 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
41 u32 lo = (u32)(addr & 0xfffff); 41 u32 lo = (u32)(addr & 0xfffff);
42 u32 win = nvgpu_aperture_mask(g, mem, 42 u32 win = nvgpu_aperture_mask(g, mem,
43 bus_bar0_window_target_sys_mem_noncoherent_f(), 43 bus_bar0_window_target_sys_mem_noncoherent_f(),
44 bus_bar0_window_target_sys_mem_coherent_f(),
44 bus_bar0_window_target_vid_mem_f()) | 45 bus_bar0_window_target_vid_mem_f()) |
45 bus_bar0_window_base_f(hi); 46 bus_bar0_window_base_f(hi);
46 47
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
index 34c8d4b7..cdd70d5b 100644
--- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -25,6 +25,7 @@
25#include <nvgpu/timers.h> 25#include <nvgpu/timers.h>
26#include <nvgpu/bus.h> 26#include <nvgpu/bus.h>
27#include <nvgpu/mm.h> 27#include <nvgpu/mm.h>
28#include <nvgpu/enabled.h>
28 29
29#include "bus_gm20b.h" 30#include "bus_gm20b.h"
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
@@ -43,8 +44,9 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
43 44
44 gk20a_writel(g, bus_bar1_block_r(), 45 gk20a_writel(g, bus_bar1_block_r(),
45 nvgpu_aperture_mask(g, bar1_inst, 46 nvgpu_aperture_mask(g, bar1_inst,
46 bus_bar1_block_target_sys_mem_ncoh_f(), 47 bus_bar1_block_target_sys_mem_ncoh_f(),
47 bus_bar1_block_target_vid_mem_f()) | 48 bus_bar1_block_target_sys_mem_coh_f(),
49 bus_bar1_block_target_vid_mem_f()) |
48 bus_bar1_block_mode_virtual_f() | 50 bus_bar1_block_mode_virtual_f() |
49 bus_bar1_block_ptr_f(ptr_v)); 51 bus_bar1_block_ptr_f(ptr_v));
50 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); 52 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 0762e8bd..15612995 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -32,6 +32,7 @@
32#include <nvgpu/atomic.h> 32#include <nvgpu/atomic.h>
33#include <nvgpu/barrier.h> 33#include <nvgpu/barrier.h>
34#include <nvgpu/mm.h> 34#include <nvgpu/mm.h>
35#include <nvgpu/enabled.h>
35 36
36#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> 37#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
37#include <nvgpu/hw/gm20b/hw_ram_gm20b.h> 38#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -51,11 +52,12 @@ void channel_gm20b_bind(struct channel_gk20a *c)
51 52
52 53
53 gk20a_writel(g, ccsr_channel_inst_r(c->chid), 54 gk20a_writel(g, ccsr_channel_inst_r(c->chid),
54 ccsr_channel_inst_ptr_f(inst_ptr) | 55 ccsr_channel_inst_ptr_f(inst_ptr) |
55 nvgpu_aperture_mask(g, &c->inst_block, 56 nvgpu_aperture_mask(g, &c->inst_block,
56 ccsr_channel_inst_target_sys_mem_ncoh_f(), 57 ccsr_channel_inst_target_sys_mem_ncoh_f(),
57 ccsr_channel_inst_target_vid_mem_f()) | 58 ccsr_channel_inst_target_sys_mem_coh_f(),
58 ccsr_channel_inst_bind_true_f()); 59 ccsr_channel_inst_target_vid_mem_f()) |
60 ccsr_channel_inst_bind_true_f());
59 61
60 gk20a_writel(g, ccsr_channel_r(c->chid), 62 gk20a_writel(g, ccsr_channel_r(c->chid),
61 (gk20a_readl(g, ccsr_channel_r(c->chid)) & 63 (gk20a_readl(g, ccsr_channel_r(c->chid)) &
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index 29aceb7c..8e4e5900 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -99,6 +99,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu,
99 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 99 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
100 pwr_pmu_new_instblk_valid_f(1) | 100 pwr_pmu_new_instblk_valid_f(1) |
101 nvgpu_aperture_mask(g, &mm->pmu.inst_block, 101 nvgpu_aperture_mask(g, &mm->pmu.inst_block,
102 pwr_pmu_new_instblk_target_sys_ncoh_f(),
102 pwr_pmu_new_instblk_target_sys_coh_f(), 103 pwr_pmu_new_instblk_target_sys_coh_f(),
103 pwr_pmu_new_instblk_target_fb_f())); 104 pwr_pmu_new_instblk_target_fb_f()));
104 105
@@ -165,6 +166,7 @@ void init_pmu_setup_hw1(struct gk20a *g)
165 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 166 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
166 pwr_pmu_new_instblk_valid_f(1) | 167 pwr_pmu_new_instblk_valid_f(1) |
167 nvgpu_aperture_mask(g, &mm->pmu.inst_block, 168 nvgpu_aperture_mask(g, &mm->pmu.inst_block,
169 pwr_pmu_new_instblk_target_sys_ncoh_f(),
168 pwr_pmu_new_instblk_target_sys_coh_f(), 170 pwr_pmu_new_instblk_target_sys_coh_f(),
169 pwr_pmu_new_instblk_target_fb_f())); 171 pwr_pmu_new_instblk_target_fb_f()));
170 172
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index c82fb1cc..1436a260 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,6 +25,7 @@
25#include <nvgpu/dma.h> 25#include <nvgpu/dma.h>
26#include <nvgpu/bug.h> 26#include <nvgpu/bug.h>
27#include <nvgpu/log2.h> 27#include <nvgpu/log2.h>
28#include <nvgpu/enabled.h>
28 29
29#include "fifo_gp10b.h" 30#include "fifo_gp10b.h"
30 31
@@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c)
78 nvgpu_mem_wr32(g, &c->inst_block, 79 nvgpu_mem_wr32(g, &c->inst_block,
79 ram_in_ramfc_w() + ram_fc_userd_w(), 80 ram_in_ramfc_w() + ram_fc_userd_w(),
80 nvgpu_aperture_mask(g, &g->fifo.userd, 81 nvgpu_aperture_mask(g, &g->fifo.userd,
81 pbdma_userd_target_sys_mem_ncoh_f(), 82 pbdma_userd_target_sys_mem_ncoh_f(),
82 pbdma_userd_target_vid_mem_f()) | 83 pbdma_userd_target_sys_mem_coh_f(),
84 pbdma_userd_target_vid_mem_f()) |
83 pbdma_userd_addr_f(addr_lo)); 85 pbdma_userd_addr_f(addr_lo));
84 86
85 nvgpu_mem_wr32(g, &c->inst_block, 87 nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0439dda9..7ff5f6a6 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g)
124 124
125 gk20a_writel(g, bus_bar2_block_r(), 125 gk20a_writel(g, bus_bar2_block_r(),
126 nvgpu_aperture_mask(g, inst_block, 126 nvgpu_aperture_mask(g, inst_block,
127 bus_bar2_block_target_sys_mem_ncoh_f(), 127 bus_bar2_block_target_sys_mem_ncoh_f(),
128 bus_bar2_block_target_vid_mem_f()) | 128 bus_bar2_block_target_sys_mem_coh_f(),
129 bus_bar2_block_target_vid_mem_f()) |
129 bus_bar2_block_mode_virtual_f() | 130 bus_bar2_block_mode_virtual_f() |
130 bus_bar2_block_ptr_f(inst_pa)); 131 bus_bar2_block_ptr_f(inst_pa));
131 132
@@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
148 phys_addr >>= gmmu_new_pde_address_shift_v(); 149 phys_addr >>= gmmu_new_pde_address_shift_v();
149 150
150 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, 151 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
151 gmmu_new_pde_aperture_sys_mem_ncoh_f(), 152 gmmu_new_pde_aperture_sys_mem_ncoh_f(),
152 gmmu_new_pde_aperture_video_memory_f()); 153 gmmu_new_pde_aperture_sys_mem_coh_f(),
154 gmmu_new_pde_aperture_video_memory_f());
153 pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); 155 pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
154 pde_v[0] |= gmmu_new_pde_vol_true_f(); 156 pde_v[0] |= gmmu_new_pde_vol_true_f();
155 pde_v[1] |= phys_addr >> 24; 157 pde_v[1] |= phys_addr >> 24;
@@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
194 gmmu_new_dual_pde_address_small_sys_f(small_addr); 196 gmmu_new_dual_pde_address_small_sys_f(small_addr);
195 pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, 197 pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
196 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), 198 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
199 gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(),
197 gmmu_new_dual_pde_aperture_small_video_memory_f()); 200 gmmu_new_dual_pde_aperture_small_video_memory_f());
198 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); 201 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
199 pde_v[3] |= small_addr >> 24; 202 pde_v[3] |= small_addr >> 24;
@@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
204 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); 207 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
205 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, 208 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
206 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), 209 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
210 gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(),
207 gmmu_new_dual_pde_aperture_big_video_memory_f()); 211 gmmu_new_dual_pde_aperture_big_video_memory_f());
208 pde_v[1] |= big_addr >> 28; 212 pde_v[1] |= big_addr >> 28;
209 } 213 }
@@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm,
240 gmmu_new_pte_address_sys_f(phys_shifted) : 244 gmmu_new_pte_address_sys_f(phys_shifted) :
241 gmmu_new_pte_address_vid_f(phys_shifted); 245 gmmu_new_pte_address_vid_f(phys_shifted);
242 u32 pte_tgt = __nvgpu_aperture_mask(g, 246 u32 pte_tgt = __nvgpu_aperture_mask(g,
243 attrs->aperture, 247 attrs->aperture,
244 attrs->coherent ? 248 gmmu_new_pte_aperture_sys_mem_ncoh_f(),
245 gmmu_new_pte_aperture_sys_mem_coh_f() : 249 gmmu_new_pte_aperture_sys_mem_coh_f(),
246 gmmu_new_pte_aperture_sys_mem_ncoh_f(), 250 gmmu_new_pte_aperture_video_memory_f());
247 gmmu_new_pte_aperture_video_memory_f());
248 251
249 pte_w[0] = pte_valid | pte_addr | pte_tgt; 252 pte_w[0] = pte_valid | pte_addr | pte_tgt;
250 253
@@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
306 page_size >> 10, 309 page_size >> 10,
307 nvgpu_gmmu_perm_str(attrs->rw_flag), 310 nvgpu_gmmu_perm_str(attrs->rw_flag),
308 attrs->kind_v, 311 attrs->kind_v,
309 nvgpu_aperture_str(attrs->aperture), 312 nvgpu_aperture_str(g, attrs->aperture),
310 attrs->cacheable ? 'C' : '-', 313 attrs->cacheable ? 'C' : '-',
311 attrs->sparse ? 'S' : '-', 314 attrs->sparse ? 'S' : '-',
312 attrs->priv ? 'P' : '-', 315 attrs->priv ? 'P' : '-',
@@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
428 431
429 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), 432 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
430 nvgpu_aperture_mask(g, vm->pdb.mem, 433 nvgpu_aperture_mask(g, vm->pdb.mem,
431 ram_in_page_dir_base_target_sys_mem_ncoh_f(), 434 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
432 ram_in_page_dir_base_target_vid_mem_f()) | 435 ram_in_page_dir_base_target_sys_mem_coh_f(),
436 ram_in_page_dir_base_target_vid_mem_f()) |
433 ram_in_page_dir_base_vol_true_f() | 437 ram_in_page_dir_base_vol_true_f() |
434 ram_in_big_page_size_64kb_f() | 438 ram_in_big_page_size_64kb_f() |
435 ram_in_page_dir_base_lo_f(pdb_addr_lo) | 439 ram_in_page_dir_base_lo_f(pdb_addr_lo) |
diff --git a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
index 799b2db4..4fa3f324 100644
--- a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
@@ -27,9 +27,10 @@
27#include <nvgpu/nvgpu_common.h> 27#include <nvgpu/nvgpu_common.h>
28#include <nvgpu/kmem.h> 28#include <nvgpu/kmem.h>
29#include <nvgpu/nvgpu_mem.h> 29#include <nvgpu/nvgpu_mem.h>
30#include <nvgpu/acr/nvgpu_acr.h>
31#include <nvgpu/firmware.h> 30#include <nvgpu/firmware.h>
32#include <nvgpu/mm.h> 31#include <nvgpu/mm.h>
32#include <nvgpu/enabled.h>
33#include <nvgpu/acr/nvgpu_acr.h>
33 34
34#include "gk20a/gk20a.h" 35#include "gk20a/gk20a.h"
35#include "acr_gv11b.h" 36#include "acr_gv11b.h"
@@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
220 pwr_pmu_new_instblk_ptr_f( 221 pwr_pmu_new_instblk_ptr_f(
221 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 222 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
222 pwr_pmu_new_instblk_valid_f(1) | 223 pwr_pmu_new_instblk_valid_f(1) |
223 pwr_pmu_new_instblk_target_sys_ncoh_f()); 224 (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
225 pwr_pmu_new_instblk_target_sys_coh_f() :
226 pwr_pmu_new_instblk_target_sys_ncoh_f())) ;
224 227
225 /*copy bootloader interface structure to dmem*/ 228 /*copy bootloader interface structure to dmem*/
226 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, 229 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
index 617ea61d..86977bb3 100644
--- a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
@@ -31,14 +31,14 @@
31#include <nvgpu/dma.h> 31#include <nvgpu/dma.h>
32#include <nvgpu/mm.h> 32#include <nvgpu/mm.h>
33#include <nvgpu/sizes.h> 33#include <nvgpu/sizes.h>
34#include <nvgpu/enabled.h>
35#include <nvgpu/log.h>
36#include <nvgpu/bug.h>
34 37
35#include "gk20a/gk20a.h" 38#include "gk20a/gk20a.h"
36#include "gk20a/css_gr_gk20a.h" 39#include "gk20a/css_gr_gk20a.h"
37#include "css_gr_gv11b.h" 40#include "css_gr_gv11b.h"
38 41
39#include <nvgpu/log.h>
40#include <nvgpu/bug.h>
41
42#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> 42#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
43#include <nvgpu/hw/gv11b/hw_mc_gv11b.h> 43#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
44 44
@@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
144 perf_pmasys_mem_block_valid_true_f() | 144 perf_pmasys_mem_block_valid_true_f() |
145 nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, 145 nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
146 perf_pmasys_mem_block_target_sys_ncoh_f(), 146 perf_pmasys_mem_block_target_sys_ncoh_f(),
147 perf_pmasys_mem_block_target_sys_coh_f(),
147 perf_pmasys_mem_block_target_lfb_f())); 148 perf_pmasys_mem_block_target_lfb_f()));
148 149
149 150
diff --git a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
index e5d88e8c..562476ca 100644
--- a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
@@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
59 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; 59 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
60 60
61 gk20a_writel(g, perf_pmasys_mem_block_r(), 61 gk20a_writel(g, perf_pmasys_mem_block_r(),
62 perf_pmasys_mem_block_base_f(inst_pa_page) | 62 perf_pmasys_mem_block_base_f(inst_pa_page) |
63 perf_pmasys_mem_block_valid_true_f() | 63 perf_pmasys_mem_block_valid_true_f() |
64 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, 64 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
65+ perf_pmasys_mem_block_target_sys_ncoh_f(), 65 perf_pmasys_mem_block_target_sys_ncoh_f(),
66+ perf_pmasys_mem_block_target_lfb_f())); 66 perf_pmasys_mem_block_target_sys_coh_f(),
67 perf_pmasys_mem_block_target_lfb_f()));
67 68
68 gk20a_idle(g); 69 gk20a_idle(g);
69 return 0; 70 return 0;
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 97ab7aab..6b4b07a6 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist)
101 c->runqueue_sel) | 101 c->runqueue_sel) |
102 ram_rl_entry_chan_userd_target_f( 102 ram_rl_entry_chan_userd_target_f(
103 nvgpu_aperture_mask(g, &g->fifo.userd, 103 nvgpu_aperture_mask(g, &g->fifo.userd,
104 ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), 104 ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(),
105 ram_rl_entry_chan_userd_target_vid_mem_v())) | 105 ram_rl_entry_chan_userd_target_sys_mem_coh_v(),
106 ram_rl_entry_chan_userd_target_vid_mem_v())) |
106 ram_rl_entry_chan_inst_target_f( 107 ram_rl_entry_chan_inst_target_f(
107 nvgpu_aperture_mask(g, &c->inst_block, 108 nvgpu_aperture_mask(g, &c->inst_block,
108 ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), 109 ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(),
109 ram_rl_entry_chan_inst_target_vid_mem_v())); 110 ram_rl_entry_chan_inst_target_sys_mem_coh_v(),
111 ram_rl_entry_chan_inst_target_vid_mem_v()));
110 112
111 addr_lo = u64_lo32(c->userd_iova) >> 113 addr_lo = u64_lo32(c->userd_iova) >>
112 ram_rl_entry_chan_userd_ptr_align_shift_v(); 114 ram_rl_entry_chan_userd_ptr_align_shift_v();
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
index ade1d9fe..b46ecb0a 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
@@ -26,6 +26,7 @@
26#include <nvgpu/dma.h> 26#include <nvgpu/dma.h>
27#include <nvgpu/log.h> 27#include <nvgpu/log.h>
28#include <nvgpu/mm.h> 28#include <nvgpu/mm.h>
29#include <nvgpu/enabled.h>
29 30
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
31#include "gk20a/mm_gk20a.h" 32#include "gk20a/mm_gk20a.h"
@@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g)
292 293
293 gk20a_writel(g, bus_bar2_block_r(), 294 gk20a_writel(g, bus_bar2_block_r(),
294 nvgpu_aperture_mask(g, inst_block, 295 nvgpu_aperture_mask(g, inst_block,
295 bus_bar2_block_target_sys_mem_ncoh_f(), 296 bus_bar2_block_target_sys_mem_ncoh_f(),
296 bus_bar2_block_target_vid_mem_f()) | 297 bus_bar2_block_target_sys_mem_coh_f(),
298 bus_bar2_block_target_vid_mem_f()) |
297 bus_bar2_block_mode_virtual_f() | 299 bus_bar2_block_mode_virtual_f() |
298 bus_bar2_block_ptr_f(inst_pa)); 300 bus_bar2_block_ptr_f(inst_pa));
299 301
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
index 7dd4f8f4..13e70eca 100644
--- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
@@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
195 195
196 gk20a_writel(g, pwr_pmu_new_instblk_r(), 196 gk20a_writel(g, pwr_pmu_new_instblk_r(),
197 pwr_pmu_new_instblk_ptr_f( 197 pwr_pmu_new_instblk_ptr_f(
198 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) 198 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) |
199 | pwr_pmu_new_instblk_valid_f(1) 199 pwr_pmu_new_instblk_valid_f(1) |
200 | pwr_pmu_new_instblk_target_sys_ncoh_f()); 200 (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
201 pwr_pmu_new_instblk_target_sys_coh_f() :
202 pwr_pmu_new_instblk_target_sys_ncoh_f()));
201 203
202 /* TBD: load all other surfaces */ 204 /* TBD: load all other surfaces */
203 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( 205 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index 05d7dee0..bda4c8e4 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -177,8 +177,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm,
177 u32 pdb_addr_lo, pdb_addr_hi; 177 u32 pdb_addr_lo, pdb_addr_hi;
178 u64 pdb_addr; 178 u64 pdb_addr;
179 u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, 179 u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem,
180 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 180 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
181 ram_in_sc_page_dir_base_target_vid_mem_v()); 181 ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
182 ram_in_sc_page_dir_base_target_vid_mem_v());
182 183
183 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); 184 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
184 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); 185 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 2b8b7015..f1ab8a6e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -25,6 +25,7 @@
25 25
26#include <nvgpu/types.h> 26#include <nvgpu/types.h>
27#include <nvgpu/list.h> 27#include <nvgpu/list.h>
28#include <nvgpu/enabled.h>
28 29
29#ifdef __KERNEL__ 30#ifdef __KERNEL__
30#include <nvgpu/linux/nvgpu_mem.h> 31#include <nvgpu/linux/nvgpu_mem.h>
@@ -51,6 +52,10 @@ struct nvgpu_page_alloc;
51enum nvgpu_aperture { 52enum nvgpu_aperture {
52 APERTURE_INVALID = 0, /* unallocated or N/A */ 53 APERTURE_INVALID = 0, /* unallocated or N/A */
53 APERTURE_SYSMEM, 54 APERTURE_SYSMEM,
55
56 /* Don't use directly. Use APERTURE_SYSMEM, this is used internally. */
57 __APERTURE_SYSMEM_COH,
58
54 APERTURE_VIDMEM 59 APERTURE_VIDMEM
55}; 60};
56 61
@@ -187,12 +192,18 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
187 clear_list_entry)); 192 clear_list_entry));
188}; 193};
189 194
190static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture) 195static inline const char *nvgpu_aperture_str(struct gk20a *g,
196 enum nvgpu_aperture aperture)
191{ 197{
192 switch (aperture) { 198 switch (aperture) {
193 case APERTURE_INVALID: return "INVAL"; 199 case APERTURE_INVALID:
194 case APERTURE_SYSMEM: return "SYSMEM"; 200 return "INVAL";
195 case APERTURE_VIDMEM: return "VIDMEM"; 201 case APERTURE_SYSMEM:
202 return "SYSMEM";
203 case __APERTURE_SYSMEM_COH:
204 return "SYSCOH";
205 case APERTURE_VIDMEM:
206 return "VIDMEM";
196 }; 207 };
197 return "UNKNOWN"; 208 return "UNKNOWN";
198} 209}
@@ -322,9 +333,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
322u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); 333u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);
323 334
324u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, 335u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
325 u32 sysmem_mask, u32 vidmem_mask); 336 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
326u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, 337u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
327 u32 sysmem_mask, u32 vidmem_mask); 338 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
328 339
329u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); 340u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys);
330 341