summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 24
-rw-r--r-- drivers/gpu/nvgpu/common/linux/vm.c | 3
-rw-r--r-- drivers/gpu/nvgpu/common/mm/gmmu.c | 9
-rw-r--r-- drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 46
-rw-r--r-- drivers/gpu/nvgpu/gk20a/bus_gk20a.c | 6
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fb_gk20a.c | 5
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 1
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 10
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 27
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 24
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | 1
-rw-r--r-- drivers/gpu/nvgpu/gm20b/bus_gm20b.c | 6
-rw-r--r-- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 12
-rw-r--r-- drivers/gpu/nvgpu/gp106/sec2_gp106.c | 2
-rw-r--r-- drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 6
-rw-r--r-- drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 28
-rw-r--r-- drivers/gpu/nvgpu/gv11b/acr_gv11b.c | 7
-rw-r--r-- drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | 7
-rw-r--r-- drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c | 11
-rw-r--r-- drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 10
-rw-r--r-- drivers/gpu/nvgpu/gv11b/mm_gv11b.c | 6
-rw-r--r-- drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | 8
-rw-r--r-- drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 5
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 23
24 files changed, 184 insertions, 103 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e441ec76..c859520d 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -34,30 +34,6 @@
34#include "gk20a/gk20a.h" 34#include "gk20a/gk20a.h"
35#include "gk20a/mm_gk20a.h" 35#include "gk20a/mm_gk20a.h"
36 36
37u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
38 u32 sysmem_mask, u32 vidmem_mask)
39{
40 switch (aperture) {
41 case APERTURE_SYSMEM:
42 /* some igpus consider system memory vidmem */
43 return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
44 ? sysmem_mask : vidmem_mask;
45 case APERTURE_VIDMEM:
46 /* for dgpus only */
47 return vidmem_mask;
48 case APERTURE_INVALID:
49 WARN_ON("Bad aperture");
50 }
51 return 0;
52}
53
54u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
55 u32 sysmem_mask, u32 vidmem_mask)
56{
57 return __nvgpu_aperture_mask(g, mem->aperture,
58 sysmem_mask, vidmem_mask);
59}
60
61int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) 37int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
62{ 38{
63 void *cpu_va; 39 void *cpu_va;
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index e3ca4eda..52b2f30c 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
166 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, 166 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
167 vm_aspace_id(vm), 167 vm_aspace_id(vm),
168 mapped_buffer->flags, 168 mapped_buffer->flags,
169 nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); 169 nvgpu_aperture_str(g,
170 gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
170 171
171 return mapped_buffer; 172 return mapped_buffer;
172} 173}
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 5abf5951..41343718 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -634,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
634 page_size >> 10, 634 page_size >> 10,
635 nvgpu_gmmu_perm_str(attrs->rw_flag), 635 nvgpu_gmmu_perm_str(attrs->rw_flag),
636 attrs->kind_v, 636 attrs->kind_v,
637 nvgpu_aperture_str(attrs->aperture), 637 nvgpu_aperture_str(g, attrs->aperture),
638 attrs->cacheable ? 'C' : '-', 638 attrs->cacheable ? 'C' : '-',
639 attrs->sparse ? 'S' : '-', 639 attrs->sparse ? 'S' : '-',
640 attrs->priv ? 'P' : '-', 640 attrs->priv ? 'P' : '-',
@@ -712,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
712 attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); 712 attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
713 713
714 /* 714 /*
715 * Handle the IO coherency aperture: make sure the .aperture field is
716 * correct based on the IO coherency flag.
717 */
718 if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
719 attrs.aperture = __APERTURE_SYSMEM_COH;
720
721 /*
715 * Only allocate a new GPU VA range if we haven't already been passed a 722 * Only allocate a new GPU VA range if we haven't already been passed a
716 * GPU VA range. This facilitates fixed mappings. 723 * GPU VA range. This facilitates fixed mappings.
717 */ 724 */
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 73b6b2a7..78a57b4e 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -28,6 +28,52 @@
28 28
29#include "gk20a/gk20a.h" 29#include "gk20a/gk20a.h"
30 30
31/*
32 * Make sure to use the right coherency aperture if you use this function! This
33 * will not add any checks. If you want to simply use the default coherency then
34 * use nvgpu_aperture_mask().
35 */
36u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
37 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
38{
39 /*
40 * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
41 * "sysmem" aperture should really be translated to VIDMEM.
42 */
43 if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
44 aperture = APERTURE_VIDMEM;
45
46 switch (aperture) {
47 case __APERTURE_SYSMEM_COH:
48 return sysmem_coh_mask;
49 case APERTURE_SYSMEM:
50 return sysmem_mask;
51 case APERTURE_VIDMEM:
52 return vidmem_mask;
53 case APERTURE_INVALID:
54 WARN_ON("Bad aperture");
55 }
56 return 0;
57}
58
59u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
60 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
61{
62 enum nvgpu_aperture ap = mem->aperture;
63
64 /*
65 * Handle the coherent aperture: ideally most of the driver is not
66 * aware of the difference between coherent and non-coherent sysmem so
67 * we add this translation step here.
68 */
69 if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) &&
70 ap == APERTURE_SYSMEM)
71 ap = __APERTURE_SYSMEM_COH;
72
73 return __nvgpu_aperture_mask(g, ap,
74 sysmem_mask, sysmem_coh_mask, vidmem_mask);
75}
76
31void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) 77void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
32{ 78{
33 return sgt->ops->sgl_next(sgl); 79 return sgt->ops->sgl_next(sgl);
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index 7f0cfe58..b2800772 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -21,6 +21,7 @@
21 */ 21 */
22 22
23#include <nvgpu/page_allocator.h> 23#include <nvgpu/page_allocator.h>
24#include <nvgpu/enabled.h>
24#include <nvgpu/log.h> 25#include <nvgpu/log.h>
25#include <nvgpu/soc.h> 26#include <nvgpu/soc.h>
26#include <nvgpu/bus.h> 27#include <nvgpu/bus.h>
@@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
155 156
156 gk20a_writel(g, bus_bar1_block_r(), 157 gk20a_writel(g, bus_bar1_block_r(),
157 nvgpu_aperture_mask(g, bar1_inst, 158 nvgpu_aperture_mask(g, bar1_inst,
158 bus_bar1_block_target_sys_mem_ncoh_f(), 159 bus_bar1_block_target_sys_mem_ncoh_f(),
159 bus_bar1_block_target_vid_mem_f()) | 160 bus_bar1_block_target_sys_mem_coh_f(),
161 bus_bar1_block_target_vid_mem_f()) |
160 bus_bar1_block_mode_virtual_f() | 162 bus_bar1_block_mode_virtual_f() |
161 bus_bar1_block_ptr_f(ptr_v)); 163 bus_bar1_block_ptr_f(ptr_v));
162 164
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index a5a2cb51..e3052701 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
98 gk20a_writel(g, fb_mmu_invalidate_pdb_r(), 98 gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
99 fb_mmu_invalidate_pdb_addr_f(addr_lo) | 99 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
100 nvgpu_aperture_mask(g, pdb, 100 nvgpu_aperture_mask(g, pdb,
101 fb_mmu_invalidate_pdb_aperture_sys_mem_f(), 101 fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
102 fb_mmu_invalidate_pdb_aperture_vid_mem_f())); 102 fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
103 fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
103 104
104 gk20a_writel(g, fb_mmu_invalidate_r(), 105 gk20a_writel(g, fb_mmu_invalidate_r(),
105 fb_mmu_invalidate_all_va_true_f() | 106 fb_mmu_invalidate_all_va_true_f() |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 409661fc..4fda0d2e 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
653 return -ENOMEM; 653 return -ENOMEM;
654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf, 654 aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), 655 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
656 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); 657 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
657 658
658 if (nvgpu_mem_begin(g, mem)) 659 if (nvgpu_mem_begin(g, mem))
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index dd0b78c0..247557aa 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3240,8 +3240,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
3240 gk20a_writel(g, fifo_runlist_base_r(), 3240 gk20a_writel(g, fifo_runlist_base_r(),
3241 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | 3241 fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
3242 nvgpu_aperture_mask(g, &runlist->mem[new_buf], 3242 nvgpu_aperture_mask(g, &runlist->mem[new_buf],
3243 fifo_runlist_base_target_sys_mem_ncoh_f(), 3243 fifo_runlist_base_target_sys_mem_ncoh_f(),
3244 fifo_runlist_base_target_vid_mem_f())); 3244 fifo_runlist_base_target_sys_mem_coh_f(),
3245 fifo_runlist_base_target_vid_mem_f()));
3245 } 3246 }
3246 3247
3247 gk20a_writel(g, fifo_runlist_r(), 3248 gk20a_writel(g, fifo_runlist_r(),
@@ -3763,8 +3764,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
3763 nvgpu_mem_wr32(g, &c->inst_block, 3764 nvgpu_mem_wr32(g, &c->inst_block,
3764 ram_in_ramfc_w() + ram_fc_userd_w(), 3765 ram_in_ramfc_w() + ram_fc_userd_w(),
3765 nvgpu_aperture_mask(g, &g->fifo.userd, 3766 nvgpu_aperture_mask(g, &g->fifo.userd,
3766 pbdma_userd_target_sys_mem_ncoh_f(), 3767 pbdma_userd_target_sys_mem_ncoh_f(),
3767 pbdma_userd_target_vid_mem_f()) | 3768 pbdma_userd_target_sys_mem_coh_f(),
3769 pbdma_userd_target_vid_mem_f()) |
3768 pbdma_userd_addr_f(addr_lo)); 3770 pbdma_userd_addr_f(addr_lo));
3769 3771
3770 nvgpu_mem_wr32(g, &c->inst_block, 3772 nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 6ae743ef..2cde10ec 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -745,8 +745,9 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> 745 u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
746 ram_in_base_shift_v(); 746 ram_in_base_shift_v();
747 u32 aperture = nvgpu_aperture_mask(g, inst_block, 747 u32 aperture = nvgpu_aperture_mask(g, inst_block,
748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(), 748 gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
749 gr_fecs_current_ctx_target_vid_mem_f()); 749 gr_fecs_current_ctx_target_sys_mem_coh_f(),
750 gr_fecs_current_ctx_target_vid_mem_f());
750 751
751 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | 752 return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
752 gr_fecs_current_ctx_valid_f(1); 753 gr_fecs_current_ctx_valid_f(1);
@@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
2171 2172
2172 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); 2173 inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
2173 gk20a_writel(g, gr_fecs_new_ctx_r(), 2174 gk20a_writel(g, gr_fecs_new_ctx_r(),
2174 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | 2175 gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
2175 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2176 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2176 gr_fecs_new_ctx_target_sys_mem_ncoh_f(), 2177 gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
2178 gr_fecs_new_ctx_target_sys_mem_coh_f(),
2177 gr_fecs_new_ctx_target_vid_mem_f()) | 2179 gr_fecs_new_ctx_target_vid_mem_f()) |
2178 gr_fecs_new_ctx_valid_m()); 2180 gr_fecs_new_ctx_valid_m());
2179 2181
2180 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), 2182 gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
2181 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | 2183 gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
2182 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, 2184 nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
2183 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), 2185 gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
2186 gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
2184 gr_fecs_arb_ctx_ptr_target_vid_mem_f())); 2187 gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
2185 2188
2186 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); 2189 gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
@@ -4379,8 +4382,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4379 4382
4380 gk20a_writel(g, fb_mmu_debug_wr_r(), 4383 gk20a_writel(g, fb_mmu_debug_wr_r(),
4381 nvgpu_aperture_mask(g, &gr->mmu_wr_mem, 4384 nvgpu_aperture_mask(g, &gr->mmu_wr_mem,
4382 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4385 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4383 fb_mmu_debug_wr_aperture_vid_mem_f()) | 4386 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4387 fb_mmu_debug_wr_aperture_vid_mem_f()) |
4384 fb_mmu_debug_wr_vol_false_f() | 4388 fb_mmu_debug_wr_vol_false_f() |
4385 fb_mmu_debug_wr_addr_f(addr)); 4389 fb_mmu_debug_wr_addr_f(addr));
4386 4390
@@ -4389,8 +4393,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
4389 4393
4390 gk20a_writel(g, fb_mmu_debug_rd_r(), 4394 gk20a_writel(g, fb_mmu_debug_rd_r(),
4391 nvgpu_aperture_mask(g, &gr->mmu_rd_mem, 4395 nvgpu_aperture_mask(g, &gr->mmu_rd_mem,
4392 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), 4396 fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
4393 fb_mmu_debug_rd_aperture_vid_mem_f()) | 4397 fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
4398 fb_mmu_debug_rd_aperture_vid_mem_f()) |
4394 fb_mmu_debug_rd_vol_false_f() | 4399 fb_mmu_debug_rd_vol_false_f() |
4395 fb_mmu_debug_rd_addr_f(addr)); 4400 fb_mmu_debug_rd_addr_f(addr));
4396 4401
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b27d1109..4ff6125b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
122{ 122{
123 u32 pde0_bits = 123 u32 pde0_bits =
124 nvgpu_aperture_mask(g, pd->mem, 124 nvgpu_aperture_mask(g, pd->mem,
125 gmmu_pde_aperture_big_sys_mem_ncoh_f(), 125 gmmu_pde_aperture_big_sys_mem_ncoh_f(),
126 gmmu_pde_aperture_big_video_memory_f()) | 126 gmmu_pde_aperture_big_sys_mem_coh_f(),
127 gmmu_pde_aperture_big_video_memory_f()) |
127 gmmu_pde_address_big_sys_f( 128 gmmu_pde_address_big_sys_f(
128 (u32)(addr >> gmmu_pde_address_shift_v())); 129 (u32)(addr >> gmmu_pde_address_shift_v()));
129 130
@@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g,
135{ 136{
136 u32 pde1_bits = 137 u32 pde1_bits =
137 nvgpu_aperture_mask(g, pd->mem, 138 nvgpu_aperture_mask(g, pd->mem,
138 gmmu_pde_aperture_small_sys_mem_ncoh_f(), 139 gmmu_pde_aperture_small_sys_mem_ncoh_f(),
139 gmmu_pde_aperture_small_video_memory_f()) | 140 gmmu_pde_aperture_small_sys_mem_coh_f(),
141 gmmu_pde_aperture_small_video_memory_f()) |
140 gmmu_pde_vol_small_true_f() | /* tbd: why? */ 142 gmmu_pde_vol_small_true_f() | /* tbd: why? */
141 gmmu_pde_address_small_sys_f( 143 gmmu_pde_address_small_sys_f(
142 (u32)(addr >> gmmu_pde_address_shift_v())); 144 (u32)(addr >> gmmu_pde_address_shift_v()));
@@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm,
215 217
216 pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, 218 pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
217 gmmu_pte_aperture_sys_mem_ncoh_f(), 219 gmmu_pte_aperture_sys_mem_ncoh_f(),
220 gmmu_pte_aperture_sys_mem_coh_f(),
218 gmmu_pte_aperture_video_memory_f()) | 221 gmmu_pte_aperture_video_memory_f()) |
219 gmmu_pte_kind_f(attrs->kind_v) | 222 gmmu_pte_kind_f(attrs->kind_v) |
220 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); 223 gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
@@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
268 page_size >> 10, 271 page_size >> 10,
269 nvgpu_gmmu_perm_str(attrs->rw_flag), 272 nvgpu_gmmu_perm_str(attrs->rw_flag),
270 attrs->kind_v, 273 attrs->kind_v,
271 nvgpu_aperture_str(attrs->aperture), 274 nvgpu_aperture_str(g, attrs->aperture),
272 attrs->cacheable ? 'C' : '-', 275 attrs->cacheable ? 'C' : '-',
273 attrs->sparse ? 'S' : '-', 276 attrs->sparse ? 'S' : '-',
274 attrs->priv ? 'P' : '-', 277 attrs->priv ? 'P' : '-',
@@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
363 gk20a_dbg_info("pde pa=0x%llx", pdb_addr); 366 gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
364 367
365 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), 368 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
366 nvgpu_aperture_mask(g, vm->pdb.mem, 369 nvgpu_aperture_mask(g, vm->pdb.mem,
367 ram_in_page_dir_base_target_sys_mem_ncoh_f(), 370 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
368 ram_in_page_dir_base_target_vid_mem_f()) | 371 ram_in_page_dir_base_target_sys_mem_coh_f(),
369 ram_in_page_dir_base_vol_true_f() | 372 ram_in_page_dir_base_target_vid_mem_f()) |
370 ram_in_page_dir_base_lo_f(pdb_addr_lo)); 373 ram_in_page_dir_base_vol_true_f() |
374 ram_in_page_dir_base_lo_f(pdb_addr_lo));
371 375
372 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), 376 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
373 ram_in_page_dir_base_hi_f(pdb_addr_hi)); 377 ram_in_page_dir_base_hi_f(pdb_addr_hi));
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 05d0473e..711aeb0d 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
41 u32 lo = (u32)(addr & 0xfffff); 41 u32 lo = (u32)(addr & 0xfffff);
42 u32 win = nvgpu_aperture_mask(g, mem, 42 u32 win = nvgpu_aperture_mask(g, mem,
43 bus_bar0_window_target_sys_mem_noncoherent_f(), 43 bus_bar0_window_target_sys_mem_noncoherent_f(),
44 bus_bar0_window_target_sys_mem_coherent_f(),
44 bus_bar0_window_target_vid_mem_f()) | 45 bus_bar0_window_target_vid_mem_f()) |
45 bus_bar0_window_base_f(hi); 46 bus_bar0_window_base_f(hi);
46 47
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
index 34c8d4b7..cdd70d5b 100644
--- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -25,6 +25,7 @@
25#include <nvgpu/timers.h> 25#include <nvgpu/timers.h>
26#include <nvgpu/bus.h> 26#include <nvgpu/bus.h>
27#include <nvgpu/mm.h> 27#include <nvgpu/mm.h>
28#include <nvgpu/enabled.h>
28 29
29#include "bus_gm20b.h" 30#include "bus_gm20b.h"
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
@@ -43,8 +44,9 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
43 44
44 gk20a_writel(g, bus_bar1_block_r(), 45 gk20a_writel(g, bus_bar1_block_r(),
45 nvgpu_aperture_mask(g, bar1_inst, 46 nvgpu_aperture_mask(g, bar1_inst,
46 bus_bar1_block_target_sys_mem_ncoh_f(), 47 bus_bar1_block_target_sys_mem_ncoh_f(),
47 bus_bar1_block_target_vid_mem_f()) | 48 bus_bar1_block_target_sys_mem_coh_f(),
49 bus_bar1_block_target_vid_mem_f()) |
48 bus_bar1_block_mode_virtual_f() | 50 bus_bar1_block_mode_virtual_f() |
49 bus_bar1_block_ptr_f(ptr_v)); 51 bus_bar1_block_ptr_f(ptr_v));
50 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); 52 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 0762e8bd..15612995 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -32,6 +32,7 @@
32#include <nvgpu/atomic.h> 32#include <nvgpu/atomic.h>
33#include <nvgpu/barrier.h> 33#include <nvgpu/barrier.h>
34#include <nvgpu/mm.h> 34#include <nvgpu/mm.h>
35#include <nvgpu/enabled.h>
35 36
36#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> 37#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
37#include <nvgpu/hw/gm20b/hw_ram_gm20b.h> 38#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -51,11 +52,12 @@ void channel_gm20b_bind(struct channel_gk20a *c)
51 52
52 53
53 gk20a_writel(g, ccsr_channel_inst_r(c->chid), 54 gk20a_writel(g, ccsr_channel_inst_r(c->chid),
54 ccsr_channel_inst_ptr_f(inst_ptr) | 55 ccsr_channel_inst_ptr_f(inst_ptr) |
55 nvgpu_aperture_mask(g, &c->inst_block, 56 nvgpu_aperture_mask(g, &c->inst_block,
56 ccsr_channel_inst_target_sys_mem_ncoh_f(), 57 ccsr_channel_inst_target_sys_mem_ncoh_f(),
57 ccsr_channel_inst_target_vid_mem_f()) | 58 ccsr_channel_inst_target_sys_mem_coh_f(),
58 ccsr_channel_inst_bind_true_f()); 59 ccsr_channel_inst_target_vid_mem_f()) |
60 ccsr_channel_inst_bind_true_f());
59 61
60 gk20a_writel(g, ccsr_channel_r(c->chid), 62 gk20a_writel(g, ccsr_channel_r(c->chid),
61 (gk20a_readl(g, ccsr_channel_r(c->chid)) & 63 (gk20a_readl(g, ccsr_channel_r(c->chid)) &
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index 29aceb7c..8e4e5900 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -99,6 +99,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu,
99 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 99 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
100 pwr_pmu_new_instblk_valid_f(1) | 100 pwr_pmu_new_instblk_valid_f(1) |
101 nvgpu_aperture_mask(g, &mm->pmu.inst_block, 101 nvgpu_aperture_mask(g, &mm->pmu.inst_block,
102 pwr_pmu_new_instblk_target_sys_ncoh_f(),
102 pwr_pmu_new_instblk_target_sys_coh_f(), 103 pwr_pmu_new_instblk_target_sys_coh_f(),
103 pwr_pmu_new_instblk_target_fb_f())); 104 pwr_pmu_new_instblk_target_fb_f()));
104 105
@@ -165,6 +166,7 @@ void init_pmu_setup_hw1(struct gk20a *g)
165 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 166 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
166 pwr_pmu_new_instblk_valid_f(1) | 167 pwr_pmu_new_instblk_valid_f(1) |
167 nvgpu_aperture_mask(g, &mm->pmu.inst_block, 168 nvgpu_aperture_mask(g, &mm->pmu.inst_block,
169 pwr_pmu_new_instblk_target_sys_ncoh_f(),
168 pwr_pmu_new_instblk_target_sys_coh_f(), 170 pwr_pmu_new_instblk_target_sys_coh_f(),
169 pwr_pmu_new_instblk_target_fb_f())); 171 pwr_pmu_new_instblk_target_fb_f()));
170 172
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index c82fb1cc..1436a260 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,6 +25,7 @@
25#include <nvgpu/dma.h> 25#include <nvgpu/dma.h>
26#include <nvgpu/bug.h> 26#include <nvgpu/bug.h>
27#include <nvgpu/log2.h> 27#include <nvgpu/log2.h>
28#include <nvgpu/enabled.h>
28 29
29#include "fifo_gp10b.h" 30#include "fifo_gp10b.h"
30 31
@@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c)
78 nvgpu_mem_wr32(g, &c->inst_block, 79 nvgpu_mem_wr32(g, &c->inst_block,
79 ram_in_ramfc_w() + ram_fc_userd_w(), 80 ram_in_ramfc_w() + ram_fc_userd_w(),
80 nvgpu_aperture_mask(g, &g->fifo.userd, 81 nvgpu_aperture_mask(g, &g->fifo.userd,
81 pbdma_userd_target_sys_mem_ncoh_f(), 82 pbdma_userd_target_sys_mem_ncoh_f(),
82 pbdma_userd_target_vid_mem_f()) | 83 pbdma_userd_target_sys_mem_coh_f(),
84 pbdma_userd_target_vid_mem_f()) |
83 pbdma_userd_addr_f(addr_lo)); 85 pbdma_userd_addr_f(addr_lo));
84 86
85 nvgpu_mem_wr32(g, &c->inst_block, 87 nvgpu_mem_wr32(g, &c->inst_block,
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0439dda9..7ff5f6a6 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g)
124 124
125 gk20a_writel(g, bus_bar2_block_r(), 125 gk20a_writel(g, bus_bar2_block_r(),
126 nvgpu_aperture_mask(g, inst_block, 126 nvgpu_aperture_mask(g, inst_block,
127 bus_bar2_block_target_sys_mem_ncoh_f(), 127 bus_bar2_block_target_sys_mem_ncoh_f(),
128 bus_bar2_block_target_vid_mem_f()) | 128 bus_bar2_block_target_sys_mem_coh_f(),
129 bus_bar2_block_target_vid_mem_f()) |
129 bus_bar2_block_mode_virtual_f() | 130 bus_bar2_block_mode_virtual_f() |
130 bus_bar2_block_ptr_f(inst_pa)); 131 bus_bar2_block_ptr_f(inst_pa));
131 132
@@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
148 phys_addr >>= gmmu_new_pde_address_shift_v(); 149 phys_addr >>= gmmu_new_pde_address_shift_v();
149 150
150 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, 151 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
151 gmmu_new_pde_aperture_sys_mem_ncoh_f(), 152 gmmu_new_pde_aperture_sys_mem_ncoh_f(),
152 gmmu_new_pde_aperture_video_memory_f()); 153 gmmu_new_pde_aperture_sys_mem_coh_f(),
154 gmmu_new_pde_aperture_video_memory_f());
153 pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); 155 pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
154 pde_v[0] |= gmmu_new_pde_vol_true_f(); 156 pde_v[0] |= gmmu_new_pde_vol_true_f();
155 pde_v[1] |= phys_addr >> 24; 157 pde_v[1] |= phys_addr >> 24;
@@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
194 gmmu_new_dual_pde_address_small_sys_f(small_addr); 196 gmmu_new_dual_pde_address_small_sys_f(small_addr);
195 pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, 197 pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
196 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), 198 gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
199 gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(),
197 gmmu_new_dual_pde_aperture_small_video_memory_f()); 200 gmmu_new_dual_pde_aperture_small_video_memory_f());
198 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); 201 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
199 pde_v[3] |= small_addr >> 24; 202 pde_v[3] |= small_addr >> 24;
@@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
204 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); 207 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
205 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, 208 pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
206 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), 209 gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
210 gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(),
207 gmmu_new_dual_pde_aperture_big_video_memory_f()); 211 gmmu_new_dual_pde_aperture_big_video_memory_f());
208 pde_v[1] |= big_addr >> 28; 212 pde_v[1] |= big_addr >> 28;
209 } 213 }
@@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm,
240 gmmu_new_pte_address_sys_f(phys_shifted) : 244 gmmu_new_pte_address_sys_f(phys_shifted) :
241 gmmu_new_pte_address_vid_f(phys_shifted); 245 gmmu_new_pte_address_vid_f(phys_shifted);
242 u32 pte_tgt = __nvgpu_aperture_mask(g, 246 u32 pte_tgt = __nvgpu_aperture_mask(g,
243 attrs->aperture, 247 attrs->aperture,
244 attrs->coherent ? 248 gmmu_new_pte_aperture_sys_mem_ncoh_f(),
245 gmmu_new_pte_aperture_sys_mem_coh_f() : 249 gmmu_new_pte_aperture_sys_mem_coh_f(),
246 gmmu_new_pte_aperture_sys_mem_ncoh_f(), 250 gmmu_new_pte_aperture_video_memory_f());
247 gmmu_new_pte_aperture_video_memory_f());
248 251
249 pte_w[0] = pte_valid | pte_addr | pte_tgt; 252 pte_w[0] = pte_valid | pte_addr | pte_tgt;
250 253
@@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
306 page_size >> 10, 309 page_size >> 10,
307 nvgpu_gmmu_perm_str(attrs->rw_flag), 310 nvgpu_gmmu_perm_str(attrs->rw_flag),
308 attrs->kind_v, 311 attrs->kind_v,
309 nvgpu_aperture_str(attrs->aperture), 312 nvgpu_aperture_str(g, attrs->aperture),
310 attrs->cacheable ? 'C' : '-', 313 attrs->cacheable ? 'C' : '-',
311 attrs->sparse ? 'S' : '-', 314 attrs->sparse ? 'S' : '-',
312 attrs->priv ? 'P' : '-', 315 attrs->priv ? 'P' : '-',
@@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
428 431
429 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), 432 nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
430 nvgpu_aperture_mask(g, vm->pdb.mem, 433 nvgpu_aperture_mask(g, vm->pdb.mem,
431 ram_in_page_dir_base_target_sys_mem_ncoh_f(), 434 ram_in_page_dir_base_target_sys_mem_ncoh_f(),
432 ram_in_page_dir_base_target_vid_mem_f()) | 435 ram_in_page_dir_base_target_sys_mem_coh_f(),
436 ram_in_page_dir_base_target_vid_mem_f()) |
433 ram_in_page_dir_base_vol_true_f() | 437 ram_in_page_dir_base_vol_true_f() |
434 ram_in_big_page_size_64kb_f() | 438 ram_in_big_page_size_64kb_f() |
435 ram_in_page_dir_base_lo_f(pdb_addr_lo) | 439 ram_in_page_dir_base_lo_f(pdb_addr_lo) |
diff --git a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
index 799b2db4..4fa3f324 100644
--- a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c
@@ -27,9 +27,10 @@
27#include <nvgpu/nvgpu_common.h> 27#include <nvgpu/nvgpu_common.h>
28#include <nvgpu/kmem.h> 28#include <nvgpu/kmem.h>
29#include <nvgpu/nvgpu_mem.h> 29#include <nvgpu/nvgpu_mem.h>
30#include <nvgpu/acr/nvgpu_acr.h>
31#include <nvgpu/firmware.h> 30#include <nvgpu/firmware.h>
32#include <nvgpu/mm.h> 31#include <nvgpu/mm.h>
32#include <nvgpu/enabled.h>
33#include <nvgpu/acr/nvgpu_acr.h>
33 34
34#include "gk20a/gk20a.h" 35#include "gk20a/gk20a.h"
35#include "acr_gv11b.h" 36#include "acr_gv11b.h"
@@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
220 pwr_pmu_new_instblk_ptr_f( 221 pwr_pmu_new_instblk_ptr_f(
221 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | 222 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
222 pwr_pmu_new_instblk_valid_f(1) | 223 pwr_pmu_new_instblk_valid_f(1) |
223 pwr_pmu_new_instblk_target_sys_ncoh_f()); 224 (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
225 pwr_pmu_new_instblk_target_sys_coh_f() :
226 pwr_pmu_new_instblk_target_sys_ncoh_f())) ;
224 227
225 /*copy bootloader interface structure to dmem*/ 228 /*copy bootloader interface structure to dmem*/
226 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, 229 nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
index 617ea61d..86977bb3 100644
--- a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c
@@ -31,14 +31,14 @@
31#include <nvgpu/dma.h> 31#include <nvgpu/dma.h>
32#include <nvgpu/mm.h> 32#include <nvgpu/mm.h>
33#include <nvgpu/sizes.h> 33#include <nvgpu/sizes.h>
34#include <nvgpu/enabled.h>
35#include <nvgpu/log.h>
36#include <nvgpu/bug.h>
34 37
35#include "gk20a/gk20a.h" 38#include "gk20a/gk20a.h"
36#include "gk20a/css_gr_gk20a.h" 39#include "gk20a/css_gr_gk20a.h"
37#include "css_gr_gv11b.h" 40#include "css_gr_gv11b.h"
38 41
39#include <nvgpu/log.h>
40#include <nvgpu/bug.h>
41
42#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> 42#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
43#include <nvgpu/hw/gv11b/hw_mc_gv11b.h> 43#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
44 44
@@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
144 perf_pmasys_mem_block_valid_true_f() | 144 perf_pmasys_mem_block_valid_true_f() |
145 nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, 145 nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
146 perf_pmasys_mem_block_target_sys_ncoh_f(), 146 perf_pmasys_mem_block_target_sys_ncoh_f(),
147 perf_pmasys_mem_block_target_sys_coh_f(),
147 perf_pmasys_mem_block_target_lfb_f())); 148 perf_pmasys_mem_block_target_lfb_f()));
148 149
149 150
diff --git a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
index e5d88e8c..562476ca 100644
--- a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c
@@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
59 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; 59 inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
60 60
61 gk20a_writel(g, perf_pmasys_mem_block_r(), 61 gk20a_writel(g, perf_pmasys_mem_block_r(),
62 perf_pmasys_mem_block_base_f(inst_pa_page) | 62 perf_pmasys_mem_block_base_f(inst_pa_page) |
63 perf_pmasys_mem_block_valid_true_f() | 63 perf_pmasys_mem_block_valid_true_f() |
64 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, 64 nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
65+ perf_pmasys_mem_block_target_sys_ncoh_f(), 65 perf_pmasys_mem_block_target_sys_ncoh_f(),
66+ perf_pmasys_mem_block_target_lfb_f())); 66 perf_pmasys_mem_block_target_sys_coh_f(),
67 perf_pmasys_mem_block_target_lfb_f()));
67 68
68 gk20a_idle(g); 69 gk20a_idle(g);
69 return 0; 70 return 0;
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 97ab7aab..6b4b07a6 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist)
101 c->runqueue_sel) | 101 c->runqueue_sel) |
102 ram_rl_entry_chan_userd_target_f( 102 ram_rl_entry_chan_userd_target_f(
103 nvgpu_aperture_mask(g, &g->fifo.userd, 103 nvgpu_aperture_mask(g, &g->fifo.userd,
104 ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), 104 ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(),
105 ram_rl_entry_chan_userd_target_vid_mem_v())) | 105 ram_rl_entry_chan_userd_target_sys_mem_coh_v(),
106 ram_rl_entry_chan_userd_target_vid_mem_v())) |
106 ram_rl_entry_chan_inst_target_f( 107 ram_rl_entry_chan_inst_target_f(
107 nvgpu_aperture_mask(g, &c->inst_block, 108 nvgpu_aperture_mask(g, &c->inst_block,
108 ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), 109 ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(),
109 ram_rl_entry_chan_inst_target_vid_mem_v())); 110 ram_rl_entry_chan_inst_target_sys_mem_coh_v(),
111 ram_rl_entry_chan_inst_target_vid_mem_v()));
110 112
111 addr_lo = u64_lo32(c->userd_iova) >> 113 addr_lo = u64_lo32(c->userd_iova) >>
112 ram_rl_entry_chan_userd_ptr_align_shift_v(); 114 ram_rl_entry_chan_userd_ptr_align_shift_v();
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
index ade1d9fe..b46ecb0a 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
@@ -26,6 +26,7 @@
26#include <nvgpu/dma.h> 26#include <nvgpu/dma.h>
27#include <nvgpu/log.h> 27#include <nvgpu/log.h>
28#include <nvgpu/mm.h> 28#include <nvgpu/mm.h>
29#include <nvgpu/enabled.h>
29 30
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
31#include "gk20a/mm_gk20a.h" 32#include "gk20a/mm_gk20a.h"
@@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g)
292 293
293 gk20a_writel(g, bus_bar2_block_r(), 294 gk20a_writel(g, bus_bar2_block_r(),
294 nvgpu_aperture_mask(g, inst_block, 295 nvgpu_aperture_mask(g, inst_block,
295 bus_bar2_block_target_sys_mem_ncoh_f(), 296 bus_bar2_block_target_sys_mem_ncoh_f(),
296 bus_bar2_block_target_vid_mem_f()) | 297 bus_bar2_block_target_sys_mem_coh_f(),
298 bus_bar2_block_target_vid_mem_f()) |
297 bus_bar2_block_mode_virtual_f() | 299 bus_bar2_block_mode_virtual_f() |
298 bus_bar2_block_ptr_f(inst_pa)); 300 bus_bar2_block_ptr_f(inst_pa));
299 301
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
index 7dd4f8f4..13e70eca 100644
--- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c
@@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
195 195
196 gk20a_writel(g, pwr_pmu_new_instblk_r(), 196 gk20a_writel(g, pwr_pmu_new_instblk_r(),
197 pwr_pmu_new_instblk_ptr_f( 197 pwr_pmu_new_instblk_ptr_f(
198 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) 198 nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) |
199 | pwr_pmu_new_instblk_valid_f(1) 199 pwr_pmu_new_instblk_valid_f(1) |
200 | pwr_pmu_new_instblk_target_sys_ncoh_f()); 200 (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
201 pwr_pmu_new_instblk_target_sys_coh_f() :
202 pwr_pmu_new_instblk_target_sys_ncoh_f()));
201 203
202 /* TBD: load all other surfaces */ 204 /* TBD: load all other surfaces */
203 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( 205 g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index 05d7dee0..bda4c8e4 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -177,8 +177,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm,
177 u32 pdb_addr_lo, pdb_addr_hi; 177 u32 pdb_addr_lo, pdb_addr_hi;
178 u64 pdb_addr; 178 u64 pdb_addr;
179 u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, 179 u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem,
180 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), 180 ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
181 ram_in_sc_page_dir_base_target_vid_mem_v()); 181 ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
182 ram_in_sc_page_dir_base_target_vid_mem_v());
182 183
183 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); 184 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
184 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); 185 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 2b8b7015..f1ab8a6e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -25,6 +25,7 @@
25 25
26#include <nvgpu/types.h> 26#include <nvgpu/types.h>
27#include <nvgpu/list.h> 27#include <nvgpu/list.h>
28#include <nvgpu/enabled.h>
28 29
29#ifdef __KERNEL__ 30#ifdef __KERNEL__
30#include <nvgpu/linux/nvgpu_mem.h> 31#include <nvgpu/linux/nvgpu_mem.h>
@@ -51,6 +52,10 @@ struct nvgpu_page_alloc;
51enum nvgpu_aperture { 52enum nvgpu_aperture {
52 APERTURE_INVALID = 0, /* unallocated or N/A */ 53 APERTURE_INVALID = 0, /* unallocated or N/A */
53 APERTURE_SYSMEM, 54 APERTURE_SYSMEM,
55
56 /* Don't use directly. Use APERTURE_SYSMEM, this is used internally. */
57 __APERTURE_SYSMEM_COH,
58
54 APERTURE_VIDMEM 59 APERTURE_VIDMEM
55}; 60};
56 61
@@ -187,12 +192,18 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
187 clear_list_entry)); 192 clear_list_entry));
188}; 193};
189 194
190static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture) 195static inline const char *nvgpu_aperture_str(struct gk20a *g,
196 enum nvgpu_aperture aperture)
191{ 197{
192 switch (aperture) { 198 switch (aperture) {
193 case APERTURE_INVALID: return "INVAL"; 199 case APERTURE_INVALID:
194 case APERTURE_SYSMEM: return "SYSMEM"; 200 return "INVAL";
195 case APERTURE_VIDMEM: return "VIDMEM"; 201 case APERTURE_SYSMEM:
202 return "SYSMEM";
203 case __APERTURE_SYSMEM_COH:
204 return "SYSCOH";
205 case APERTURE_VIDMEM:
206 return "VIDMEM";
196 }; 207 };
197 return "UNKNOWN"; 208 return "UNKNOWN";
198} 209}
@@ -322,9 +333,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
322u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); 333u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);
323 334
324u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, 335u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
325 u32 sysmem_mask, u32 vidmem_mask); 336 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
326u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, 337u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
327 u32 sysmem_mask, u32 vidmem_mask); 338 u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
328 339
329u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); 340u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys);
330 341