diff options
author | Alex Waterman <alexw@nvidia.com> | 2018-02-09 14:57:54 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-02-27 19:03:43 -0500 |
commit | 1170687c33f7506f39aaf47acee5430233e3d1a8 (patch) | |
tree | b41146a14a78eba99b9f326ef63efbe8ba77caab /drivers | |
parent | 71f53272b28b1086b3f34e5e255815c37504ac2c (diff) |
gpu: nvgpu: Use coherent aperture flag
When using a coherent DMA API we must make sure to program
any aperture fields with the coherent aperture setting. To
do this the nvgpu_aperture_mask() function was modified to
take a third aperture mask argument, a coherent setting, so
that code can use this function to generate coherent aperture
settings.
The aperture choice is somewhat tricky: the default version
of this function uses the state of the DMA API to determine
what aperture to use for SYSMEM: either coherent or
non-coherent internally. Thus a kernel user need only specify
the normal nvgpu_mem struct and the correct mask should be
chosen. Due to many uses of nvgpu_mem structs not created
directly from the DMA API wrapper it's easier to translate
SYSMEM to SYSMEM_COH after creation.
However, the GMMU mapping code will encounter buffers from
userspace with different coherency attributes than the DMA
API. Thus the __nvgpu_aperture_mask() really respects the
aperture setting passed in regardless of the DMA API state.
This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT
since this is either passed in from userspace or set by the
kernel when using coherent DMA. The aperture field in attrs
is upgraded to coh if this flag is set.
This change also adds a coherent sysmem mask everywhere that
it can. There are a couple of places that do not have a coherent
register field defined yet. These need to eventually be
defined and added.
Lastly the aperture mask code has been moved from the Linux
nvgpu_mem.c code to the general nvgpu_mem.c code since this function has
no Linux dependencies.
Note: depends on https://git-master.nvidia.com/r/1664536 for
new register fields.
JIRA EVLR-2333
Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1655220
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
24 files changed, 184 insertions, 103 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index e441ec76..c859520d 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -34,30 +34,6 @@ | |||
34 | #include "gk20a/gk20a.h" | 34 | #include "gk20a/gk20a.h" |
35 | #include "gk20a/mm_gk20a.h" | 35 | #include "gk20a/mm_gk20a.h" |
36 | 36 | ||
37 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | ||
38 | u32 sysmem_mask, u32 vidmem_mask) | ||
39 | { | ||
40 | switch (aperture) { | ||
41 | case APERTURE_SYSMEM: | ||
42 | /* some igpus consider system memory vidmem */ | ||
43 | return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE) | ||
44 | ? sysmem_mask : vidmem_mask; | ||
45 | case APERTURE_VIDMEM: | ||
46 | /* for dgpus only */ | ||
47 | return vidmem_mask; | ||
48 | case APERTURE_INVALID: | ||
49 | WARN_ON("Bad aperture"); | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | ||
55 | u32 sysmem_mask, u32 vidmem_mask) | ||
56 | { | ||
57 | return __nvgpu_aperture_mask(g, mem->aperture, | ||
58 | sysmem_mask, vidmem_mask); | ||
59 | } | ||
60 | |||
61 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) | 37 | int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) |
62 | { | 38 | { |
63 | void *cpu_va; | 39 | void *cpu_va; |
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index e3ca4eda..52b2f30c 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, | |||
166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, | 166 | vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, |
167 | vm_aspace_id(vm), | 167 | vm_aspace_id(vm), |
168 | mapped_buffer->flags, | 168 | mapped_buffer->flags, |
169 | nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | 169 | nvgpu_aperture_str(g, |
170 | gk20a_dmabuf_aperture(g, os_buf->dmabuf))); | ||
170 | 171 | ||
171 | return mapped_buffer; | 172 | return mapped_buffer; |
172 | } | 173 | } |
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 5abf5951..41343718 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -634,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
634 | page_size >> 10, | 634 | page_size >> 10, |
635 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 635 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
636 | attrs->kind_v, | 636 | attrs->kind_v, |
637 | nvgpu_aperture_str(attrs->aperture), | 637 | nvgpu_aperture_str(g, attrs->aperture), |
638 | attrs->cacheable ? 'C' : '-', | 638 | attrs->cacheable ? 'C' : '-', |
639 | attrs->sparse ? 'S' : '-', | 639 | attrs->sparse ? 'S' : '-', |
640 | attrs->priv ? 'P' : '-', | 640 | attrs->priv ? 'P' : '-', |
@@ -712,6 +712,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
712 | attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); | 712 | attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); |
713 | 713 | ||
714 | /* | 714 | /* |
715 | * Handle the IO coherency aperture: make sure the .aperture field is | ||
716 | * correct based on the IO coherency flag. | ||
717 | */ | ||
718 | if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM) | ||
719 | attrs.aperture = __APERTURE_SYSMEM_COH; | ||
720 | |||
721 | /* | ||
715 | * Only allocate a new GPU VA range if we haven't already been passed a | 722 | * Only allocate a new GPU VA range if we haven't already been passed a |
716 | * GPU VA range. This facilitates fixed mappings. | 723 | * GPU VA range. This facilitates fixed mappings. |
717 | */ | 724 | */ |
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 73b6b2a7..78a57b4e 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | |||
@@ -28,6 +28,52 @@ | |||
28 | 28 | ||
29 | #include "gk20a/gk20a.h" | 29 | #include "gk20a/gk20a.h" |
30 | 30 | ||
31 | /* | ||
32 | * Make sure to use the right coherency aperture if you use this function! This | ||
33 | * will not add any checks. If you want to simply use the default coherency then | ||
34 | * use nvgpu_aperture_mask(). | ||
35 | */ | ||
36 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | ||
37 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) | ||
38 | { | ||
39 | /* | ||
40 | * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the | ||
41 | * "sysmem" aperture should really be translated to VIDMEM. | ||
42 | */ | ||
43 | if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)) | ||
44 | aperture = APERTURE_VIDMEM; | ||
45 | |||
46 | switch (aperture) { | ||
47 | case __APERTURE_SYSMEM_COH: | ||
48 | return sysmem_coh_mask; | ||
49 | case APERTURE_SYSMEM: | ||
50 | return sysmem_mask; | ||
51 | case APERTURE_VIDMEM: | ||
52 | return vidmem_mask; | ||
53 | case APERTURE_INVALID: | ||
54 | WARN_ON("Bad aperture"); | ||
55 | } | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | ||
60 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) | ||
61 | { | ||
62 | enum nvgpu_aperture ap = mem->aperture; | ||
63 | |||
64 | /* | ||
65 | * Handle the coherent aperture: ideally most of the driver is not | ||
66 | * aware of the difference between coherent and non-coherent sysmem so | ||
67 | * we add this translation step here. | ||
68 | */ | ||
69 | if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) && | ||
70 | ap == APERTURE_SYSMEM) | ||
71 | ap = __APERTURE_SYSMEM_COH; | ||
72 | |||
73 | return __nvgpu_aperture_mask(g, ap, | ||
74 | sysmem_mask, sysmem_coh_mask, vidmem_mask); | ||
75 | } | ||
76 | |||
31 | void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) | 77 | void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) |
32 | { | 78 | { |
33 | return sgt->ops->sgl_next(sgl); | 79 | return sgt->ops->sgl_next(sgl); |
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index 7f0cfe58..b2800772 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c | |||
@@ -21,6 +21,7 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <nvgpu/page_allocator.h> | 23 | #include <nvgpu/page_allocator.h> |
24 | #include <nvgpu/enabled.h> | ||
24 | #include <nvgpu/log.h> | 25 | #include <nvgpu/log.h> |
25 | #include <nvgpu/soc.h> | 26 | #include <nvgpu/soc.h> |
26 | #include <nvgpu/bus.h> | 27 | #include <nvgpu/bus.h> |
@@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) | |||
155 | 156 | ||
156 | gk20a_writel(g, bus_bar1_block_r(), | 157 | gk20a_writel(g, bus_bar1_block_r(), |
157 | nvgpu_aperture_mask(g, bar1_inst, | 158 | nvgpu_aperture_mask(g, bar1_inst, |
158 | bus_bar1_block_target_sys_mem_ncoh_f(), | 159 | bus_bar1_block_target_sys_mem_ncoh_f(), |
159 | bus_bar1_block_target_vid_mem_f()) | | 160 | bus_bar1_block_target_sys_mem_coh_f(), |
161 | bus_bar1_block_target_vid_mem_f()) | | ||
160 | bus_bar1_block_mode_virtual_f() | | 162 | bus_bar1_block_mode_virtual_f() | |
161 | bus_bar1_block_ptr_f(ptr_v)); | 163 | bus_bar1_block_ptr_f(ptr_v)); |
162 | 164 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c index a5a2cb51..e3052701 100644 --- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c | |||
@@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb) | |||
98 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), | 98 | gk20a_writel(g, fb_mmu_invalidate_pdb_r(), |
99 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | | 99 | fb_mmu_invalidate_pdb_addr_f(addr_lo) | |
100 | nvgpu_aperture_mask(g, pdb, | 100 | nvgpu_aperture_mask(g, pdb, |
101 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), | 101 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), |
102 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); | 102 | fb_mmu_invalidate_pdb_aperture_sys_mem_f(), |
103 | fb_mmu_invalidate_pdb_aperture_vid_mem_f())); | ||
103 | 104 | ||
104 | gk20a_writel(g, fb_mmu_invalidate_r(), | 105 | gk20a_writel(g, fb_mmu_invalidate_r(), |
105 | fb_mmu_invalidate_all_va_true_f() | | 106 | fb_mmu_invalidate_all_va_true_f() | |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 409661fc..4fda0d2e 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
653 | return -ENOMEM; | 653 | return -ENOMEM; |
654 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, | 654 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, |
655 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), | 655 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), |
656 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), | ||
656 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); | 657 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); |
657 | 658 | ||
658 | if (nvgpu_mem_begin(g, mem)) | 659 | if (nvgpu_mem_begin(g, mem)) |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index dd0b78c0..247557aa 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -3240,8 +3240,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
3240 | gk20a_writel(g, fifo_runlist_base_r(), | 3240 | gk20a_writel(g, fifo_runlist_base_r(), |
3241 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | | 3241 | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | |
3242 | nvgpu_aperture_mask(g, &runlist->mem[new_buf], | 3242 | nvgpu_aperture_mask(g, &runlist->mem[new_buf], |
3243 | fifo_runlist_base_target_sys_mem_ncoh_f(), | 3243 | fifo_runlist_base_target_sys_mem_ncoh_f(), |
3244 | fifo_runlist_base_target_vid_mem_f())); | 3244 | fifo_runlist_base_target_sys_mem_coh_f(), |
3245 | fifo_runlist_base_target_vid_mem_f())); | ||
3245 | } | 3246 | } |
3246 | 3247 | ||
3247 | gk20a_writel(g, fifo_runlist_r(), | 3248 | gk20a_writel(g, fifo_runlist_r(), |
@@ -3763,8 +3764,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c) | |||
3763 | nvgpu_mem_wr32(g, &c->inst_block, | 3764 | nvgpu_mem_wr32(g, &c->inst_block, |
3764 | ram_in_ramfc_w() + ram_fc_userd_w(), | 3765 | ram_in_ramfc_w() + ram_fc_userd_w(), |
3765 | nvgpu_aperture_mask(g, &g->fifo.userd, | 3766 | nvgpu_aperture_mask(g, &g->fifo.userd, |
3766 | pbdma_userd_target_sys_mem_ncoh_f(), | 3767 | pbdma_userd_target_sys_mem_ncoh_f(), |
3767 | pbdma_userd_target_vid_mem_f()) | | 3768 | pbdma_userd_target_sys_mem_coh_f(), |
3769 | pbdma_userd_target_vid_mem_f()) | | ||
3768 | pbdma_userd_addr_f(addr_lo)); | 3770 | pbdma_userd_addr_f(addr_lo)); |
3769 | 3771 | ||
3770 | nvgpu_mem_wr32(g, &c->inst_block, | 3772 | nvgpu_mem_wr32(g, &c->inst_block, |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 6ae743ef..2cde10ec 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -745,8 +745,9 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) | |||
745 | u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> | 745 | u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> |
746 | ram_in_base_shift_v(); | 746 | ram_in_base_shift_v(); |
747 | u32 aperture = nvgpu_aperture_mask(g, inst_block, | 747 | u32 aperture = nvgpu_aperture_mask(g, inst_block, |
748 | gr_fecs_current_ctx_target_sys_mem_ncoh_f(), | 748 | gr_fecs_current_ctx_target_sys_mem_ncoh_f(), |
749 | gr_fecs_current_ctx_target_vid_mem_f()); | 749 | gr_fecs_current_ctx_target_sys_mem_coh_f(), |
750 | gr_fecs_current_ctx_target_vid_mem_f()); | ||
750 | 751 | ||
751 | return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | | 752 | return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | |
752 | gr_fecs_current_ctx_valid_f(1); | 753 | gr_fecs_current_ctx_valid_f(1); |
@@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) | |||
2171 | 2172 | ||
2172 | inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); | 2173 | inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); |
2173 | gk20a_writel(g, gr_fecs_new_ctx_r(), | 2174 | gk20a_writel(g, gr_fecs_new_ctx_r(), |
2174 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | | 2175 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | |
2175 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, | 2176 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, |
2176 | gr_fecs_new_ctx_target_sys_mem_ncoh_f(), | 2177 | gr_fecs_new_ctx_target_sys_mem_ncoh_f(), |
2178 | gr_fecs_new_ctx_target_sys_mem_coh_f(), | ||
2177 | gr_fecs_new_ctx_target_vid_mem_f()) | | 2179 | gr_fecs_new_ctx_target_vid_mem_f()) | |
2178 | gr_fecs_new_ctx_valid_m()); | 2180 | gr_fecs_new_ctx_valid_m()); |
2179 | 2181 | ||
2180 | gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), | 2182 | gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), |
2181 | gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | | 2183 | gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | |
2182 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, | 2184 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, |
2183 | gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), | 2185 | gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), |
2186 | gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(), | ||
2184 | gr_fecs_arb_ctx_ptr_target_vid_mem_f())); | 2187 | gr_fecs_arb_ctx_ptr_target_vid_mem_f())); |
2185 | 2188 | ||
2186 | gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); | 2189 | gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); |
@@ -4379,8 +4382,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4379 | 4382 | ||
4380 | gk20a_writel(g, fb_mmu_debug_wr_r(), | 4383 | gk20a_writel(g, fb_mmu_debug_wr_r(), |
4381 | nvgpu_aperture_mask(g, &gr->mmu_wr_mem, | 4384 | nvgpu_aperture_mask(g, &gr->mmu_wr_mem, |
4382 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), | 4385 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), |
4383 | fb_mmu_debug_wr_aperture_vid_mem_f()) | | 4386 | fb_mmu_debug_wr_aperture_sys_mem_coh_f(), |
4387 | fb_mmu_debug_wr_aperture_vid_mem_f()) | | ||
4384 | fb_mmu_debug_wr_vol_false_f() | | 4388 | fb_mmu_debug_wr_vol_false_f() | |
4385 | fb_mmu_debug_wr_addr_f(addr)); | 4389 | fb_mmu_debug_wr_addr_f(addr)); |
4386 | 4390 | ||
@@ -4389,8 +4393,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4389 | 4393 | ||
4390 | gk20a_writel(g, fb_mmu_debug_rd_r(), | 4394 | gk20a_writel(g, fb_mmu_debug_rd_r(), |
4391 | nvgpu_aperture_mask(g, &gr->mmu_rd_mem, | 4395 | nvgpu_aperture_mask(g, &gr->mmu_rd_mem, |
4392 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), | 4396 | fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), |
4393 | fb_mmu_debug_rd_aperture_vid_mem_f()) | | 4397 | fb_mmu_debug_wr_aperture_sys_mem_coh_f(), |
4398 | fb_mmu_debug_rd_aperture_vid_mem_f()) | | ||
4394 | fb_mmu_debug_rd_vol_false_f() | | 4399 | fb_mmu_debug_rd_vol_false_f() | |
4395 | fb_mmu_debug_rd_addr_f(addr)); | 4400 | fb_mmu_debug_rd_addr_f(addr)); |
4396 | 4401 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index b27d1109..4ff6125b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g, | |||
122 | { | 122 | { |
123 | u32 pde0_bits = | 123 | u32 pde0_bits = |
124 | nvgpu_aperture_mask(g, pd->mem, | 124 | nvgpu_aperture_mask(g, pd->mem, |
125 | gmmu_pde_aperture_big_sys_mem_ncoh_f(), | 125 | gmmu_pde_aperture_big_sys_mem_ncoh_f(), |
126 | gmmu_pde_aperture_big_video_memory_f()) | | 126 | gmmu_pde_aperture_big_sys_mem_coh_f(), |
127 | gmmu_pde_aperture_big_video_memory_f()) | | ||
127 | gmmu_pde_address_big_sys_f( | 128 | gmmu_pde_address_big_sys_f( |
128 | (u32)(addr >> gmmu_pde_address_shift_v())); | 129 | (u32)(addr >> gmmu_pde_address_shift_v())); |
129 | 130 | ||
@@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g, | |||
135 | { | 136 | { |
136 | u32 pde1_bits = | 137 | u32 pde1_bits = |
137 | nvgpu_aperture_mask(g, pd->mem, | 138 | nvgpu_aperture_mask(g, pd->mem, |
138 | gmmu_pde_aperture_small_sys_mem_ncoh_f(), | 139 | gmmu_pde_aperture_small_sys_mem_ncoh_f(), |
139 | gmmu_pde_aperture_small_video_memory_f()) | | 140 | gmmu_pde_aperture_small_sys_mem_coh_f(), |
141 | gmmu_pde_aperture_small_video_memory_f()) | | ||
140 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ | 142 | gmmu_pde_vol_small_true_f() | /* tbd: why? */ |
141 | gmmu_pde_address_small_sys_f( | 143 | gmmu_pde_address_small_sys_f( |
142 | (u32)(addr >> gmmu_pde_address_shift_v())); | 144 | (u32)(addr >> gmmu_pde_address_shift_v())); |
@@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm, | |||
215 | 217 | ||
216 | pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, | 218 | pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, |
217 | gmmu_pte_aperture_sys_mem_ncoh_f(), | 219 | gmmu_pte_aperture_sys_mem_ncoh_f(), |
220 | gmmu_pte_aperture_sys_mem_coh_f(), | ||
218 | gmmu_pte_aperture_video_memory_f()) | | 221 | gmmu_pte_aperture_video_memory_f()) | |
219 | gmmu_pte_kind_f(attrs->kind_v) | | 222 | gmmu_pte_kind_f(attrs->kind_v) | |
220 | gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); | 223 | gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); |
@@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
268 | page_size >> 10, | 271 | page_size >> 10, |
269 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 272 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
270 | attrs->kind_v, | 273 | attrs->kind_v, |
271 | nvgpu_aperture_str(attrs->aperture), | 274 | nvgpu_aperture_str(g, attrs->aperture), |
272 | attrs->cacheable ? 'C' : '-', | 275 | attrs->cacheable ? 'C' : '-', |
273 | attrs->sparse ? 'S' : '-', | 276 | attrs->sparse ? 'S' : '-', |
274 | attrs->priv ? 'P' : '-', | 277 | attrs->priv ? 'P' : '-', |
@@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, | |||
363 | gk20a_dbg_info("pde pa=0x%llx", pdb_addr); | 366 | gk20a_dbg_info("pde pa=0x%llx", pdb_addr); |
364 | 367 | ||
365 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), | 368 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), |
366 | nvgpu_aperture_mask(g, vm->pdb.mem, | 369 | nvgpu_aperture_mask(g, vm->pdb.mem, |
367 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), | 370 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), |
368 | ram_in_page_dir_base_target_vid_mem_f()) | | 371 | ram_in_page_dir_base_target_sys_mem_coh_f(), |
369 | ram_in_page_dir_base_vol_true_f() | | 372 | ram_in_page_dir_base_target_vid_mem_f()) | |
370 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | 373 | ram_in_page_dir_base_vol_true_f() | |
374 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | ||
371 | 375 | ||
372 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), | 376 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), |
373 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 377 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c index 05d0473e..711aeb0d 100644 --- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | |||
@@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | |||
41 | u32 lo = (u32)(addr & 0xfffff); | 41 | u32 lo = (u32)(addr & 0xfffff); |
42 | u32 win = nvgpu_aperture_mask(g, mem, | 42 | u32 win = nvgpu_aperture_mask(g, mem, |
43 | bus_bar0_window_target_sys_mem_noncoherent_f(), | 43 | bus_bar0_window_target_sys_mem_noncoherent_f(), |
44 | bus_bar0_window_target_sys_mem_coherent_f(), | ||
44 | bus_bar0_window_target_vid_mem_f()) | | 45 | bus_bar0_window_target_vid_mem_f()) | |
45 | bus_bar0_window_base_f(hi); | 46 | bus_bar0_window_base_f(hi); |
46 | 47 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c index 34c8d4b7..cdd70d5b 100644 --- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <nvgpu/timers.h> | 25 | #include <nvgpu/timers.h> |
26 | #include <nvgpu/bus.h> | 26 | #include <nvgpu/bus.h> |
27 | #include <nvgpu/mm.h> | 27 | #include <nvgpu/mm.h> |
28 | #include <nvgpu/enabled.h> | ||
28 | 29 | ||
29 | #include "bus_gm20b.h" | 30 | #include "bus_gm20b.h" |
30 | #include "gk20a/gk20a.h" | 31 | #include "gk20a/gk20a.h" |
@@ -43,8 +44,9 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) | |||
43 | 44 | ||
44 | gk20a_writel(g, bus_bar1_block_r(), | 45 | gk20a_writel(g, bus_bar1_block_r(), |
45 | nvgpu_aperture_mask(g, bar1_inst, | 46 | nvgpu_aperture_mask(g, bar1_inst, |
46 | bus_bar1_block_target_sys_mem_ncoh_f(), | 47 | bus_bar1_block_target_sys_mem_ncoh_f(), |
47 | bus_bar1_block_target_vid_mem_f()) | | 48 | bus_bar1_block_target_sys_mem_coh_f(), |
49 | bus_bar1_block_target_vid_mem_f()) | | ||
48 | bus_bar1_block_mode_virtual_f() | | 50 | bus_bar1_block_mode_virtual_f() | |
49 | bus_bar1_block_ptr_f(ptr_v)); | 51 | bus_bar1_block_ptr_f(ptr_v)); |
50 | nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); | 52 | nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); |
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 0762e8bd..15612995 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <nvgpu/atomic.h> | 32 | #include <nvgpu/atomic.h> |
33 | #include <nvgpu/barrier.h> | 33 | #include <nvgpu/barrier.h> |
34 | #include <nvgpu/mm.h> | 34 | #include <nvgpu/mm.h> |
35 | #include <nvgpu/enabled.h> | ||
35 | 36 | ||
36 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> | 37 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> |
37 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | 38 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> |
@@ -51,11 +52,12 @@ void channel_gm20b_bind(struct channel_gk20a *c) | |||
51 | 52 | ||
52 | 53 | ||
53 | gk20a_writel(g, ccsr_channel_inst_r(c->chid), | 54 | gk20a_writel(g, ccsr_channel_inst_r(c->chid), |
54 | ccsr_channel_inst_ptr_f(inst_ptr) | | 55 | ccsr_channel_inst_ptr_f(inst_ptr) | |
55 | nvgpu_aperture_mask(g, &c->inst_block, | 56 | nvgpu_aperture_mask(g, &c->inst_block, |
56 | ccsr_channel_inst_target_sys_mem_ncoh_f(), | 57 | ccsr_channel_inst_target_sys_mem_ncoh_f(), |
57 | ccsr_channel_inst_target_vid_mem_f()) | | 58 | ccsr_channel_inst_target_sys_mem_coh_f(), |
58 | ccsr_channel_inst_bind_true_f()); | 59 | ccsr_channel_inst_target_vid_mem_f()) | |
60 | ccsr_channel_inst_bind_true_f()); | ||
59 | 61 | ||
60 | gk20a_writel(g, ccsr_channel_r(c->chid), | 62 | gk20a_writel(g, ccsr_channel_r(c->chid), |
61 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & | 63 | (gk20a_readl(g, ccsr_channel_r(c->chid)) & |
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index 29aceb7c..8e4e5900 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c | |||
@@ -99,6 +99,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu, | |||
99 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 99 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
100 | pwr_pmu_new_instblk_valid_f(1) | | 100 | pwr_pmu_new_instblk_valid_f(1) | |
101 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, | 101 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, |
102 | pwr_pmu_new_instblk_target_sys_ncoh_f(), | ||
102 | pwr_pmu_new_instblk_target_sys_coh_f(), | 103 | pwr_pmu_new_instblk_target_sys_coh_f(), |
103 | pwr_pmu_new_instblk_target_fb_f())); | 104 | pwr_pmu_new_instblk_target_fb_f())); |
104 | 105 | ||
@@ -165,6 +166,7 @@ void init_pmu_setup_hw1(struct gk20a *g) | |||
165 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 166 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
166 | pwr_pmu_new_instblk_valid_f(1) | | 167 | pwr_pmu_new_instblk_valid_f(1) | |
167 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, | 168 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, |
169 | pwr_pmu_new_instblk_target_sys_ncoh_f(), | ||
168 | pwr_pmu_new_instblk_target_sys_coh_f(), | 170 | pwr_pmu_new_instblk_target_sys_coh_f(), |
169 | pwr_pmu_new_instblk_target_fb_f())); | 171 | pwr_pmu_new_instblk_target_fb_f())); |
170 | 172 | ||
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c index c82fb1cc..1436a260 100644 --- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <nvgpu/dma.h> | 25 | #include <nvgpu/dma.h> |
26 | #include <nvgpu/bug.h> | 26 | #include <nvgpu/bug.h> |
27 | #include <nvgpu/log2.h> | 27 | #include <nvgpu/log2.h> |
28 | #include <nvgpu/enabled.h> | ||
28 | 29 | ||
29 | #include "fifo_gp10b.h" | 30 | #include "fifo_gp10b.h" |
30 | 31 | ||
@@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c) | |||
78 | nvgpu_mem_wr32(g, &c->inst_block, | 79 | nvgpu_mem_wr32(g, &c->inst_block, |
79 | ram_in_ramfc_w() + ram_fc_userd_w(), | 80 | ram_in_ramfc_w() + ram_fc_userd_w(), |
80 | nvgpu_aperture_mask(g, &g->fifo.userd, | 81 | nvgpu_aperture_mask(g, &g->fifo.userd, |
81 | pbdma_userd_target_sys_mem_ncoh_f(), | 82 | pbdma_userd_target_sys_mem_ncoh_f(), |
82 | pbdma_userd_target_vid_mem_f()) | | 83 | pbdma_userd_target_sys_mem_coh_f(), |
84 | pbdma_userd_target_vid_mem_f()) | | ||
83 | pbdma_userd_addr_f(addr_lo)); | 85 | pbdma_userd_addr_f(addr_lo)); |
84 | 86 | ||
85 | nvgpu_mem_wr32(g, &c->inst_block, | 87 | nvgpu_mem_wr32(g, &c->inst_block, |
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 0439dda9..7ff5f6a6 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g) | |||
124 | 124 | ||
125 | gk20a_writel(g, bus_bar2_block_r(), | 125 | gk20a_writel(g, bus_bar2_block_r(), |
126 | nvgpu_aperture_mask(g, inst_block, | 126 | nvgpu_aperture_mask(g, inst_block, |
127 | bus_bar2_block_target_sys_mem_ncoh_f(), | 127 | bus_bar2_block_target_sys_mem_ncoh_f(), |
128 | bus_bar2_block_target_vid_mem_f()) | | 128 | bus_bar2_block_target_sys_mem_coh_f(), |
129 | bus_bar2_block_target_vid_mem_f()) | | ||
129 | bus_bar2_block_mode_virtual_f() | | 130 | bus_bar2_block_mode_virtual_f() | |
130 | bus_bar2_block_ptr_f(inst_pa)); | 131 | bus_bar2_block_ptr_f(inst_pa)); |
131 | 132 | ||
@@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm, | |||
148 | phys_addr >>= gmmu_new_pde_address_shift_v(); | 149 | phys_addr >>= gmmu_new_pde_address_shift_v(); |
149 | 150 | ||
150 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, | 151 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, |
151 | gmmu_new_pde_aperture_sys_mem_ncoh_f(), | 152 | gmmu_new_pde_aperture_sys_mem_ncoh_f(), |
152 | gmmu_new_pde_aperture_video_memory_f()); | 153 | gmmu_new_pde_aperture_sys_mem_coh_f(), |
154 | gmmu_new_pde_aperture_video_memory_f()); | ||
153 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); | 155 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); |
154 | pde_v[0] |= gmmu_new_pde_vol_true_f(); | 156 | pde_v[0] |= gmmu_new_pde_vol_true_f(); |
155 | pde_v[1] |= phys_addr >> 24; | 157 | pde_v[1] |= phys_addr >> 24; |
@@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
194 | gmmu_new_dual_pde_address_small_sys_f(small_addr); | 196 | gmmu_new_dual_pde_address_small_sys_f(small_addr); |
195 | pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, | 197 | pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, |
196 | gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), | 198 | gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), |
199 | gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(), | ||
197 | gmmu_new_dual_pde_aperture_small_video_memory_f()); | 200 | gmmu_new_dual_pde_aperture_small_video_memory_f()); |
198 | pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); | 201 | pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); |
199 | pde_v[3] |= small_addr >> 24; | 202 | pde_v[3] |= small_addr >> 24; |
@@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, | |||
204 | pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); | 207 | pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); |
205 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, | 208 | pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, |
206 | gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), | 209 | gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), |
210 | gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(), | ||
207 | gmmu_new_dual_pde_aperture_big_video_memory_f()); | 211 | gmmu_new_dual_pde_aperture_big_video_memory_f()); |
208 | pde_v[1] |= big_addr >> 28; | 212 | pde_v[1] |= big_addr >> 28; |
209 | } | 213 | } |
@@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm, | |||
240 | gmmu_new_pte_address_sys_f(phys_shifted) : | 244 | gmmu_new_pte_address_sys_f(phys_shifted) : |
241 | gmmu_new_pte_address_vid_f(phys_shifted); | 245 | gmmu_new_pte_address_vid_f(phys_shifted); |
242 | u32 pte_tgt = __nvgpu_aperture_mask(g, | 246 | u32 pte_tgt = __nvgpu_aperture_mask(g, |
243 | attrs->aperture, | 247 | attrs->aperture, |
244 | attrs->coherent ? | 248 | gmmu_new_pte_aperture_sys_mem_ncoh_f(), |
245 | gmmu_new_pte_aperture_sys_mem_coh_f() : | 249 | gmmu_new_pte_aperture_sys_mem_coh_f(), |
246 | gmmu_new_pte_aperture_sys_mem_ncoh_f(), | 250 | gmmu_new_pte_aperture_video_memory_f()); |
247 | gmmu_new_pte_aperture_video_memory_f()); | ||
248 | 251 | ||
249 | pte_w[0] = pte_valid | pte_addr | pte_tgt; | 252 | pte_w[0] = pte_valid | pte_addr | pte_tgt; |
250 | 253 | ||
@@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
306 | page_size >> 10, | 309 | page_size >> 10, |
307 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 310 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
308 | attrs->kind_v, | 311 | attrs->kind_v, |
309 | nvgpu_aperture_str(attrs->aperture), | 312 | nvgpu_aperture_str(g, attrs->aperture), |
310 | attrs->cacheable ? 'C' : '-', | 313 | attrs->cacheable ? 'C' : '-', |
311 | attrs->sparse ? 'S' : '-', | 314 | attrs->sparse ? 'S' : '-', |
312 | attrs->priv ? 'P' : '-', | 315 | attrs->priv ? 'P' : '-', |
@@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, | |||
428 | 431 | ||
429 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), | 432 | nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), |
430 | nvgpu_aperture_mask(g, vm->pdb.mem, | 433 | nvgpu_aperture_mask(g, vm->pdb.mem, |
431 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), | 434 | ram_in_page_dir_base_target_sys_mem_ncoh_f(), |
432 | ram_in_page_dir_base_target_vid_mem_f()) | | 435 | ram_in_page_dir_base_target_sys_mem_coh_f(), |
436 | ram_in_page_dir_base_target_vid_mem_f()) | | ||
433 | ram_in_page_dir_base_vol_true_f() | | 437 | ram_in_page_dir_base_vol_true_f() | |
434 | ram_in_big_page_size_64kb_f() | | 438 | ram_in_big_page_size_64kb_f() | |
435 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | | 439 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | |
diff --git a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c index 799b2db4..4fa3f324 100644 --- a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c | |||
@@ -27,9 +27,10 @@ | |||
27 | #include <nvgpu/nvgpu_common.h> | 27 | #include <nvgpu/nvgpu_common.h> |
28 | #include <nvgpu/kmem.h> | 28 | #include <nvgpu/kmem.h> |
29 | #include <nvgpu/nvgpu_mem.h> | 29 | #include <nvgpu/nvgpu_mem.h> |
30 | #include <nvgpu/acr/nvgpu_acr.h> | ||
31 | #include <nvgpu/firmware.h> | 30 | #include <nvgpu/firmware.h> |
32 | #include <nvgpu/mm.h> | 31 | #include <nvgpu/mm.h> |
32 | #include <nvgpu/enabled.h> | ||
33 | #include <nvgpu/acr/nvgpu_acr.h> | ||
33 | 34 | ||
34 | #include "gk20a/gk20a.h" | 35 | #include "gk20a/gk20a.h" |
35 | #include "acr_gv11b.h" | 36 | #include "acr_gv11b.h" |
@@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu, | |||
220 | pwr_pmu_new_instblk_ptr_f( | 221 | pwr_pmu_new_instblk_ptr_f( |
221 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 222 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
222 | pwr_pmu_new_instblk_valid_f(1) | | 223 | pwr_pmu_new_instblk_valid_f(1) | |
223 | pwr_pmu_new_instblk_target_sys_ncoh_f()); | 224 | (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? |
225 | pwr_pmu_new_instblk_target_sys_coh_f() : | ||
226 | pwr_pmu_new_instblk_target_sys_ncoh_f())) ; | ||
224 | 227 | ||
225 | /*copy bootloader interface structure to dmem*/ | 228 | /*copy bootloader interface structure to dmem*/ |
226 | nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, | 229 | nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, |
diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c index 617ea61d..86977bb3 100644 --- a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | |||
@@ -31,14 +31,14 @@ | |||
31 | #include <nvgpu/dma.h> | 31 | #include <nvgpu/dma.h> |
32 | #include <nvgpu/mm.h> | 32 | #include <nvgpu/mm.h> |
33 | #include <nvgpu/sizes.h> | 33 | #include <nvgpu/sizes.h> |
34 | #include <nvgpu/enabled.h> | ||
35 | #include <nvgpu/log.h> | ||
36 | #include <nvgpu/bug.h> | ||
34 | 37 | ||
35 | #include "gk20a/gk20a.h" | 38 | #include "gk20a/gk20a.h" |
36 | #include "gk20a/css_gr_gk20a.h" | 39 | #include "gk20a/css_gr_gk20a.h" |
37 | #include "css_gr_gv11b.h" | 40 | #include "css_gr_gv11b.h" |
38 | 41 | ||
39 | #include <nvgpu/log.h> | ||
40 | #include <nvgpu/bug.h> | ||
41 | |||
42 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> | 42 | #include <nvgpu/hw/gv11b/hw_perf_gv11b.h> |
43 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> | 43 | #include <nvgpu/hw/gv11b/hw_mc_gv11b.h> |
44 | 44 | ||
@@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch, | |||
144 | perf_pmasys_mem_block_valid_true_f() | | 144 | perf_pmasys_mem_block_valid_true_f() | |
145 | nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, | 145 | nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, |
146 | perf_pmasys_mem_block_target_sys_ncoh_f(), | 146 | perf_pmasys_mem_block_target_sys_ncoh_f(), |
147 | perf_pmasys_mem_block_target_sys_coh_f(), | ||
147 | perf_pmasys_mem_block_target_lfb_f())); | 148 | perf_pmasys_mem_block_target_lfb_f())); |
148 | 149 | ||
149 | 150 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c index e5d88e8c..562476ca 100644 --- a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c | |||
@@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) | |||
59 | inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; | 59 | inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; |
60 | 60 | ||
61 | gk20a_writel(g, perf_pmasys_mem_block_r(), | 61 | gk20a_writel(g, perf_pmasys_mem_block_r(), |
62 | perf_pmasys_mem_block_base_f(inst_pa_page) | | 62 | perf_pmasys_mem_block_base_f(inst_pa_page) | |
63 | perf_pmasys_mem_block_valid_true_f() | | 63 | perf_pmasys_mem_block_valid_true_f() | |
64 | nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, | 64 | nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, |
65 | + perf_pmasys_mem_block_target_sys_ncoh_f(), | 65 | perf_pmasys_mem_block_target_sys_ncoh_f(), |
66 | + perf_pmasys_mem_block_target_lfb_f())); | 66 | perf_pmasys_mem_block_target_sys_coh_f(), |
67 | perf_pmasys_mem_block_target_lfb_f())); | ||
67 | 68 | ||
68 | gk20a_idle(g); | 69 | gk20a_idle(g); |
69 | return 0; | 70 | return 0; |
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index 97ab7aab..6b4b07a6 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | |||
@@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist) | |||
101 | c->runqueue_sel) | | 101 | c->runqueue_sel) | |
102 | ram_rl_entry_chan_userd_target_f( | 102 | ram_rl_entry_chan_userd_target_f( |
103 | nvgpu_aperture_mask(g, &g->fifo.userd, | 103 | nvgpu_aperture_mask(g, &g->fifo.userd, |
104 | ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), | 104 | ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), |
105 | ram_rl_entry_chan_userd_target_vid_mem_v())) | | 105 | ram_rl_entry_chan_userd_target_sys_mem_coh_v(), |
106 | ram_rl_entry_chan_userd_target_vid_mem_v())) | | ||
106 | ram_rl_entry_chan_inst_target_f( | 107 | ram_rl_entry_chan_inst_target_f( |
107 | nvgpu_aperture_mask(g, &c->inst_block, | 108 | nvgpu_aperture_mask(g, &c->inst_block, |
108 | ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), | 109 | ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), |
109 | ram_rl_entry_chan_inst_target_vid_mem_v())); | 110 | ram_rl_entry_chan_inst_target_sys_mem_coh_v(), |
111 | ram_rl_entry_chan_inst_target_vid_mem_v())); | ||
110 | 112 | ||
111 | addr_lo = u64_lo32(c->userd_iova) >> | 113 | addr_lo = u64_lo32(c->userd_iova) >> |
112 | ram_rl_entry_chan_userd_ptr_align_shift_v(); | 114 | ram_rl_entry_chan_userd_ptr_align_shift_v(); |
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c index ade1d9fe..b46ecb0a 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <nvgpu/dma.h> | 26 | #include <nvgpu/dma.h> |
27 | #include <nvgpu/log.h> | 27 | #include <nvgpu/log.h> |
28 | #include <nvgpu/mm.h> | 28 | #include <nvgpu/mm.h> |
29 | #include <nvgpu/enabled.h> | ||
29 | 30 | ||
30 | #include "gk20a/gk20a.h" | 31 | #include "gk20a/gk20a.h" |
31 | #include "gk20a/mm_gk20a.h" | 32 | #include "gk20a/mm_gk20a.h" |
@@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g) | |||
292 | 293 | ||
293 | gk20a_writel(g, bus_bar2_block_r(), | 294 | gk20a_writel(g, bus_bar2_block_r(), |
294 | nvgpu_aperture_mask(g, inst_block, | 295 | nvgpu_aperture_mask(g, inst_block, |
295 | bus_bar2_block_target_sys_mem_ncoh_f(), | 296 | bus_bar2_block_target_sys_mem_ncoh_f(), |
296 | bus_bar2_block_target_vid_mem_f()) | | 297 | bus_bar2_block_target_sys_mem_coh_f(), |
298 | bus_bar2_block_target_vid_mem_f()) | | ||
297 | bus_bar2_block_mode_virtual_f() | | 299 | bus_bar2_block_mode_virtual_f() | |
298 | bus_bar2_block_ptr_f(inst_pa)); | 300 | bus_bar2_block_ptr_f(inst_pa)); |
299 | 301 | ||
diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c index 7dd4f8f4..13e70eca 100644 --- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | |||
@@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu) | |||
195 | 195 | ||
196 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 196 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
197 | pwr_pmu_new_instblk_ptr_f( | 197 | pwr_pmu_new_instblk_ptr_f( |
198 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) | 198 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) | |
199 | | pwr_pmu_new_instblk_valid_f(1) | 199 | pwr_pmu_new_instblk_valid_f(1) | |
200 | | pwr_pmu_new_instblk_target_sys_ncoh_f()); | 200 | (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? |
201 | pwr_pmu_new_instblk_target_sys_coh_f() : | ||
202 | pwr_pmu_new_instblk_target_sys_ncoh_f())); | ||
201 | 203 | ||
202 | /* TBD: load all other surfaces */ | 204 | /* TBD: load all other surfaces */ |
203 | g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( | 205 | g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( |
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 05d7dee0..bda4c8e4 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -177,8 +177,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, | |||
177 | u32 pdb_addr_lo, pdb_addr_hi; | 177 | u32 pdb_addr_lo, pdb_addr_hi; |
178 | u64 pdb_addr; | 178 | u64 pdb_addr; |
179 | u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, | 179 | u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, |
180 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), | 180 | ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), |
181 | ram_in_sc_page_dir_base_target_vid_mem_v()); | 181 | ram_in_sc_page_dir_base_target_sys_mem_coh_v(), |
182 | ram_in_sc_page_dir_base_target_vid_mem_v()); | ||
182 | 183 | ||
183 | pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); | 184 | pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); |
184 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 185 | pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 2b8b7015..f1ab8a6e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #include <nvgpu/types.h> | 26 | #include <nvgpu/types.h> |
27 | #include <nvgpu/list.h> | 27 | #include <nvgpu/list.h> |
28 | #include <nvgpu/enabled.h> | ||
28 | 29 | ||
29 | #ifdef __KERNEL__ | 30 | #ifdef __KERNEL__ |
30 | #include <nvgpu/linux/nvgpu_mem.h> | 31 | #include <nvgpu/linux/nvgpu_mem.h> |
@@ -51,6 +52,10 @@ struct nvgpu_page_alloc; | |||
51 | enum nvgpu_aperture { | 52 | enum nvgpu_aperture { |
52 | APERTURE_INVALID = 0, /* unallocated or N/A */ | 53 | APERTURE_INVALID = 0, /* unallocated or N/A */ |
53 | APERTURE_SYSMEM, | 54 | APERTURE_SYSMEM, |
55 | |||
56 | /* Don't use directly. Use APERTURE_SYSMEM, this is used internally. */ | ||
57 | __APERTURE_SYSMEM_COH, | ||
58 | |||
54 | APERTURE_VIDMEM | 59 | APERTURE_VIDMEM |
55 | }; | 60 | }; |
56 | 61 | ||
@@ -187,12 +192,18 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node) | |||
187 | clear_list_entry)); | 192 | clear_list_entry)); |
188 | }; | 193 | }; |
189 | 194 | ||
190 | static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture) | 195 | static inline const char *nvgpu_aperture_str(struct gk20a *g, |
196 | enum nvgpu_aperture aperture) | ||
191 | { | 197 | { |
192 | switch (aperture) { | 198 | switch (aperture) { |
193 | case APERTURE_INVALID: return "INVAL"; | 199 | case APERTURE_INVALID: |
194 | case APERTURE_SYSMEM: return "SYSMEM"; | 200 | return "INVAL"; |
195 | case APERTURE_VIDMEM: return "VIDMEM"; | 201 | case APERTURE_SYSMEM: |
202 | return "SYSMEM"; | ||
203 | case __APERTURE_SYSMEM_COH: | ||
204 | return "SYSCOH"; | ||
205 | case APERTURE_VIDMEM: | ||
206 | return "VIDMEM"; | ||
196 | }; | 207 | }; |
197 | return "UNKNOWN"; | 208 | return "UNKNOWN"; |
198 | } | 209 | } |
@@ -322,9 +333,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem); | |||
322 | u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); | 333 | u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); |
323 | 334 | ||
324 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, | 335 | u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, |
325 | u32 sysmem_mask, u32 vidmem_mask); | 336 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); |
326 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, | 337 | u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, |
327 | u32 sysmem_mask, u32 vidmem_mask); | 338 | u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); |
328 | 339 | ||
329 | u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); | 340 | u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); |
330 | 341 | ||