From 1170687c33f7506f39aaf47acee5430233e3d1a8 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Fri, 9 Feb 2018 11:57:54 -0800 Subject: gpu: nvgpu: Use coherent aperture flag When using a coherent DMA API we must make sure to program any aperture fields with the coherent aperture setting. To do this the nvgpu_aperture_mask() function was modified to take a third aperture mask argument, a coherent setting, so that code can use this function to generate coherent aperture settings. The aperture choice is somewhat tricky: the default version of this function uses the state of the DMA API to determine what aperture to use for SYSMEM: either coherent or non-coherent internally. Thus a kernel user need only specify the normal nvgpu_mem struct and the correct mask should be chosen. Due to many uses of nvgpu_mem structs not created directly from the DMA API wrapper it's easier to translate SYSMEM to SYSMEM_COH after creation. However, the GMMU mapping code will encounter buffers from userspace with different coherency attributes than the DMA API. Thus the __nvgpu_aperture_mask() really respects the aperture setting passed in regardless of the DMA API state. This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT since this is either passed in from userspace or set by the kernel when using coherent DMA. The aperture field in attrs is upgraded to coh if this flag is set. This change also adds a coherent sysmem mask everywhere that it can. There are a couple of places that do not have a coherent register field defined yet. These need to eventually be defined and added. Lastly the aperture mask code has been moved from the Linux nvgpu_mem.c code to the general nvgpu_mem.c code since this function has no Linux dependencies. Note: depends on https://git-master.nvidia.com/r/1664536 for new register fields. 
JIRA EVLR-2333 Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71 Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1655220 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 24 --------------- drivers/gpu/nvgpu/common/linux/vm.c | 3 +- drivers/gpu/nvgpu/common/mm/gmmu.c | 9 +++++- drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 46 +++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/bus_gk20a.c | 6 ++-- drivers/gpu/nvgpu/gk20a/fb_gk20a.c | 5 ++-- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 1 + drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 10 ++++--- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 27 ++++++++++------- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 24 ++++++++------- drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | 1 + drivers/gpu/nvgpu/gm20b/bus_gm20b.c | 6 ++-- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 12 ++++---- drivers/gpu/nvgpu/gp106/sec2_gp106.c | 2 ++ drivers/gpu/nvgpu/gp10b/fifo_gp10b.c | 6 ++-- drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 28 ++++++++++-------- drivers/gpu/nvgpu/gv11b/acr_gv11b.c | 7 +++-- drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c | 7 +++-- drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c | 11 +++---- drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 10 ++++--- drivers/gpu/nvgpu/gv11b/mm_gv11b.c | 6 ++-- drivers/gpu/nvgpu/gv11b/pmu_gv11b.c | 8 +++-- drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 5 ++-- drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 23 +++++++++++---- 24 files changed, 184 insertions(+), 103 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index e441ec76..c859520d 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c @@ -34,30 +34,6 @@ #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" -u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, - u32 sysmem_mask, u32 vidmem_mask) -{ - switch (aperture) { - case APERTURE_SYSMEM: - /* some igpus 
consider system memory vidmem */ - return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE) - ? sysmem_mask : vidmem_mask; - case APERTURE_VIDMEM: - /* for dgpus only */ - return vidmem_mask; - case APERTURE_INVALID: - WARN_ON("Bad aperture"); - } - return 0; -} - -u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, - u32 sysmem_mask, u32 vidmem_mask) -{ - return __nvgpu_aperture_mask(g, mem->aperture, - sysmem_mask, vidmem_mask); -} - int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) { void *cpu_va; diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index e3ca4eda..52b2f30c 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c @@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm, vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, vm_aspace_id(vm), mapped_buffer->flags, - nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); + nvgpu_aperture_str(g, + gk20a_dmabuf_aperture(g, os_buf->dmabuf))); return mapped_buffer; } diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 5abf5951..41343718 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -634,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, page_size >> 10, nvgpu_gmmu_perm_str(attrs->rw_flag), attrs->kind_v, - nvgpu_aperture_str(attrs->aperture), + nvgpu_aperture_str(g, attrs->aperture), attrs->cacheable ? 'C' : '-', attrs->sparse ? 'S' : '-', attrs->priv ? 'P' : '-', @@ -711,6 +711,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); + /* + * Handle the IO coherency aperture: make sure the .aperture field is + * correct based on the IO coherency flag. 
+ */ + if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM) + attrs.aperture = __APERTURE_SYSMEM_COH; + /* * Only allocate a new GPU VA range if we haven't already been passed a * GPU VA range. This facilitates fixed mappings. diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 73b6b2a7..78a57b4e 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -28,6 +28,52 @@ #include "gk20a/gk20a.h" +/* + * Make sure to use the right coherency aperture if you use this function! This + * will not add any checks. If you want to simply use the default coherency then + * use nvgpu_aperture_mask(). + */ +u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, + u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) +{ + /* + * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the + * "sysmem" aperture should really be translated to VIDMEM. + */ + if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)) + aperture = APERTURE_VIDMEM; + + switch (aperture) { + case __APERTURE_SYSMEM_COH: + return sysmem_coh_mask; + case APERTURE_SYSMEM: + return sysmem_mask; + case APERTURE_VIDMEM: + return vidmem_mask; + case APERTURE_INVALID: + WARN_ON("Bad aperture"); + } + return 0; +} + +u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, + u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask) +{ + enum nvgpu_aperture ap = mem->aperture; + + /* + * Handle the coherent aperture: ideally most of the driver is not + * aware of the difference between coherent and non-coherent sysmem so + * we add this translation step here. 
+ */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) && + ap == APERTURE_SYSMEM) + ap = __APERTURE_SYSMEM_COH; + + return __nvgpu_aperture_mask(g, ap, + sysmem_mask, sysmem_coh_mask, vidmem_mask); +} + void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) { return sgt->ops->sgl_next(sgl); diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index 7f0cfe58..b2800772 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) gk20a_writel(g, bus_bar1_block_r(), nvgpu_aperture_mask(g, bar1_inst, - bus_bar1_block_target_sys_mem_ncoh_f(), - bus_bar1_block_target_vid_mem_f()) | + bus_bar1_block_target_sys_mem_ncoh_f(), + bus_bar1_block_target_sys_mem_coh_f(), + bus_bar1_block_target_vid_mem_f()) | bus_bar1_block_mode_virtual_f() | bus_bar1_block_ptr_f(ptr_v)); diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c index a5a2cb51..e3052701 100644 --- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c @@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb) gk20a_writel(g, fb_mmu_invalidate_pdb_r(), fb_mmu_invalidate_pdb_addr_f(addr_lo) | nvgpu_aperture_mask(g, pdb, - fb_mmu_invalidate_pdb_aperture_sys_mem_f(), - fb_mmu_invalidate_pdb_aperture_vid_mem_f())); + fb_mmu_invalidate_pdb_aperture_sys_mem_f(), + fb_mmu_invalidate_pdb_aperture_sys_mem_f(), + fb_mmu_invalidate_pdb_aperture_vid_mem_f())); gk20a_writel(g, fb_mmu_invalidate_r(), fb_mmu_invalidate_all_va_true_f() | diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 409661fc..4fda0d2e 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct 
gk20a *g, return -ENOMEM; aperture = nvgpu_aperture_mask(g, &trace->trace_buf, ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); if (nvgpu_mem_begin(g, mem)) diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index dd0b78c0..247557aa 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -3240,8 +3240,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, gk20a_writel(g, fifo_runlist_base_r(), fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | nvgpu_aperture_mask(g, &runlist->mem[new_buf], - fifo_runlist_base_target_sys_mem_ncoh_f(), - fifo_runlist_base_target_vid_mem_f())); + fifo_runlist_base_target_sys_mem_ncoh_f(), + fifo_runlist_base_target_sys_mem_coh_f(), + fifo_runlist_base_target_vid_mem_f())); } gk20a_writel(g, fifo_runlist_r(), @@ -3763,8 +3764,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c) nvgpu_mem_wr32(g, &c->inst_block, ram_in_ramfc_w() + ram_fc_userd_w(), nvgpu_aperture_mask(g, &g->fifo.userd, - pbdma_userd_target_sys_mem_ncoh_f(), - pbdma_userd_target_vid_mem_f()) | + pbdma_userd_target_sys_mem_ncoh_f(), + pbdma_userd_target_sys_mem_coh_f(), + pbdma_userd_target_vid_mem_f()) | pbdma_userd_addr_f(addr_lo)); nvgpu_mem_wr32(g, &c->inst_block, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 6ae743ef..2cde10ec 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -745,8 +745,9 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) u64 ptr = nvgpu_inst_block_addr(g, inst_block) >> ram_in_base_shift_v(); u32 aperture = nvgpu_aperture_mask(g, inst_block, - gr_fecs_current_ctx_target_sys_mem_ncoh_f(), - 
gr_fecs_current_ctx_target_vid_mem_f()); + gr_fecs_current_ctx_target_sys_mem_ncoh_f(), + gr_fecs_current_ctx_target_sys_mem_coh_f(), + gr_fecs_current_ctx_target_vid_mem_f()); return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture | gr_fecs_current_ctx_valid_f(1); @@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); gk20a_writel(g, gr_fecs_new_ctx_r(), - gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | - nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, + gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | + nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, gr_fecs_new_ctx_target_sys_mem_ncoh_f(), + gr_fecs_new_ctx_target_sys_mem_coh_f(), gr_fecs_new_ctx_target_vid_mem_f()) | - gr_fecs_new_ctx_valid_m()); + gr_fecs_new_ctx_valid_m()); gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), - gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | - nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, + gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | + nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), + gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(), gr_fecs_arb_ctx_ptr_target_vid_mem_f())); gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); @@ -4379,8 +4382,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gk20a_writel(g, fb_mmu_debug_wr_r(), nvgpu_aperture_mask(g, &gr->mmu_wr_mem, - fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), - fb_mmu_debug_wr_aperture_vid_mem_f()) | + fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), + fb_mmu_debug_wr_aperture_sys_mem_coh_f(), + fb_mmu_debug_wr_aperture_vid_mem_f()) | fb_mmu_debug_wr_vol_false_f() | fb_mmu_debug_wr_addr_f(addr)); @@ -4389,8 +4393,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gk20a_writel(g, fb_mmu_debug_rd_r(), nvgpu_aperture_mask(g, &gr->mmu_rd_mem, - fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), - fb_mmu_debug_rd_aperture_vid_mem_f()) | + fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), + fb_mmu_debug_wr_aperture_sys_mem_coh_f(), 
+ fb_mmu_debug_rd_aperture_vid_mem_f()) | fb_mmu_debug_rd_vol_false_f() | fb_mmu_debug_rd_addr_f(addr)); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index b27d1109..4ff6125b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g, { u32 pde0_bits = nvgpu_aperture_mask(g, pd->mem, - gmmu_pde_aperture_big_sys_mem_ncoh_f(), - gmmu_pde_aperture_big_video_memory_f()) | + gmmu_pde_aperture_big_sys_mem_ncoh_f(), + gmmu_pde_aperture_big_sys_mem_coh_f(), + gmmu_pde_aperture_big_video_memory_f()) | gmmu_pde_address_big_sys_f( (u32)(addr >> gmmu_pde_address_shift_v())); @@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g, { u32 pde1_bits = nvgpu_aperture_mask(g, pd->mem, - gmmu_pde_aperture_small_sys_mem_ncoh_f(), - gmmu_pde_aperture_small_video_memory_f()) | + gmmu_pde_aperture_small_sys_mem_ncoh_f(), + gmmu_pde_aperture_small_sys_mem_coh_f(), + gmmu_pde_aperture_small_video_memory_f()) | gmmu_pde_vol_small_true_f() | /* tbd: why? */ gmmu_pde_address_small_sys_f( (u32)(addr >> gmmu_pde_address_shift_v())); @@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm, pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, gmmu_pte_aperture_sys_mem_ncoh_f(), + gmmu_pte_aperture_sys_mem_coh_f(), gmmu_pte_aperture_video_memory_f()) | gmmu_pte_kind_f(attrs->kind_v) | gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); @@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, page_size >> 10, nvgpu_gmmu_perm_str(attrs->rw_flag), attrs->kind_v, - nvgpu_aperture_str(attrs->aperture), + nvgpu_aperture_str(g, attrs->aperture), attrs->cacheable ? 'C' : '-', attrs->sparse ? 'S' : '-', attrs->priv ? 
'P' : '-', @@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, gk20a_dbg_info("pde pa=0x%llx", pdb_addr); nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), - nvgpu_aperture_mask(g, vm->pdb.mem, - ram_in_page_dir_base_target_sys_mem_ncoh_f(), - ram_in_page_dir_base_target_vid_mem_f()) | - ram_in_page_dir_base_vol_true_f() | - ram_in_page_dir_base_lo_f(pdb_addr_lo)); + nvgpu_aperture_mask(g, vm->pdb.mem, + ram_in_page_dir_base_target_sys_mem_ncoh_f(), + ram_in_page_dir_base_target_sys_mem_coh_f(), + ram_in_page_dir_base_target_vid_mem_f()) | + ram_in_page_dir_base_vol_true_f() | + ram_in_page_dir_base_lo_f(pdb_addr_lo)); nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), ram_in_page_dir_base_hi_f(pdb_addr_hi)); diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c index 05d0473e..711aeb0d 100644 --- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c @@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, u32 lo = (u32)(addr & 0xfffff); u32 win = nvgpu_aperture_mask(g, mem, bus_bar0_window_target_sys_mem_noncoherent_f(), + bus_bar0_window_target_sys_mem_coherent_f(), bus_bar0_window_target_vid_mem_f()) | bus_bar0_window_base_f(hi); diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c index 34c8d4b7..cdd70d5b 100644 --- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "bus_gm20b.h" #include "gk20a/gk20a.h" @@ -43,8 +44,9 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) gk20a_writel(g, bus_bar1_block_r(), nvgpu_aperture_mask(g, bar1_inst, - bus_bar1_block_target_sys_mem_ncoh_f(), - bus_bar1_block_target_vid_mem_f()) | + bus_bar1_block_target_sys_mem_ncoh_f(), + bus_bar1_block_target_sys_mem_coh_f(), + bus_bar1_block_target_vid_mem_f()) | 
bus_bar1_block_mode_virtual_f() | bus_bar1_block_ptr_f(ptr_v)); nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 0762e8bd..15612995 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -51,11 +52,12 @@ void channel_gm20b_bind(struct channel_gk20a *c) gk20a_writel(g, ccsr_channel_inst_r(c->chid), - ccsr_channel_inst_ptr_f(inst_ptr) | - nvgpu_aperture_mask(g, &c->inst_block, - ccsr_channel_inst_target_sys_mem_ncoh_f(), - ccsr_channel_inst_target_vid_mem_f()) | - ccsr_channel_inst_bind_true_f()); + ccsr_channel_inst_ptr_f(inst_ptr) | + nvgpu_aperture_mask(g, &c->inst_block, + ccsr_channel_inst_target_sys_mem_ncoh_f(), + ccsr_channel_inst_target_sys_mem_coh_f(), + ccsr_channel_inst_target_vid_mem_f()) | + ccsr_channel_inst_bind_true_f()); gk20a_writel(g, ccsr_channel_r(c->chid), (gk20a_readl(g, ccsr_channel_r(c->chid)) & diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index 29aceb7c..8e4e5900 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c @@ -99,6 +99,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu, nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | nvgpu_aperture_mask(g, &mm->pmu.inst_block, + pwr_pmu_new_instblk_target_sys_ncoh_f(), pwr_pmu_new_instblk_target_sys_coh_f(), pwr_pmu_new_instblk_target_fb_f())); @@ -165,6 +166,7 @@ void init_pmu_setup_hw1(struct gk20a *g) nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | nvgpu_aperture_mask(g, &mm->pmu.inst_block, + pwr_pmu_new_instblk_target_sys_ncoh_f(), pwr_pmu_new_instblk_target_sys_coh_f(), pwr_pmu_new_instblk_target_fb_f())); diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c index 
c82fb1cc..1436a260 100644 --- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "fifo_gp10b.h" @@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c) nvgpu_mem_wr32(g, &c->inst_block, ram_in_ramfc_w() + ram_fc_userd_w(), nvgpu_aperture_mask(g, &g->fifo.userd, - pbdma_userd_target_sys_mem_ncoh_f(), - pbdma_userd_target_vid_mem_f()) | + pbdma_userd_target_sys_mem_ncoh_f(), + pbdma_userd_target_sys_mem_coh_f(), + pbdma_userd_target_vid_mem_f()) | pbdma_userd_addr_f(addr_lo)); nvgpu_mem_wr32(g, &c->inst_block, diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 0439dda9..7ff5f6a6 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g) gk20a_writel(g, bus_bar2_block_r(), nvgpu_aperture_mask(g, inst_block, - bus_bar2_block_target_sys_mem_ncoh_f(), - bus_bar2_block_target_vid_mem_f()) | + bus_bar2_block_target_sys_mem_ncoh_f(), + bus_bar2_block_target_sys_mem_coh_f(), + bus_bar2_block_target_vid_mem_f()) | bus_bar2_block_mode_virtual_f() | bus_bar2_block_ptr_f(inst_pa)); @@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm, phys_addr >>= gmmu_new_pde_address_shift_v(); pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, - gmmu_new_pde_aperture_sys_mem_ncoh_f(), - gmmu_new_pde_aperture_video_memory_f()); + gmmu_new_pde_aperture_sys_mem_ncoh_f(), + gmmu_new_pde_aperture_sys_mem_coh_f(), + gmmu_new_pde_aperture_video_memory_f()); pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); pde_v[0] |= gmmu_new_pde_vol_true_f(); pde_v[1] |= phys_addr >> 24; @@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, gmmu_new_dual_pde_address_small_sys_f(small_addr); pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), + 
gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(), gmmu_new_dual_pde_aperture_small_video_memory_f()); pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); pde_v[3] |= small_addr >> 24; @@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), + gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(), gmmu_new_dual_pde_aperture_big_video_memory_f()); pde_v[1] |= big_addr >> 28; } @@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm, gmmu_new_pte_address_sys_f(phys_shifted) : gmmu_new_pte_address_vid_f(phys_shifted); u32 pte_tgt = __nvgpu_aperture_mask(g, - attrs->aperture, - attrs->coherent ? - gmmu_new_pte_aperture_sys_mem_coh_f() : - gmmu_new_pte_aperture_sys_mem_ncoh_f(), - gmmu_new_pte_aperture_video_memory_f()); + attrs->aperture, + gmmu_new_pte_aperture_sys_mem_ncoh_f(), + gmmu_new_pte_aperture_sys_mem_coh_f(), + gmmu_new_pte_aperture_video_memory_f()); pte_w[0] = pte_valid | pte_addr | pte_tgt; @@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, page_size >> 10, nvgpu_gmmu_perm_str(attrs->rw_flag), attrs->kind_v, - nvgpu_aperture_str(attrs->aperture), + nvgpu_aperture_str(g, attrs->aperture), attrs->cacheable ? 'C' : '-', attrs->sparse ? 'S' : '-', attrs->priv ? 
'P' : '-', @@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), nvgpu_aperture_mask(g, vm->pdb.mem, - ram_in_page_dir_base_target_sys_mem_ncoh_f(), - ram_in_page_dir_base_target_vid_mem_f()) | + ram_in_page_dir_base_target_sys_mem_ncoh_f(), + ram_in_page_dir_base_target_sys_mem_coh_f(), + ram_in_page_dir_base_target_vid_mem_f()) | ram_in_page_dir_base_vol_true_f() | ram_in_big_page_size_64kb_f() | ram_in_page_dir_base_lo_f(pdb_addr_lo) | diff --git a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c index 799b2db4..4fa3f324 100644 --- a/drivers/gpu/nvgpu/gv11b/acr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/acr_gv11b.c @@ -27,9 +27,10 @@ #include #include #include -#include #include #include +#include +#include #include "gk20a/gk20a.h" #include "acr_gv11b.h" @@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu, pwr_pmu_new_instblk_ptr_f( nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | - pwr_pmu_new_instblk_target_sys_ncoh_f()); + (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? 
+ pwr_pmu_new_instblk_target_sys_coh_f() : + pwr_pmu_new_instblk_target_sys_ncoh_f())) ; /*copy bootloader interface structure to dmem*/ nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, diff --git a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c index 617ea61d..86977bb3 100644 --- a/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/css_gr_gv11b.c @@ -31,14 +31,14 @@ #include #include #include +#include +#include +#include #include "gk20a/gk20a.h" #include "gk20a/css_gr_gk20a.h" #include "css_gr_gv11b.h" -#include -#include - #include #include @@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch, perf_pmasys_mem_block_valid_true_f() | nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, perf_pmasys_mem_block_target_sys_ncoh_f(), + perf_pmasys_mem_block_target_sys_coh_f(), perf_pmasys_mem_block_target_lfb_f())); diff --git a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c index e5d88e8c..562476ca 100644 --- a/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/dbg_gpu_gv11b.c @@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; gk20a_writel(g, perf_pmasys_mem_block_r(), - perf_pmasys_mem_block_base_f(inst_pa_page) | - perf_pmasys_mem_block_valid_true_f() | - nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, -+ perf_pmasys_mem_block_target_sys_ncoh_f(), -+ perf_pmasys_mem_block_target_lfb_f())); + perf_pmasys_mem_block_base_f(inst_pa_page) | + perf_pmasys_mem_block_valid_true_f() | + nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, + perf_pmasys_mem_block_target_sys_ncoh_f(), + perf_pmasys_mem_block_target_sys_coh_f(), + perf_pmasys_mem_block_target_lfb_f())); gk20a_idle(g); return 0; diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index 97ab7aab..6b4b07a6 100644 --- 
a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist) c->runqueue_sel) | ram_rl_entry_chan_userd_target_f( nvgpu_aperture_mask(g, &g->fifo.userd, - ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), - ram_rl_entry_chan_userd_target_vid_mem_v())) | + ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), + ram_rl_entry_chan_userd_target_sys_mem_coh_v(), + ram_rl_entry_chan_userd_target_vid_mem_v())) | ram_rl_entry_chan_inst_target_f( nvgpu_aperture_mask(g, &c->inst_block, - ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), - ram_rl_entry_chan_inst_target_vid_mem_v())); + ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), + ram_rl_entry_chan_inst_target_sys_mem_coh_v(), + ram_rl_entry_chan_inst_target_vid_mem_v())); addr_lo = u64_lo32(c->userd_iova) >> ram_rl_entry_chan_userd_ptr_align_shift_v(); diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c index ade1d9fe..b46ecb0a 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" @@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g) gk20a_writel(g, bus_bar2_block_r(), nvgpu_aperture_mask(g, inst_block, - bus_bar2_block_target_sys_mem_ncoh_f(), - bus_bar2_block_target_vid_mem_f()) | + bus_bar2_block_target_sys_mem_ncoh_f(), + bus_bar2_block_target_sys_mem_coh_f(), + bus_bar2_block_target_vid_mem_f()) | bus_bar2_block_mode_virtual_f() | bus_bar2_block_ptr_f(inst_pa)); diff --git a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c index 7dd4f8f4..13e70eca 100644 --- a/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/pmu_gv11b.c @@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu) gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - nvgpu_inst_block_addr(g, 
&mm->pmu.inst_block) >> ALIGN_4KB) - | pwr_pmu_new_instblk_valid_f(1) - | pwr_pmu_new_instblk_target_sys_ncoh_f()); + nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) | + pwr_pmu_new_instblk_valid_f(1) | + (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? + pwr_pmu_new_instblk_target_sys_coh_f() : + pwr_pmu_new_instblk_target_sys_ncoh_f())); /* TBD: load all other surfaces */ g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 05d7dee0..bda4c8e4 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c @@ -177,8 +177,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm, u32 pdb_addr_lo, pdb_addr_hi; u64 pdb_addr; u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, - ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), - ram_in_sc_page_dir_base_target_vid_mem_v()); + ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), + ram_in_sc_page_dir_base_target_sys_mem_coh_v(), + ram_in_sc_page_dir_base_target_vid_mem_v()); pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 2b8b7015..f1ab8a6e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -25,6 +25,7 @@ #include #include +#include #ifdef __KERNEL__ #include @@ -51,6 +52,10 @@ struct nvgpu_page_alloc; enum nvgpu_aperture { APERTURE_INVALID = 0, /* unallocated or N/A */ APERTURE_SYSMEM, + + /* Don't use directly. Use APERTURE_SYSMEM, this is used internally. 
*/ + __APERTURE_SYSMEM_COH, + APERTURE_VIDMEM }; @@ -187,12 +192,18 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node) clear_list_entry)); }; -static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture) +static inline const char *nvgpu_aperture_str(struct gk20a *g, + enum nvgpu_aperture aperture) { switch (aperture) { - case APERTURE_INVALID: return "INVAL"; - case APERTURE_SYSMEM: return "SYSMEM"; - case APERTURE_VIDMEM: return "VIDMEM"; + case APERTURE_INVALID: + return "INVAL"; + case APERTURE_SYSMEM: + return "SYSMEM"; + case __APERTURE_SYSMEM_COH: + return "SYSCOH"; + case APERTURE_VIDMEM: + return "VIDMEM"; }; return "UNKNOWN"; } @@ -322,9 +333,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem); u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, - u32 sysmem_mask, u32 vidmem_mask); + u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, - u32 sysmem_mask, u32 vidmem_mask); + u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask); u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); -- cgit v1.2.2