From 9b5427da37161c350d28a821652f2bb84bca360f Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Thu, 31 Mar 2016 13:33:02 -0700 Subject: gpu: nvgpu: Support GPUs with no physical mode Support GPUs which cannot choose between SMMU and physical addressing. Change-Id: If3256fa1bc795a84d039ad3aa63ebdccf5cc0afb Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/1120469 GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 23 +++++++++++------------ drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 5 +++-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 14 +++++++++----- drivers/gpu/nvgpu/gk20a/gk20a.c | 1 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 14 +++++++------- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 18 ++++++++++++++---- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 2 ++ drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 2 +- drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 2 +- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 16 ++++++++-------- 12 files changed, 59 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 61211239..e8d82e0e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -333,33 +333,32 @@ static int channel_gk20a_setup_userd(struct channel_gk20a *c) return 0; } -static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a) +static void channel_gk20a_bind(struct channel_gk20a *c) { - struct gk20a *g = ch_gk20a->g; + struct gk20a *g = c->g; struct fifo_gk20a *f = &g->fifo; struct fifo_engine_info_gk20a *engine_info = f->engine_info + ENGINE_GR_GK20A; - - u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block) + u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v(); gk20a_dbg_info("bind channel %d inst ptr 0x%08x", - ch_gk20a->hw_chid, inst_ptr); + c->hw_chid, inst_ptr); - ch_gk20a->bound = true; + c->bound = true; - gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid), - (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) & + gk20a_writel(g, ccsr_channel_r(c->hw_chid), + (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) & ~ccsr_channel_runlist_f(~0)) | ccsr_channel_runlist_f(engine_info->runlist_id)); - gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), + gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), ccsr_channel_inst_ptr_f(inst_ptr) | ccsr_channel_inst_target_vid_mem_f() | ccsr_channel_inst_bind_true_f()); - gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid), - (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) & + gk20a_writel(g, ccsr_channel_r(c->hw_chid), + (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) & ~ccsr_channel_enable_set_f(~0)) | ccsr_channel_enable_set_true_f()); } @@ -402,7 +401,7 @@ int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) return err; gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", - ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block)); + ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block)); gk20a_dbg_fn("done"); return 0; diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 3fb003bf..2008289b 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c @@ -281,7 +281,7 @@ static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size) gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size); /* this field is aligned to 4K */ - inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12; + inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK * should be written last */ diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 309fe75a..95957788 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -1046,7 +1046,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); /* this field is aligned to 4K */ - inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12; + inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK * should be written last */ diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 58d8fda1..f9cddc41 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -624,12 +624,13 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "hw_chid=%d context_ptr=%x inst_block=%llx", - ch->hw_chid, context_ptr, gk20a_mem_phys(&ch->inst_block)); + ch->hw_chid, context_ptr, + gk20a_mm_inst_block_addr(g, &ch->inst_block)); if (!trace) return -ENOMEM; - pa = gk20a_mem_phys(&trace->trace_buf); + pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); if (!pa) return -ENOMEM; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 4422bca4..44329a53 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -654,17 +654,21 @@ int gk20a_init_fifo_support(struct gk20a *g) static struct channel_gk20a * channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) { + struct gk20a *g = f->g; int ci; if (unlikely(!f->channel)) return NULL; for (ci = 0; ci < f->num_channels; ci++) { - struct channel_gk20a *ch = gk20a_channel_get(&f->channel[ci]); + struct channel_gk20a *ch; + u64 ch_inst_ptr; + + ch = gk20a_channel_get(&f->channel[ci]); /* only alive channels are searched */ if (!ch) continue; - if (ch->inst_block.cpu_va && - (inst_ptr == gk20a_mem_phys(&ch->inst_block))) + ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block); + if (ch->inst_block.cpu_va && inst_ptr == ch_inst_ptr) return ch; gk20a_channel_put(ch); @@ -1087,10 +1091,10 @@ static bool gk20a_fifo_handle_mmu_fault( ch->hw_chid); } } else if (f.inst_ptr == - gk20a_mem_phys(&g->mm.bar1.inst_block)) { + gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) { gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); } else if (f.inst_ptr == - gk20a_mem_phys(&g->mm.pmu.inst_block)) { + gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) { gk20a_err(dev_from_gk20a(g), "mmu fault from pmu"); } else gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault"); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index f3bf0517..1091b235 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1542,6 +1542,7 @@ static int gk20a_probe(struct platform_device *dev) gk20a->mm.ltc_enabled_debug = true; gk20a->mm.bypass_smmu = platform->bypass_smmu; gk20a->mm.disable_bigpage = platform->disable_bigpage; + gk20a->mm.has_physical_mode = true; gk20a->debugfs_ltc_enabled = debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, platform->debugfs, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index ada67edd..27406f9e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -729,7 +729,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, struct channel_gk20a *c) { - u32 inst_base_ptr = u64_lo32(gk20a_mem_phys(&c->inst_block) + u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v()); u32 ret; @@ -1408,7 +1408,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) int ret; u32 inst_base_ptr = - u64_lo32(gk20a_mem_phys(&c->inst_block) + u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v()); @@ -1875,7 +1875,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, if (tegra_platform_is_linsim()) { u32 inst_base_ptr = - u64_lo32(gk20a_mem_phys(&c->inst_block) + u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v()); ret = gr_gk20a_submit_fecs_method_op(g, @@ -2103,7 +2103,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); - inst_ptr = gk20a_mem_phys(&ucode_info->inst_blk_desc); + inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc); gk20a_writel(g, gr_fecs_new_ctx_r(), gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | gr_fecs_new_ctx_target_m() | @@ -4712,7 +4712,7 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) err = gr_gk20a_fecs_set_reglist_bind_inst(g, - gk20a_mem_phys(&mm->pmu.inst_block)); + gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block)); if (err) { gk20a_err(dev_from_gk20a(g), "fail to bind pmu inst to gr"); @@ -4991,7 +4991,7 @@ int gk20a_gr_reset(struct gk20a *g) } err = gr_gk20a_fecs_set_reglist_bind_inst(g, - gk20a_mem_phys(&g->mm.pmu.inst_block)); + gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)); if (err) { gk20a_err(dev_from_gk20a(g), "fail to bind pmu inst to gr"); @@ -5372,7 +5372,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( if (!gk20a_channel_get(ch)) continue; - if ((u32)(gk20a_mem_phys(&ch->inst_block) >> + if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >> ram_in_base_shift_v()) == gr_fecs_current_ctx_ptr_v(curr_ctx)) { tsgid = ch->tsgid; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index ac4625e0..519faeeb 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -482,7 +482,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; struct mem_desc *inst_block = &mm->bar1.inst_block; - phys_addr_t inst_pa = gk20a_mem_phys(inst_block); + u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block); int err; gk20a_dbg_fn(""); @@ -2249,7 +2249,7 @@ void gk20a_free_sgtable(struct sg_table **sgt) u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) { - if (!device_is_iommuable(dev_from_gk20a(g))) + if (!device_is_iommuable(dev_from_gk20a(g)) || !g->mm.has_physical_mode) return iova; else return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g); @@ -3382,6 +3382,17 @@ void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block) gk20a_gmmu_free(g, inst_block); } +u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct mem_desc *inst_block) +{ + u64 addr; + if (g->mm.has_physical_mode) + addr = gk20a_mem_phys(inst_block); + else + addr = gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(inst_block->sgt->sgl)); + + return addr; +} + static int gk20a_init_bar1_vm(struct mm_gk20a *mm) { int err; @@ -3484,11 +3495,10 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, { struct gk20a *g = gk20a_from_vm(vm); u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.sgt->sgl, 0); - phys_addr_t inst_pa = gk20a_mem_phys(inst_block); void *inst_ptr = inst_block->cpu_va; gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", - (u64)inst_pa, inst_ptr); + gk20a_mm_inst_block_addr(g, inst_block), inst_ptr); gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 5390536e..4482a21d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -346,6 +346,7 @@ struct mm_gk20a { #endif u32 bypass_smmu; u32 disable_bigpage; + bool has_physical_mode; }; int gk20a_mm_init(struct mm_gk20a *mm); @@ -420,6 +421,7 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block); void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block); void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, u32 big_page_size); +u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct mem_desc *mem); void gk20a_mm_dump_vm(struct vm_gk20a *vm, u64 va_begin, u64 va_end, char *label); diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 3b154f30..957537a9 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -1827,7 +1827,7 @@ int pmu_bootstrap(struct pmu_gk20a *pmu) pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mem_phys(&mm->pmu.inst_block) >> 12) | + gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 79f90f0b..0e6e715d 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -1169,7 +1169,7 @@ static int bl_bootstrap(struct pmu_gk20a *pmu, pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - gk20a_mem_phys(&mm->pmu.inst_block) >> 12) | + gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index b9763224..188d1781 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -20,25 +20,25 @@ #include "hw_ram_gm20b.h" #include "hw_fifo_gm20b.h" -static void channel_gm20b_bind(struct channel_gk20a *ch_gk20a) +static void channel_gm20b_bind(struct channel_gk20a *c) { - struct gk20a *g = ch_gk20a->g; + struct gk20a *g = c->g; - u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block) + u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) >> ram_in_base_shift_v(); gk20a_dbg_info("bind channel %d inst ptr 0x%08x", - ch_gk20a->hw_chid, inst_ptr); + c->hw_chid, inst_ptr); - ch_gk20a->bound = true; + c->bound = true; - gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid), + gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid), ccsr_channel_inst_ptr_f(inst_ptr) | ccsr_channel_inst_target_vid_mem_f() | ccsr_channel_inst_bind_true_f()); - gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid), - (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) & + gk20a_writel(g, ccsr_channel_r(c->hw_chid), + (gk20a_readl(g, ccsr_channel_r(c->hw_chid)) & ~ccsr_channel_enable_set_f(~0)) | ccsr_channel_enable_set_true_f()); } -- cgit v1.2.2