From 7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Thu, 26 Feb 2015 14:37:43 -0800
Subject: gpu: nvgpu: Implement common allocator and mem_desc

Introduce mem_desc, which holds all information needed for a buffer.
Implement helper functions for allocation and freeing that use this
data type.

Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/712699
---
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c     |  75 ++++-----------
 drivers/gpu/nvgpu/gk20a/cde_gk20a.h     |  10 +-
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c |  12 +--
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h |   2 +-
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |   2 +-
 drivers/gpu/nvgpu/gk20a/debug_gk20a.c   |   2 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    | 125 +++++------------------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h    |   5 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c      | 166 ++++++------------------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h      |  11 +--
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 135 +++++++++++++++-----------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |  79 ++++++---------
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c     | 164 +++++--------------------------
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.h     |   8 +-
 14 files changed, 238 insertions(+), 558 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a')

diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index ea01914c..fb368fda 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -46,15 +46,11 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
 
 static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
 {
-	struct device *dev = &cde_ctx->pdev->dev;
 	int i;
 
 	for (i = 0; i < cde_ctx->num_bufs; i++) {
-		struct gk20a_cde_mem_desc *mem = cde_ctx->mem + i;
-		gk20a_gmmu_unmap(cde_ctx->vm, mem->gpu_va, mem->num_bytes, 1);
-		gk20a_free_sgtable(&mem->sgt);
-		dma_free_writecombine(dev, mem->num_bytes, mem->cpuva,
-				      mem->iova);
+		struct mem_desc *mem = cde_ctx->mem + i;
+		gk20a_gmmu_unmap_free(cde_ctx->vm, mem);
 	}
 
 	kfree(cde_ctx->init_convert_cmd);
@@ -225,8 +221,7 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
 			      const struct firmware *img,
 			      struct gk20a_cde_hdr_buf *buf)
 {
-	struct device *dev = &cde_ctx->pdev->dev;
-	struct gk20a_cde_mem_desc *mem;
+	struct mem_desc *mem;
 	int err;
 
 	/* check that the file can hold the buf */
@@ -246,49 +241,21 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
 
 	/* allocate buf */
 	mem = cde_ctx->mem + cde_ctx->num_bufs;
-	mem->num_bytes = buf->num_bytes;
-	mem->cpuva = dma_alloc_writecombine(dev, mem->num_bytes, &mem->iova,
-					    GFP_KERNEL);
-	if (!mem->cpuva) {
+	err = gk20a_gmmu_alloc_map(cde_ctx->vm, buf->num_bytes, mem);
+	if (err) {
 		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d",
 			   cde_ctx->num_bufs);
 		return -ENOMEM;
 	}
 
-	err = gk20a_get_sgtable(dev, &mem->sgt, mem->cpuva, mem->iova,
-				mem->num_bytes);
-	if (err) {
-		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not get sg table. buffer idx = %d",
-			   cde_ctx->num_bufs);
-		err = -ENOMEM;
-		goto err_get_sgtable;
-	}
-
-	mem->gpu_va = gk20a_gmmu_map(cde_ctx->vm, &mem->sgt, mem->num_bytes,
-				     0,
-				     gk20a_mem_flag_none);
-	if (!mem->gpu_va) {
-		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not map buffer to gpuva. buffer idx = %d",
-			   cde_ctx->num_bufs);
-		err = -ENOMEM;
-		goto err_map_buffer;
-	}
-
 	/* copy the content */
 	if (buf->data_byte_offset != 0)
-		memcpy(mem->cpuva, img->data + buf->data_byte_offset,
+		memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
 			buf->num_bytes);
 
 	cde_ctx->num_bufs++;
 
 	return 0;
-
-err_map_buffer:
-	gk20a_free_sgtable(&mem->sgt);
-	kfree(mem->sgt);
-err_get_sgtable:
-	dma_free_writecombine(dev, mem->num_bytes, &mem->cpuva, mem->iova);
-	return err;
 }
 
 static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
@@ -340,8 +307,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
 				  const struct firmware *img,
 				  struct gk20a_cde_hdr_replace *replace)
 {
-	struct gk20a_cde_mem_desc *source_mem;
-	struct gk20a_cde_mem_desc *target_mem;
+	struct mem_desc *source_mem;
+	struct mem_desc *target_mem;
 	u32 *target_mem_ptr;
 	u64 vaddr;
 	int err;
@@ -356,15 +323,15 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
 
 	source_mem = cde_ctx->mem + replace->source_buf;
 	target_mem = cde_ctx->mem + replace->target_buf;
-	target_mem_ptr = target_mem->cpuva;
+	target_mem_ptr = target_mem->cpu_va;
 
-	if (source_mem->num_bytes < (replace->source_byte_offset + 3) ||
-	    target_mem->num_bytes < (replace->target_byte_offset + 3)) {
+	if (source_mem->size < (replace->source_byte_offset + 3) ||
+	    target_mem->size < (replace->target_byte_offset + 3)) {
 		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
 			   replace->target_byte_offset,
 			   replace->source_byte_offset,
-			   source_mem->num_bytes,
-			   target_mem->num_bytes);
+			   source_mem->size,
+			   target_mem->size);
 		return -EINVAL;
 	}
 
@@ -390,7 +357,7 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
 static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
 {
 	struct gk20a *g = cde_ctx->g;
-	struct gk20a_cde_mem_desc *target_mem;
+	struct mem_desc *target_mem;
 	u32 *target_mem_ptr;
 	u64 new_data;
 	int user_id = 0, i, err;
@@ -398,7 +365,7 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
 	for (i = 0; i < cde_ctx->num_params; i++) {
 		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
 		target_mem = cde_ctx->mem + param->target_buf;
-		target_mem_ptr = target_mem->cpuva;
+		target_mem_ptr = target_mem->cpu_va;
 		target_mem_ptr += (param->target_byte_offset / sizeof(u32));
 
 		switch (param->id) {
@@ -472,7 +439,7 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
 				const struct firmware *img,
 				struct gk20a_cde_hdr_param *param)
 {
-	struct gk20a_cde_mem_desc *target_mem;
+	struct mem_desc *target_mem;
 
 	if (param->target_buf >= cde_ctx->num_bufs) {
 		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
@@ -482,10 +449,10 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
 	}
 
 	target_mem = cde_ctx->mem + param->target_buf;
-	if (target_mem->num_bytes < (param->target_byte_offset + 3)) {
+	if (target_mem->size < (param->target_byte_offset + 3)) {
 		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
 			   cde_ctx->num_params,
 			   param->target_byte_offset,
-			   target_mem->num_bytes);
+			   target_mem->size);
 		return -EINVAL;
 	}
 
@@ -563,7 +530,7 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
 
 		gpfifo_elem = *gpfifo;
 		for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
-			struct gk20a_cde_mem_desc *target_mem;
+			struct mem_desc *target_mem;
 
 			/* validate the current entry */
 			if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
@@ -573,10 +540,10 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
 			}
 
 			target_mem = cde_ctx->mem + cmd_elem->target_buf;
-			if (target_mem->num_bytes <
+			if (target_mem->size <
 			    cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
 				gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
-					   target_mem->num_bytes,
+					   target_mem->size,
 					   cmd_elem->target_byte_offset,
 					   cmd_elem->num_bytes);
 				return -EINVAL;
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index 58480d26..a5c75ae8 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -205,14 +205,6 @@ enum {
 	TYPE_ARRAY
 };
 
-struct gk20a_cde_mem_desc {
-	struct sg_table *sgt;
-	dma_addr_t iova;
-	void *cpuva;
-	size_t num_bytes;
-	u64 gpu_va;
-};
-
 struct gk20a_cde_param {
 	u32 id;
 	u32 padding;
@@ -228,7 +220,7 @@ struct gk20a_cde_ctx {
 	struct vm_gk20a *vm;
 
 	/* buf converter configuration */
-	struct gk20a_cde_mem_desc mem[MAX_CDE_BUFS];
+	struct mem_desc mem[MAX_CDE_BUFS];
 	int num_bufs;
 
 	/* buffer patching params (where should patching be done) */
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 62092930..9a0800d1 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -89,7 +89,7 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
 {
 	gk20a_dbg_fn("");
 
-	if (!c->inst_block.cpuva)
+	if (!c->inst_block.cpu_va)
 		return -ENOMEM;
 
 	gk20a_init_inst_block(&c->inst_block, c->vm,
@@ -106,7 +106,7 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpuva;
+	inst_ptr = c->inst_block.cpu_va;
 	if (!inst_ptr)
 		return -ENOMEM;
 
@@ -134,7 +134,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 	int shift = 3;
 	int value = timeslice_timeout;
 
-	inst_ptr = c->inst_block.cpuva;
+	inst_ptr = c->inst_block.cpu_va;
 	if (!inst_ptr)
 		return -ENOMEM;
 
@@ -177,7 +177,7 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpuva;
+	inst_ptr = c->inst_block.cpu_va;
 	if (!inst_ptr)
 		return -ENOMEM;
 
@@ -263,7 +263,7 @@ static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
 	struct fifo_engine_info_gk20a *engine_info =
 		f->engine_info + ENGINE_GR_GK20A;
 
-	u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
+	u32 inst_ptr = sg_phys(ch_gk20a->inst_block.sgt->sgl)
 		>> ram_in_base_shift_v();
 
 	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
@@ -322,7 +322,7 @@ int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 		return err;
 
 	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
-		ch->hw_chid, (u64)ch->inst_block.cpu_pa);
+		ch->hw_chid, (u64)sg_phys(ch->inst_block.sgt->sgl));
 
 	gk20a_dbg_fn("done");
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 034de53f..ddb91f9b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++
b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -99,7 +99,7 @@ struct channel_gk20a { struct channel_ctx_gk20a ch_ctx; - struct inst_desc inst_block; + struct mem_desc inst_block; struct mem_desc_sub ramfc; void *userd_cpu_va; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 7cda9949..217f0056 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -851,7 +851,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, gk20a_writel(g, perf_pmasys_outsize_r(), virt_size); /* this field is aligned to 4K */ - inst_pa_page = g->mm.hwpm.inst_block.cpu_pa >> 12; + inst_pa_page = gk20a_mem_phys(&g->mm.hwpm.inst_block) >> 12; /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK * should be written last */ diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index 9dfab370..ace05c07 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -104,7 +104,7 @@ static void gk20a_debug_show_channel(struct gk20a *g, u32 syncpointa, syncpointb; void *inst_ptr; - inst_ptr = ch->inst_block.cpuva; + inst_ptr = ch->inst_block.cpu_va; if (!inst_ptr) return; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index cf1242ab..dee58d0a 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -159,7 +159,6 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g) static void gk20a_remove_fifo_support(struct fifo_gk20a *f) { struct gk20a *g = f->g; - struct device *d = dev_from_gk20a(g); struct fifo_engine_info_gk20a *engine_info; struct fifo_runlist_info_gk20a *runlist; u32 runlist_id; @@ -175,36 +174,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f) } kfree(f->channel); } - if (f->userd.gpu_va) - gk20a_gmmu_unmap(&g->mm.bar1.vm, - f->userd.gpu_va, - f->userd.size, - gk20a_mem_flag_none); - - if (f->userd.sgt) - gk20a_free_sgtable(&f->userd.sgt); - - if (f->userd.cpuva) - dma_free_coherent(d, - f->userd_total_size, - f->userd.cpuva, - f->userd.iova); - f->userd.cpuva = NULL; - f->userd.iova = 0; + gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); engine_info = f->engine_info + ENGINE_GR_GK20A; runlist_id = engine_info->runlist_id; runlist = &f->runlist_info[runlist_id]; - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { - if (runlist->mem[i].cpuva) - dma_free_coherent(d, - runlist->mem[i].size, - runlist->mem[i].cpuva, - runlist->mem[i].iova); - runlist->mem[i].cpuva = NULL; - runlist->mem[i].iova = 0; - } + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) + gk20a_gmmu_free(g, &runlist->mem[i]); kfree(runlist->active_channels); kfree(runlist->active_tsgs); @@ -327,19 +304,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) runlist_size = ram_rl_entry_size_v() * f->num_channels; for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { - dma_addr_t iova; - - runlist->mem[i].cpuva = - dma_alloc_coherent(d, - runlist_size, - &iova, - GFP_KERNEL); - if (!runlist->mem[i].cpuva) { + int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); + if (err) { dev_err(d, "memory allocation failed\n"); goto clean_up_runlist; } - runlist->mem[i].iova = iova; - runlist->mem[i].size = runlist_size; } mutex_init(&runlist->mutex); @@ -351,15 +320,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) return 0; clean_up_runlist: - for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { - if (runlist->mem[i].cpuva) - dma_free_coherent(d, - 
runlist->mem[i].size, - runlist->mem[i].cpuva, - runlist->mem[i].iova); - runlist->mem[i].cpuva = NULL; - runlist->mem[i].iova = 0; - } + for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) + gk20a_gmmu_free(g, &runlist->mem[i]); kfree(runlist->active_channels); runlist->active_channels = NULL; @@ -502,7 +464,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) struct fifo_gk20a *f = &g->fifo; struct device *d = dev_from_gk20a(g); int chid, i, err = 0; - dma_addr_t iova; gk20a_dbg_fn(""); @@ -521,43 +482,17 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) f->max_engines = ENGINE_INVAL_GK20A; f->userd_entry_size = 1 << ram_userd_base_shift_v(); - f->userd_total_size = f->userd_entry_size * f->num_channels; - f->userd.cpuva = dma_alloc_coherent(d, - f->userd_total_size, - &iova, - GFP_KERNEL); - if (!f->userd.cpuva) { - dev_err(d, "memory allocation failed\n"); - err = -ENOMEM; - goto clean_up; - } - - f->userd.iova = iova; - err = gk20a_get_sgtable(d, &f->userd.sgt, - f->userd.cpuva, f->userd.iova, - f->userd_total_size); + err = gk20a_gmmu_alloc_map(&g->mm.bar1.vm, + f->userd_entry_size * f->num_channels, + &f->userd); if (err) { - dev_err(d, "failed to create sg table\n"); - goto clean_up; - } - - /* bar1 va */ - f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm, - &f->userd.sgt, - f->userd_total_size, - 0, /* flags */ - gk20a_mem_flag_none); - if (!f->userd.gpu_va) { - dev_err(d, "gmmu mapping failed\n"); - err = -ENOMEM; + dev_err(d, "memory allocation failed\n"); goto clean_up; } gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); - f->userd.size = f->userd_total_size; - f->channel = kzalloc(f->num_channels * sizeof(*f->channel), GFP_KERNEL); f->tsg = kzalloc(f->num_channels * sizeof(*f->tsg), @@ -582,9 +517,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) for (chid = 0; chid < f->num_channels; chid++) { f->channel[chid].userd_cpu_va = - f->userd.cpuva + chid * f->userd_entry_size; + f->userd.cpu_va + chid * f->userd_entry_size; f->channel[chid].userd_iova = - gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) + gk20a_mm_iova_addr(g, f->userd.sgt->sgl) + chid * f->userd_entry_size; f->channel[chid].userd_gpu_va = f->userd.gpu_va + chid * f->userd_entry_size; @@ -607,22 +542,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) clean_up: gk20a_dbg_fn("fail"); - if (f->userd.gpu_va) - gk20a_gmmu_unmap(&g->mm.bar1.vm, - f->userd.gpu_va, - f->userd.size, - gk20a_mem_flag_none); - if (f->userd.sgt) - gk20a_free_sgtable(&f->userd.sgt); - if (f->userd.cpuva) - dma_free_coherent(d, - f->userd_total_size, - f->userd.cpuva, - f->userd.iova); - f->userd.cpuva = NULL; - f->userd.iova = 0; - - memset(&f->userd, 0, sizeof(struct userd_desc)); + gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd); kfree(f->channel); f->channel = NULL; @@ -657,7 +577,7 @@ static int gk20a_init_fifo_setup_hw(struct gk20a *g) u32 v, v1 = 0x33, v2 = 0x55; u32 bar1_vaddr = f->userd.gpu_va; - volatile u32 *cpu_vaddr = f->userd.cpuva; + volatile u32 *cpu_vaddr = f->userd.cpu_va; gk20a_dbg_info("test bar1 @ vaddr 0x%x", bar1_vaddr); @@ -725,8 +645,8 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) return NULL; for (ci = 0; ci < f->num_channels; ci++) { struct channel_gk20a *c = f->channel+ci; - if (c->inst_block.cpuva && - (inst_ptr == c->inst_block.cpu_pa)) + if (c->inst_block.cpu_va && + (inst_ptr == sg_phys(c->inst_block.sgt->sgl))) return f->channel+ci; } return NULL; @@ -1082,10 +1002,10 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) gk20a_fifo_set_ctx_mmu_error_ch(g, ch); 
gk20a_channel_abort(ch); } else if (f.inst_ptr == - g->mm.bar1.inst_block.cpu_pa) { + sg_phys(g->mm.bar1.inst_block.sgt->sgl)) { gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); } else if (f.inst_ptr == - g->mm.pmu.inst_block.cpu_pa) { + sg_phys(g->mm.pmu.inst_block.sgt->sgl)) { gk20a_err(dev_from_gk20a(g), "mmu fault from pmu"); } else gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault"); @@ -1893,7 +1813,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, bool wait_for_finish) { u32 ret = 0; - struct device *d = dev_from_gk20a(g); struct fifo_gk20a *f = &g->fifo; struct fifo_runlist_info_gk20a *runlist = NULL; u32 *runlist_entry_base = NULL; @@ -1935,15 +1854,15 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, new_buf = !runlist->cur_buffer; gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", - runlist_id, runlist->mem[new_buf].iova); + runlist_id, (u64)gk20a_mem_phys(&runlist->mem[new_buf])); - runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova); + runlist_pa = gk20a_mem_phys(&runlist->mem[new_buf]); if (!runlist_pa) { ret = -EINVAL; goto clean_up; } - runlist_entry_base = runlist->mem[new_buf].cpuva; + runlist_entry_base = runlist->mem[new_buf].cpu_va; if (!runlist_entry_base) { ret = -ENOMEM; goto clean_up; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 4ff1398a..dd320ae1 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -32,7 +32,7 @@ struct fifo_runlist_info_gk20a { unsigned long *active_channels; unsigned long *active_tsgs; /* Each engine has its own SW and HW runlist buffer.*/ - struct runlist_mem_desc mem[MAX_RUNLIST_BUFFERS]; + struct mem_desc mem[MAX_RUNLIST_BUFFERS]; u32 cur_buffer; u32 total_entries; bool stopped; @@ -102,9 +102,8 @@ struct fifo_gk20a { struct fifo_runlist_info_gk20a *runlist_info; u32 max_runlists; - struct userd_desc userd; + struct mem_desc userd; u32 userd_entry_size; - u32 userd_total_size; struct channel_gk20a *channel; struct mutex ch_inuse_mutex; /* protect unused chid look up */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index e9b209c4..a160942f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -567,7 +567,7 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) gk20a_dbg_fn(""); - inst_ptr = c->inst_block.cpuva; + inst_ptr = c->inst_block.cpu_va; if (!inst_ptr) return -ENOMEM; @@ -674,7 +674,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, struct channel_gk20a *c) { - u32 inst_base_ptr = u64_lo32(c->inst_block.cpu_pa + u32 inst_base_ptr = u64_lo32(gk20a_mem_phys(&c->inst_block) >> ram_in_base_shift_v()); u32 ret; @@ -1375,7 +1375,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) int ret; u32 inst_base_ptr = - u64_lo32(c->inst_block.cpu_pa + u64_lo32(gk20a_mem_phys(&c->inst_block) >> ram_in_base_shift_v()); @@ -1671,7 +1671,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, if (tegra_platform_is_linsim()) { u32 inst_base_ptr = - u64_lo32(c->inst_block.cpu_pa + u64_lo32(gk20a_mem_phys(&c->inst_block) >> ram_in_base_shift_v()); ret = gr_gk20a_submit_fecs_method_op(g, @@ -1729,12 +1729,12 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) gk20a_init_inst_block(&ucode_info->inst_blk_desc, vm, 0); /* Map ucode surface to GMMU */ - 
ucode_info->ucode_gpuva = gk20a_gmmu_map(vm, + ucode_info->surface_desc.gpu_va = gk20a_gmmu_map(vm, &ucode_info->surface_desc.sgt, ucode_info->surface_desc.size, 0, /* flags */ gk20a_mem_flag_read_only); - if (!ucode_info->ucode_gpuva) { + if (!ucode_info->surface_desc.gpu_va) { gk20a_err(d, "failed to update gmmu ptes\n"); return -ENOMEM; } @@ -1798,8 +1798,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) u8 *buf; u32 ucode_size; int err = 0; - dma_addr_t iova; - DEFINE_DMA_ATTRS(attrs); fecs_fw = gk20a_request_firmware(g, GK20A_FECS_UCODE_IMAGE); if (!fecs_fw) { @@ -1832,30 +1830,12 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); - ucode_info->surface_desc.size = ucode_size; - dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); - ucode_info->surface_desc.cpuva = dma_alloc_attrs(d, - ucode_info->surface_desc.size, - &iova, - GFP_KERNEL, - &attrs); - if (!ucode_info->surface_desc.cpuva) { - gk20a_err(d, "memory allocation failed\n"); - err = -ENOMEM; - goto clean_up; - } - - ucode_info->surface_desc.iova = iova; - err = gk20a_get_sgtable(d, &ucode_info->surface_desc.sgt, - ucode_info->surface_desc.cpuva, - ucode_info->surface_desc.iova, - ucode_info->surface_desc.size); - if (err) { - gk20a_err(d, "failed to create sg table\n"); + err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size, + &ucode_info->surface_desc); + if (err) goto clean_up; - } - buf = (u8 *)ucode_info->surface_desc.cpuva; + buf = (u8 *)ucode_info->surface_desc.cpu_va; if (!buf) { gk20a_err(d, "failed to map surface desc buffer"); err = -ENOMEM; @@ -1882,23 +1862,13 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) if (err) goto clean_up; - gk20a_free_sgtable(&ucode_info->surface_desc.sgt); - return 0; clean_up: - if (ucode_info->ucode_gpuva) - gk20a_gmmu_unmap(vm, ucode_info->ucode_gpuva, + if (ucode_info->surface_desc.gpu_va) + gk20a_gmmu_unmap(vm, ucode_info->surface_desc.gpu_va, ucode_info->surface_desc.size, gk20a_mem_flag_none); - if (ucode_info->surface_desc.sgt) - gk20a_free_sgtable(&ucode_info->surface_desc.sgt); - if (ucode_info->surface_desc.cpuva) - dma_free_attrs(d, ucode_info->surface_desc.size, - ucode_info->surface_desc.cpuva, - ucode_info->surface_desc.iova, - &attrs); - ucode_info->surface_desc.cpuva = NULL; - ucode_info->surface_desc.iova = 0; + gk20a_gmmu_free(g, &ucode_info->surface_desc); release_firmware(gpccs_fw); gpccs_fw = NULL; @@ -1928,7 +1898,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); - inst_ptr = ucode_info->inst_blk_desc.cpu_pa; + inst_ptr = gk20a_mem_phys(&ucode_info->inst_blk_desc); gk20a_writel(g, gr_fecs_new_ctx_r(), gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | gr_fecs_new_ctx_target_m() | @@ -2111,7 +2081,7 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) { struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; - u64 addr_base = ucode_info->ucode_gpuva; + u64 addr_base = ucode_info->surface_desc.gpu_va; gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); @@ -2128,6 +2098,7 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) { + int err; gk20a_dbg_fn(""); @@ -2147,8 +2118,12 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) gr_gk20a_load_falcon_imem(g); gr_gk20a_start_falcon_ucode(g); } else { - if (!g->gr.skip_ucode_init) - 
gr_gk20a_init_ctxsw_ucode(g); + if (!g->gr.skip_ucode_init) { + err = gr_gk20a_init_ctxsw_ucode(g); + + if (err) + return err; + } gr_gk20a_load_falcon_with_bootloader(g); g->gr.skip_ucode_init = true; } @@ -2976,21 +2951,13 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) gr_gk20a_free_global_ctx_buffers(g); - dma_free_coherent(d, gr->mmu_wr_mem.size, - gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); - gr->mmu_wr_mem.cpuva = NULL; - gr->mmu_wr_mem.iova = 0; - dma_free_coherent(d, gr->mmu_rd_mem.size, - gr->mmu_rd_mem.cpuva, gr->mmu_rd_mem.iova); - gr->mmu_rd_mem.cpuva = NULL; - gr->mmu_rd_mem.iova = 0; + gk20a_gmmu_free(g, &gr->mmu_wr_mem); + gk20a_gmmu_free(g, &gr->mmu_rd_mem); dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); dma_free_attrs(d, gr->compbit_store.size, gr->compbit_store.pages, gr->compbit_store.base_iova, &attrs); - memset(&gr->mmu_wr_mem, 0, sizeof(struct mmu_desc)); - memset(&gr->mmu_rd_mem, 0, sizeof(struct mmu_desc)); memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc)); kfree(gr->gpc_tpc_count); @@ -3234,33 +3201,19 @@ clean_up: static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr) { - struct device *d = dev_from_gk20a(g); - dma_addr_t iova; - - gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000; + int err; - gr->mmu_wr_mem.size = gr->mmu_wr_mem_size; - gr->mmu_wr_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_wr_mem_size, - &iova, GFP_KERNEL); - if (!gr->mmu_wr_mem.cpuva) + err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_wr_mem); + if (err) goto err; - gr->mmu_wr_mem.iova = iova; - - gr->mmu_rd_mem.size = gr->mmu_rd_mem_size; - gr->mmu_rd_mem.cpuva = dma_zalloc_coherent(d, gr->mmu_rd_mem_size, - &iova, GFP_KERNEL); - if (!gr->mmu_rd_mem.cpuva) + err = gk20a_gmmu_alloc(g, 0x1000, &gr->mmu_rd_mem); + if (err) goto err_free_wr_mem; - - gr->mmu_rd_mem.iova = iova; return 0; err_free_wr_mem: - dma_free_coherent(d, gr->mmu_wr_mem.size, - gr->mmu_wr_mem.cpuva, gr->mmu_wr_mem.iova); - gr->mmu_wr_mem.cpuva = NULL; - gr->mmu_wr_mem.iova = 0; + gk20a_gmmu_free(g, &gr->mmu_wr_mem); err: return -ENOMEM; } @@ -4241,7 +4194,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gk20a_dbg_fn(""); /* init mmu debug buffer */ - addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_wr_mem.iova); + addr = gk20a_mm_iova_addr(g, gr->mmu_wr_mem.sgt->sgl); addr >>= fb_mmu_debug_wr_addr_alignment_v(); gk20a_writel(g, fb_mmu_debug_wr_r(), @@ -4249,7 +4202,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) fb_mmu_debug_wr_vol_false_f() | fb_mmu_debug_wr_addr_f(addr)); - addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_rd_mem.iova); + addr = gk20a_mm_iova_addr(g, gr->mmu_rd_mem.sgt->sgl); addr >>= fb_mmu_debug_rd_addr_alignment_v(); gk20a_writel(g, fb_mmu_debug_rd_r(), @@ -4651,8 +4604,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) int err = 0; u32 size; - struct sg_table *sgt_pg_buf; - dma_addr_t iova; gk20a_dbg_fn(""); @@ -4665,50 +4616,24 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) return err; } - if (!pmu->pg_buf.cpuva) { - pmu->pg_buf.cpuva = dma_alloc_coherent(d, size, - &iova, - GFP_KERNEL); - if (!pmu->pg_buf.cpuva) { + if (!pmu->pg_buf.cpu_va) { + err = gk20a_gmmu_alloc_map(vm, size, &pmu->pg_buf); + if (err) { gk20a_err(d, "failed to allocate memory\n"); return -ENOMEM; } - - pmu->pg_buf.iova = iova; - pmu->pg_buf.size = size; - - err = gk20a_get_sgtable(d, &sgt_pg_buf, - pmu->pg_buf.cpuva, - pmu->pg_buf.iova, - size); - if (err) { - gk20a_err(d, "failed to create sg table\n"); - goto err_free_pg_buf; - } - - pmu->pg_buf.pmu_va 
= gk20a_gmmu_map(vm, - &sgt_pg_buf, - size, - 0, /* flags */ - gk20a_mem_flag_none); - if (!pmu->pg_buf.pmu_va) { - gk20a_err(d, "failed to map fecs pg buffer"); - err = -ENOMEM; - goto err_free_sgtable; - } - - gk20a_free_sgtable(&sgt_pg_buf); } - err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa); + err = gr_gk20a_fecs_set_reglist_bind_inst(g, + gk20a_mem_phys(&mm->pmu.inst_block)); if (err) { gk20a_err(dev_from_gk20a(g), "fail to bind pmu inst to gr"); return err; } - err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.pmu_va); + err = gr_gk20a_fecs_set_reglist_virtual_addr(g, pmu->pg_buf.gpu_va); if (err) { gk20a_err(dev_from_gk20a(g), "fail to set pg buffer pmu va"); @@ -4716,15 +4641,6 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g) } return err; - -err_free_sgtable: - gk20a_free_sgtable(&sgt_pg_buf); -err_free_pg_buf: - dma_free_coherent(d, size, - pmu->pg_buf.cpuva, pmu->pg_buf.iova); - pmu->pg_buf.cpuva = NULL; - pmu->pg_buf.iova = 0; - return err; } int gk20a_init_gr_support(struct gk20a *g) @@ -4983,14 +4899,14 @@ int gk20a_gr_reset(struct gk20a *g) } err = gr_gk20a_fecs_set_reglist_bind_inst(g, - g->mm.pmu.inst_block.cpu_pa); + gk20a_mem_phys(&g->mm.pmu.inst_block)); if (err) { gk20a_err(dev_from_gk20a(g), "fail to bind pmu inst to gr"); return err; } - err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.pmu_va); + err = gr_gk20a_fecs_set_reglist_virtual_addr(g, g->pmu.pg_buf.gpu_va); if (err) { gk20a_err(dev_from_gk20a(g), "fail to set pg buffer pmu va"); @@ -5357,7 +5273,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, /* slow path */ for (chid = 0; chid < f->num_channels; chid++) if (f->channel[chid].in_use) { - if ((u32)(f->channel[chid].inst_block.cpu_pa >> + if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >> ram_in_base_shift_v()) == gr_fecs_current_ctx_ptr_v(curr_ctx)) { tsgid = f->channel[chid].tsgid; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 5dfaac5f..81615e0f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -256,10 +256,8 @@ struct gr_gk20a { struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF]; - struct mmu_desc mmu_wr_mem; - u32 mmu_wr_mem_size; - struct mmu_desc mmu_rd_mem; - u32 mmu_rd_mem_size; + struct mem_desc mmu_wr_mem; + struct mem_desc mmu_rd_mem; u8 *map_tiles; u32 map_tile_count; @@ -336,9 +334,8 @@ struct gk20a_ctxsw_ucode_segments { struct gk20a_ctxsw_ucode_info { u64 *p_va; - struct inst_desc inst_blk_desc; - struct surface_mem_desc surface_desc; - u64 ucode_gpuva; + struct mem_desc inst_blk_desc; + struct mem_desc surface_desc; struct gk20a_ctxsw_ucode_segments fecs; struct gk20a_ctxsw_ucode_segments gpccs; }; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index c3895a53..954249c6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -268,7 +268,7 @@ static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) return 0; } -static void gk20a_remove_vm(struct vm_gk20a *vm, struct inst_desc *inst_block) +static void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block) { struct gk20a *g = vm->mm->g; @@ -335,8 +335,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) int gk20a_init_mm_setup_hw(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; - struct inst_desc *inst_block = &mm->bar1.inst_block; - phys_addr_t inst_pa = inst_block->cpu_pa; + struct mem_desc *inst_block = &mm->bar1.inst_block; + 
phys_addr_t inst_pa = gk20a_mem_phys(inst_block); int err; gk20a_dbg_fn(""); @@ -1516,54 +1516,95 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, return vaddr; } -int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, - size_t size, struct mem_desc *mem) +int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem) +{ + return gk20a_gmmu_alloc_attr(g, 0, size, mem); +} + +int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, struct mem_desc *mem) { - struct gk20a *g = vm->mm->g; struct device *d = dev_from_gk20a(g); int err; - struct sg_table *sgt; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (attr) { + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(attr, &attrs); + mem->cpu_va = + dma_alloc_attrs(d, size, &iova, GFP_KERNEL, &attrs); + } else { + mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL); + } - mem->cpu_va = dma_alloc_coherent(d, size, &mem->iova, GFP_KERNEL); if (!mem->cpu_va) return -ENOMEM; - err = gk20a_get_sgtable(d, &sgt, mem->cpu_va, mem->iova, size); + err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size); if (err) goto fail_free; - mem->gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, gk20a_mem_flag_none); - gk20a_free_sgtable(&sgt); + mem->size = size; + memset(mem->cpu_va, 0, size); + + gk20a_dbg_fn("done"); + + return 0; + +fail_free: + dma_free_coherent(d, size, mem->cpu_va, iova); + mem->cpu_va = NULL; + mem->sgt = NULL; + return err; +} + +void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) +{ + struct device *d = dev_from_gk20a(g); + + if (mem->cpu_va) + dma_free_coherent(d, mem->size, mem->cpu_va, + sg_dma_address(mem->sgt->sgl)); + mem->cpu_va = NULL; + + if (mem->sgt) + gk20a_free_sgtable(&mem->sgt); +} + +int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) +{ + return gk20a_gmmu_alloc_map_attr(vm, 0, size, mem); +} + +int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm, + enum dma_attr attr, size_t size, struct mem_desc *mem) +{ + int err = gk20a_gmmu_alloc_attr(vm->mm->g, attr, size, mem); + + if (err) + return err; + + mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, gk20a_mem_flag_none); if (!mem->gpu_va) { err = -ENOMEM; goto fail_free; } - mem->size = size; - return 0; fail_free: - dma_free_coherent(d, size, mem->cpu_va, mem->iova); - mem->cpu_va = NULL; - mem->iova = 0; - + gk20a_gmmu_free(vm->mm->g, mem); return err; } void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem) { - struct gk20a *g = vm->mm->g; - struct device *d = dev_from_gk20a(g); - if (mem->gpu_va) gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none); mem->gpu_va = 0; - if (mem->cpu_va) - dma_free_coherent(d, mem->size, mem->cpu_va, mem->iova); - mem->cpu_va = NULL; - mem->iova = 0; + gk20a_gmmu_free(vm->mm->g, mem); } dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) @@ -2644,42 +2685,24 @@ void gk20a_deinit_vm(struct vm_gk20a *vm) kfree(vm->pdb.entries); } -int gk20a_alloc_inst_block(struct gk20a *g, struct inst_desc *inst_block) +int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block) { struct device *dev = dev_from_gk20a(g); - dma_addr_t iova; + int err; - inst_block->size = ram_in_alloc_size_v(); - inst_block->cpuva = dma_alloc_coherent(dev, inst_block->size, - &iova, GFP_KERNEL); - if (!inst_block->cpuva) { + err = gk20a_gmmu_alloc(g, ram_in_alloc_size_v(), inst_block); + if (err) { gk20a_err(dev, "%s: memory allocation failed\n", __func__); - return -ENOMEM; - } - - inst_block->iova = iova; - inst_block->cpu_pa = gk20a_get_phys_from_iova(dev, 
inst_block->iova); - if (!inst_block->cpu_pa) { - gk20a_err(dev, "%s: failed to get phys address\n", __func__); - gk20a_free_inst_block(g, inst_block); - return -ENOMEM; + return err; } - memset(inst_block->cpuva, 0, inst_block->size); - return 0; } -void gk20a_free_inst_block(struct gk20a *g, struct inst_desc *inst_block) +void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block) { - struct device *dev = dev_from_gk20a(g); - - if (inst_block->cpuva) { - dma_free_coherent(dev, inst_block->size, - inst_block->cpuva, inst_block->iova); - } - - memset(inst_block, 0, sizeof(*inst_block)); + if (inst_block->cpu_va) + gk20a_gmmu_free(g, inst_block); } static int gk20a_init_bar1_vm(struct mm_gk20a *mm) @@ -2687,7 +2710,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) int err; struct vm_gk20a *vm = &mm->bar1.vm; struct gk20a *g = gk20a_from_mm(mm); - struct inst_desc *inst_block = &mm->bar1.inst_block; + struct mem_desc *inst_block = &mm->bar1.inst_block; u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; @@ -2713,7 +2736,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) int err; struct vm_gk20a *vm = &mm->pmu.vm; struct gk20a *g = gk20a_from_mm(mm); - struct inst_desc *inst_block = &mm->pmu.inst_block; + struct mem_desc *inst_block = &mm->pmu.inst_block; u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; @@ -2739,7 +2762,7 @@ static int gk20a_init_hwpm(struct mm_gk20a *mm) int err; struct vm_gk20a *vm = &mm->pmu.vm; struct gk20a *g = gk20a_from_mm(mm); - struct inst_desc *inst_block = &mm->hwpm.inst_block; + struct mem_desc *inst_block = &mm->hwpm.inst_block; err = gk20a_alloc_inst_block(g, inst_block); if (err) @@ -2763,13 +2786,13 @@ void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) ram_in_page_dir_base_hi_f(pdb_addr_hi)); } -void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, +void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, u32 big_page_size) { struct gk20a *g = gk20a_from_vm(vm); u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl); - phys_addr_t inst_pa = inst_block->cpu_pa; - void *inst_ptr = inst_block->cpuva; + phys_addr_t inst_pa = gk20a_mem_phys(inst_block); + void *inst_ptr = inst_block->cpu_va; gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", (u64)inst_pa, inst_ptr); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 54028e73..ca7fef01 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -46,7 +46,7 @@ struct mem_desc { void *cpu_va; - dma_addr_t iova; + struct sg_table *sgt; size_t size; u64 gpu_va; }; @@ -70,40 +70,6 @@ struct gpfifo_desc { u64 gpu_va; }; -struct mmu_desc { - void *cpuva; - u64 iova; - size_t size; -}; - -struct inst_desc { - u64 iova; - void *cpuva; - phys_addr_t cpu_pa; - size_t size; -}; - -struct surface_mem_desc { - u64 iova; - void *cpuva; - struct sg_table *sgt; - size_t size; -}; - -struct userd_desc { - struct sg_table *sgt; - u64 iova; - void *cpuva; - size_t size; - u64 gpu_va; -}; - -struct runlist_mem_desc { - u64 iova; - void *cpuva; - size_t size; -}; - struct patch_desc { struct page **pages; u64 iova; @@ -113,13 +79,6 @@ struct patch_desc { u32 data_count; }; -struct pmu_mem_desc { - void *cpuva; - u64 iova; - u64 pmu_va; - size_t size; -}; - struct priv_cmd_queue_mem_desc { u64 base_iova; u32 *base_cpuva; @@ -336,24 
+295,24 @@ struct mm_gk20a { struct { u32 aperture_size; struct vm_gk20a vm; - struct inst_desc inst_block; + struct mem_desc inst_block; } bar1; struct { u32 aperture_size; struct vm_gk20a vm; - struct inst_desc inst_block; + struct mem_desc inst_block; } bar2; struct { u32 aperture_size; struct vm_gk20a vm; - struct inst_desc inst_block; + struct mem_desc inst_block; } pmu; struct { /* using pmu vm currently */ - struct inst_desc inst_block; + struct mem_desc inst_block; } hwpm; @@ -406,9 +365,9 @@ static inline int max_vaddr_bits_gk20a(void) #define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() #endif -int gk20a_alloc_inst_block(struct gk20a *g, struct inst_desc *inst_block); -void gk20a_free_inst_block(struct gk20a *g, struct inst_desc *inst_block); -void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, +int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block); +void gk20a_free_inst_block(struct gk20a *g, struct mem_desc *inst_block); +void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, u32 big_page_size); void gk20a_mm_dump_vm(struct vm_gk20a *vm, @@ -448,9 +407,31 @@ int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, struct mem_desc *mem); +int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm, + enum dma_attr attr, + size_t size, + struct mem_desc *mem); + void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem); +int gk20a_gmmu_alloc(struct gk20a *g, + size_t size, + struct mem_desc *mem); + +int gk20a_gmmu_alloc_attr(struct gk20a *g, + enum dma_attr attr, + size_t size, + struct mem_desc *mem); + +void gk20a_gmmu_free(struct gk20a *g, + struct mem_desc *mem); + +static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem) +{ + return sg_phys(mem->sgt->sgl); +} + u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, u64 map_offset, struct sg_table *sgt, diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index f2430165..95bb1eb6 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -146,7 +146,7 @@ static void set_pmu_cmdline_args_falctracesize_v2( static void set_pmu_cmdline_args_falctracedmabase_v2(struct pmu_gk20a *pmu) { - pmu->args_v2.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; + pmu->args_v2.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100; } static void set_pmu_cmdline_args_falctracedmaidx_v2( @@ -177,7 +177,7 @@ static void set_pmu_cmdline_args_falctracesize_v3( static void set_pmu_cmdline_args_falctracedmabase_v3(struct pmu_gk20a *pmu) { - pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; + pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.gpu_va)/0x100; } static void set_pmu_cmdline_args_falctracedmaidx_v3( @@ -218,9 +218,9 @@ static bool find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos) static void printtrace(struct pmu_gk20a *pmu) { u32 i = 0, j = 0, k, l, m, count; - char *trace = pmu->trace_buf.cpuva; + char *trace = pmu->trace_buf.cpu_va; char part_str[40], buf[0x40]; - u32 *trace1 = pmu->trace_buf.cpuva; + u32 *trace1 = pmu->trace_buf.cpu_va; struct gk20a *g = gk20a_from_pmu(pmu); gk20a_err(dev_from_gk20a(g), "Dump pmutrace"); for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { @@ -249,7 +249,7 @@ static void printtrace(struct pmu_gk20a *pmu) static void set_pmu_cmdline_args_falctracedmabase_v1(struct pmu_gk20a *pmu) { - pmu->args_v1.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100; + pmu->args_v1.falc_trace_dma_base = 
((u32)pmu->trace_buf.gpu_va)/0x100; } static void set_pmu_cmdline_args_falctracedmaidx_v1( @@ -1349,7 +1349,7 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu) pwr_falcon_itfen_ctxen_enable_f()); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f( - mm->pmu.inst_block.cpu_pa >> 12) | + sg_phys(mm->pmu.inst_block.sgt->sgl) >> 12) | pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); @@ -1377,13 +1377,13 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu) pwr_falcon_dmemc_blk_f(0) | pwr_falcon_dmemc_aincw_f(1)); - addr_code = u64_lo32((pmu->ucode.pmu_va + + addr_code = u64_lo32((pmu->ucode.gpu_va + desc->app_start_offset + desc->app_resident_code_offset) >> 8) ; - addr_data = u64_lo32((pmu->ucode.pmu_va + + addr_data = u64_lo32((pmu->ucode.gpu_va + desc->app_start_offset + desc->app_resident_data_offset) >> 8); - addr_load = u64_lo32((pmu->ucode.pmu_va + + addr_load = u64_lo32((pmu->ucode.gpu_va + desc->bootloader_start_offset) >> 8); gk20a_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE); @@ -1942,13 +1942,10 @@ static int gk20a_prepare_ucode(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; int i, err = 0; - struct sg_table *sgt_pmu_ucode; - dma_addr_t iova; struct device *d = dev_from_gk20a(g); struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; void *ucode_ptr; - DEFINE_DMA_ATTRS(attrs); if (g->pmu_fw) { gk20a_init_pmu(pmu); @@ -1967,56 +1964,21 @@ static int gk20a_prepare_ucode(struct gk20a *g) pmu->ucode_image = (u32 *)((u8 *)pmu->desc + pmu->desc->descriptor_size); - dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); - pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, - &iova, - GFP_KERNEL, - &attrs); - if (!pmu->ucode.cpuva) { - gk20a_err(d, "failed to allocate memory\n"); - err = -ENOMEM; + err = gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY, + GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode); + if (err) goto err_release_fw; - } - - pmu->ucode.iova = iova; - - err = gk20a_get_sgtable(d, &sgt_pmu_ucode, - pmu->ucode.cpuva, - pmu->ucode.iova, - GK20A_PMU_UCODE_SIZE_MAX); - if (err) { - gk20a_err(d, "failed to allocate sg table\n"); - goto err_free_pmu_ucode; - } - pmu->ucode.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode, - GK20A_PMU_UCODE_SIZE_MAX, - 0, /* flags */ - gk20a_mem_flag_read_only); - if (!pmu->ucode.pmu_va) { - gk20a_err(d, "failed to map pmu ucode memory!!"); - goto err_free_ucode_sgt; - } - - ucode_ptr = pmu->ucode.cpuva; + ucode_ptr = pmu->ucode.cpu_va; for (i = 0; i < (pmu->desc->app_start_offset + pmu->desc->app_size) >> 2; i++) gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); - gk20a_free_sgtable(&sgt_pmu_ucode); - gk20a_init_pmu(pmu); return 0; - err_free_ucode_sgt: - gk20a_free_sgtable(&sgt_pmu_ucode); - err_free_pmu_ucode: - dma_free_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, - pmu->ucode.cpuva, pmu->ucode.iova, &attrs); - pmu->ucode.cpuva = NULL; - pmu->ucode.iova = 0; err_release_fw: release_firmware(g->pmu_fw); @@ -2031,9 +1993,6 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g) struct device *d = dev_from_gk20a(g); int i, err = 0; u8 *ptr; - struct sg_table *sgt_seq_buf; - struct sg_table *sgt_pmu_buf; - dma_addr_t iova; gk20a_dbg_fn(""); @@ -2082,70 +2041,19 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g) INIT_WORK(&pmu->pg_init, pmu_setup_hw); - pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, - &iova, - GFP_KERNEL); - if (!pmu->seq_buf.cpuva) { + err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_SEQ_BUF_SIZE, &pmu->seq_buf); + if (err) { gk20a_err(d, "failed to allocate memory\n"); - err 
= -ENOMEM; goto err_free_seq; } - pmu->seq_buf.iova = iova; - - pmu->trace_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_TRACE_BUFSIZE, - &iova, - GFP_KERNEL); - if (!pmu->trace_buf.cpuva) { + err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_TRACE_BUFSIZE, &pmu->trace_buf); + if (err) { gk20a_err(d, "failed to allocate trace memory\n"); - err = -ENOMEM; goto err_free_seq_buf; } - pmu->trace_buf.iova = iova; - err = gk20a_get_sgtable(d, &sgt_seq_buf, - pmu->seq_buf.cpuva, - pmu->seq_buf.iova, - GK20A_PMU_SEQ_BUF_SIZE); - if (err) { - gk20a_err(d, "failed to allocate seq buf sg table\n"); - goto err_free_trace_buf; - } - - pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf, - GK20A_PMU_SEQ_BUF_SIZE, - 0, /* flags */ - gk20a_mem_flag_none); - if (!pmu->seq_buf.pmu_va) { - gk20a_err(d, "failed to gmmu map seq buf memory!!"); - err = -ENOMEM; - goto err_free_seq_buf_sgt; - } - - err = gk20a_get_sgtable(d, &sgt_pmu_buf, - pmu->trace_buf.cpuva, - pmu->trace_buf.iova, - GK20A_PMU_TRACE_BUFSIZE); - if (err) { - gk20a_err(d, "failed to allocate sg table for Trace\n"); - goto err_unmap_seq_buf; - } - - pmu->trace_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_buf, - GK20A_PMU_TRACE_BUFSIZE, - 0, /* flags */ - gk20a_mem_flag_none); - if (!pmu->trace_buf.pmu_va) { - gk20a_err(d, "failed to gmmu map pmu trace memory!!"); - err = -ENOMEM; - goto err_free_trace_buf_sgt; - } - - ptr = (u8 *)pmu->seq_buf.cpuva; - if (!ptr) { - gk20a_err(d, "failed to map cpu ptr for zbc buffer"); - goto err_unmap_trace_buf; - } + ptr = (u8 *)pmu->seq_buf.cpu_va; /* TBD: remove this if ZBC save/restore is handled by PMU * end an empty ZBC sequence for now */ @@ -2155,35 +2063,13 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g) pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; - gk20a_free_sgtable(&sgt_seq_buf); - gk20a_free_sgtable(&sgt_pmu_buf); - pmu->sw_ready = true; skip_init: gk20a_dbg_fn("done"); return 0; - err_unmap_trace_buf: - gk20a_gmmu_unmap(vm, pmu->trace_buf.pmu_va, - GK20A_PMU_TRACE_BUFSIZE, gk20a_mem_flag_none); - err_free_trace_buf_sgt: - gk20a_free_sgtable(&sgt_pmu_buf); - err_unmap_seq_buf: - gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va, - GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none); - err_free_seq_buf_sgt: - gk20a_free_sgtable(&sgt_seq_buf); - err_free_trace_buf: - dma_free_coherent(d, GK20A_PMU_TRACE_BUFSIZE, - pmu->trace_buf.cpuva, pmu->trace_buf.iova); - pmu->trace_buf.cpuva = NULL; - pmu->trace_buf.iova = 0; - err_free_seq_buf: - dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE, - pmu->seq_buf.cpuva, pmu->seq_buf.iova); - pmu->seq_buf.cpuva = NULL; - pmu->seq_buf.iova = 0; + gk20a_gmmu_unmap_free(vm, &pmu->seq_buf); err_free_seq: kfree(pmu->seq); err_free_mutex: @@ -2306,8 +2192,8 @@ int gk20a_init_pmu_bind_fecs(struct gk20a *g) cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_FECS; cmd.cmd.pg.eng_buf_load.buf_size = pmu->pg_buf.size; - cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.pmu_va >> 8); - cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.pmu_va & 0xFF); + cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->pg_buf.gpu_va >> 8); + cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->pg_buf.gpu_va & 0xFF); cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT; pmu->buf_loaded = false; @@ -2331,8 +2217,8 @@ static void pmu_setup_hw_load_zbc(struct gk20a *g) cmd.cmd.pg.eng_buf_load.engine_id = ENGINE_GR_GK20A; cmd.cmd.pg.eng_buf_load.buf_idx = PMU_PGENG_GR_BUFFER_IDX_ZBC; cmd.cmd.pg.eng_buf_load.buf_size = pmu->seq_buf.size; - 
cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.pmu_va >> 8);
-	cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.pmu_va & 0xFF);
+	cmd.cmd.pg.eng_buf_load.dma_base = u64_lo32(pmu->seq_buf.gpu_va >> 8);
+	cmd.cmd.pg.eng_buf_load.dma_offset = (u8)(pmu->seq_buf.gpu_va & 0xFF);
 	cmd.cmd.pg.eng_buf_load.dma_idx = PMU_DMAIDX_VIRT;
 
 	pmu->buf_loaded = false;
@@ -4100,9 +3986,9 @@ static int falc_trace_show(struct seq_file *s, void *data)
 	struct gk20a *g = s->private;
 	struct pmu_gk20a *pmu = &g->pmu;
 	u32 i = 0, j = 0, k, l, m;
-	char *trace = pmu->trace_buf.cpuva;
+	char *trace = pmu->trace_buf.cpu_va;
 	char part_str[40];
-	u32 *trace1 = pmu->trace_buf.cpuva;
+	u32 *trace1 = pmu->trace_buf.cpu_va;
 	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
 		for (j = 0; j < 0x40; j++)
 			if (trace1[(i / 4) + j])
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index e4865180..6cd173e8 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -1124,12 +1124,12 @@ struct pmu_pg_stats {
 
 struct pmu_gk20a {
 	struct pmu_ucode_desc *desc;
-	struct pmu_mem_desc ucode;
+	struct mem_desc ucode;
 
-	struct pmu_mem_desc pg_buf;
+	struct mem_desc pg_buf;
 	/* TBD: remove this if ZBC seq is fixed */
-	struct pmu_mem_desc seq_buf;
-	struct pmu_mem_desc trace_buf;
+	struct mem_desc seq_buf;
+	struct mem_desc trace_buf;
 	bool buf_loaded;
 
 	struct pmu_sha1_gid gid_info;
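
For review context, a minimal usage sketch of the helpers this patch introduces. The helper names and mem_desc fields are taken from the diff above; example_mem_desc_usage(), my_vm, src and len are hypothetical, and error handling is reduced to the essentials:

	/* Illustrative sketch only, not part of the patch. */
	static int example_mem_desc_usage(struct vm_gk20a *my_vm,
					  const void *src, size_t len)
	{
		struct mem_desc mem = {0};
		int err;

		/* Replaces the old dma_alloc_*() + gk20a_get_sgtable() +
		 * gk20a_gmmu_map() sequence; on success cpu_va, sgt, size
		 * and gpu_va are all filled in. */
		err = gk20a_gmmu_alloc_map(my_vm, len, &mem);
		if (err)
			return err;

		/* The CPU writes through cpu_va; the GPU sees gpu_va. */
		memcpy(mem.cpu_va, src, len);

		/* Code that needs a physical address uses the sgt-backed
		 * helper instead of the removed cpu_pa field. */
		pr_debug("buffer phys = 0x%llx\n", (u64)gk20a_mem_phys(&mem));

		/* One call tears down the mapping, the sg table and the DMA
		 * allocation, mirroring gk20a_deinit_cde_img() above. */
		gk20a_gmmu_unmap_free(my_vm, &mem);
		return 0;
	}

The attr variants, gk20a_gmmu_alloc_attr() and gk20a_gmmu_alloc_map_attr(), take an additional enum dma_attr, e.g. DMA_ATTR_READ_ONLY as used for the ucode surfaces in gr_gk20a.c and pmu_gk20a.c.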