From b8915ab5aabb02866019221c51d96f304658207f Mon Sep 17 00:00:00 2001
From: Konsta Holtta
Date: Fri, 17 Jun 2016 15:56:07 +0300
Subject: gpu: nvgpu: support in-kernel vidmem mappings

Propagate the buffer aperture flag in gk20a_locked_gmmu_map up so that
buffers represented as a mem_desc and present in vidmem can be mapped
to gpu.

JIRA DNVGPU-18
JIRA DNVGPU-76

Change-Id: I46cf87e27229123016727339b9349d5e2c835b3e
Signed-off-by: Konsta Holtta
Reviewed-on: http://git-master/r/1169308
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c            |  3 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h                |  3 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c             | 66 ++++++++++++--------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c             | 38 ++++++++++-----
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h             | 16 +++++--
 drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c |  7 +--
 drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c      |  6 ++-
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c            |  6 ++-
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c               |  3 +-
 9 files changed, 85 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 7818f046..02b1938a 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1215,7 +1215,8 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 				   g->gr.compbit_store.mem.size,
 				   NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				   gk20a_mem_flag_read_only,
-				   false);
+				   false,
+				   gr->compbit_store.mem.aperture);
 
 	if (!vaddr) {
 		gk20a_warn(cde_ctx->dev, "cde: cannot map compression bit backing store");
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 45e16ad9..b8a2fc3e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -492,7 +492,8 @@ struct gpu_ops {
 				bool clear_ctags,
 				bool sparse,
 				bool priv,
-				struct vm_gk20a_mapping_batch *batch);
+				struct vm_gk20a_mapping_batch *batch,
+				enum gk20a_aperture aperture);
 		void (*gmmu_unmap)(struct vm_gk20a *vm,
 				u64 vaddr,
 				u64 size,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index bdc65cab..0d97e84c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1824,7 +1824,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 					&pm_ctx->mem.sgt,
 					pm_ctx->mem.size,
 					NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-					gk20a_mem_flag_none, true);
+					gk20a_mem_flag_none, true,
+					pm_ctx->mem.aperture);
 		if (!pm_ctx->mem.gpu_va) {
 			gk20a_err(dev_from_gk20a(g),
 				"failed to map pm ctxt buffer");
@@ -2046,7 +2047,8 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 					ucode_info->surface_desc.size,
 					0, /* flags */
 					gk20a_mem_flag_read_only,
-					false);
+					false,
+					ucode_info->surface_desc.aperture);
 	if (!ucode_info->surface_desc.gpu_va) {
 		gk20a_err(d, "failed to update gmmu ptes\n");
 		return -ENOMEM;
@@ -2650,82 +2652,73 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
 	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
 	struct gr_gk20a *gr = &g->gr;
-	struct sg_table *sgt;
-	u64 size;
+	struct mem_desc *mem;
 	u64 gpu_va;
 	u32 i;
 	gk20a_dbg_fn("");
 
 	/* Circular Buffer */
 	if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].mem.sgt == NULL)) {
-		sgt = gr->global_ctx_buffer[CIRCULAR].mem.sgt;
-		size = gr->global_ctx_buffer[CIRCULAR].mem.size;
+		mem = &gr->global_ctx_buffer[CIRCULAR].mem;
 	} else {
-		sgt = gr->global_ctx_buffer[CIRCULAR_VPR].mem.sgt;
-		size = gr->global_ctx_buffer[CIRCULAR_VPR].mem.size;
+		mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem;
 	}
 
-	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
+	gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none, true);
+				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[CIRCULAR_VA] = gpu_va;
-	g_bfr_size[CIRCULAR_VA] = size;
+	g_bfr_size[CIRCULAR_VA] = mem->size;
 
 	/* Attribute Buffer */
 	if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.sgt == NULL)) {
-		sgt = gr->global_ctx_buffer[ATTRIBUTE].mem.sgt;
-		size = gr->global_ctx_buffer[ATTRIBUTE].mem.size;
+		mem = &gr->global_ctx_buffer[ATTRIBUTE].mem;
 	} else {
-		sgt = gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.sgt;
-		size = gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.size;
+		mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem;
 	}
 
-	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
+	gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none, false);
+				gk20a_mem_flag_none, false, mem->aperture);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[ATTRIBUTE_VA] = gpu_va;
-	g_bfr_size[ATTRIBUTE_VA] = size;
+	g_bfr_size[ATTRIBUTE_VA] = mem->size;
 
 	/* Page Pool */
 	if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].mem.sgt == NULL)) {
-		sgt = gr->global_ctx_buffer[PAGEPOOL].mem.sgt;
-		size = gr->global_ctx_buffer[PAGEPOOL].mem.size;
+		mem = &gr->global_ctx_buffer[PAGEPOOL].mem;
 	} else {
-		sgt = gr->global_ctx_buffer[PAGEPOOL_VPR].mem.sgt;
-		size = gr->global_ctx_buffer[PAGEPOOL_VPR].mem.size;
+		mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem;
 	}
 
-	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size,
+	gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none, true);
+				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PAGEPOOL_VA] = gpu_va;
-	g_bfr_size[PAGEPOOL_VA] = size;
+	g_bfr_size[PAGEPOOL_VA] = mem->size;
 
 	/* Golden Image */
-	sgt = gr->global_ctx_buffer[GOLDEN_CTX].mem.sgt;
-	size = gr->global_ctx_buffer[GOLDEN_CTX].mem.size;
-	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
-				gk20a_mem_flag_none, true);
+	mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
+	gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size, 0,
+				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
-	g_bfr_size[GOLDEN_CTX_VA] = size;
+	g_bfr_size[GOLDEN_CTX_VA] = mem->size;
 
 	/* Priv register Access Map */
-	sgt = gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.sgt;
-	size = gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size;
-	gpu_va = gk20a_gmmu_map(ch_vm, &sgt, size, 0,
-				gk20a_mem_flag_none, true);
+	mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
+	gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size, 0,
+				gk20a_mem_flag_none, true, mem->aperture);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
-	g_bfr_size[PRIV_ACCESS_MAP_VA] = size;
+	g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
 
 	c->ch_ctx.global_ctx_buffer_mapped = true;
 	return 0;
@@ -2793,7 +2786,8 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 	gr_ctx->mem.gpu_va = gk20a_gmmu_map(vm, &gr_ctx->mem.sgt,
 				gr_ctx->mem.size,
 				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				gk20a_mem_flag_none, true);
+				gk20a_mem_flag_none, true,
+				gr_ctx->mem.aperture);
 	if (!gr_ctx->mem.gpu_va)
 		goto err_free_mem;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 6fdfacdd..bb32749d 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1594,7 +1594,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			bool clear_ctags,
 			bool sparse,
 			bool priv,
-			struct vm_gk20a_mapping_batch *batch)
+			struct vm_gk20a_mapping_batch *batch,
+			enum gk20a_aperture aperture)
 {
 	int err = 0;
 	bool allocated = false;
@@ -1642,7 +1643,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 				rw_flag,
 				sparse,
 				priv,
-				APERTURE_SYSMEM); /* no vidmem bufs yet */
+				aperture);
 	if (err) {
 		gk20a_err(d, "failed to update ptes on map");
 		goto fail_validate;
@@ -1998,7 +1999,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 					clear_ctags,
 					false,
 					false,
-					batch);
+					batch,
+					APERTURE_SYSMEM); /* no vidmem yet */
 	if (!map_offset)
 		goto clean_up;
@@ -2256,7 +2258,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 			false, /* clear_ctags */
 			false, /* sparse */
 			false, /* priv */
-			NULL); /* mapping_batch handle */
+			NULL, /* mapping_batch handle */
+			g->gr.compbit_store.mem.aperture);
 
 	if (!mapped_buffer->ctag_map_win_addr) {
 		mutex_unlock(&vm->update_gmmu_lock);
@@ -2295,7 +2298,8 @@ static u64 __gk20a_gmmu_map(struct vm_gk20a *vm,
 		u64 size,
 		u32 flags,
 		int rw_flag,
-		bool priv)
+		bool priv,
+		enum gk20a_aperture aperture)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
@@ -2312,7 +2316,8 @@ static u64 __gk20a_gmmu_map(struct vm_gk20a *vm,
 			false, /* clear_ctags */
 			false, /* sparse */
 			priv, /* priv */
-			NULL); /* mapping_batch handle */
+			NULL, /* mapping_batch handle */
+			aperture);
 	mutex_unlock(&vm->update_gmmu_lock);
 	if (!vaddr) {
 		gk20a_err(dev_from_vm(vm), "failed to allocate va space");
@@ -2327,9 +2332,11 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		u64 size,
 		u32 flags,
 		int rw_flag,
-		bool priv)
+		bool priv,
+		enum gk20a_aperture aperture)
 {
-	return __gk20a_gmmu_map(vm, sgt, 0, size, flags, rw_flag, priv);
+	return __gk20a_gmmu_map(vm, sgt, 0, size, flags, rw_flag, priv,
+			aperture);
 }
 
 /*
@@ -2341,9 +2348,11 @@ u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
 		u64 size,
 		u32 flags,
 		int rw_flag,
-		bool priv)
+		bool priv,
+		enum gk20a_aperture aperture)
 {
-	return __gk20a_gmmu_map(vm, sgt, addr, size, flags, rw_flag, priv);
+	return __gk20a_gmmu_map(vm, sgt, addr, size, flags, rw_flag, priv,
+			aperture);
 }
 
 int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem)
@@ -2599,7 +2608,8 @@ int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm,
 		return err;
 
 	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
-				     gk20a_mem_flag_none, false);
+				     gk20a_mem_flag_none, false,
+				     mem->aperture);
 	if (!mem->gpu_va) {
 		err = -ENOMEM;
 		goto fail_free;
@@ -2626,7 +2636,8 @@ int gk20a_gmmu_alloc_map_attr_vid(struct vm_gk20a *vm,
 		return err;
 
 	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0,
-				     gk20a_mem_flag_none, false);
+				     gk20a_mem_flag_none, false,
+				     mem->aperture);
 	if (!mem->gpu_va) {
 		err = -ENOMEM;
 		goto fail_free;
@@ -3727,7 +3738,8 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 					 false,
 					 true,
 					 false,
-					 NULL);
+					 NULL,
+					 APERTURE_INVALID);
 	if (!map_offset) {
 		mutex_unlock(&vm->update_gmmu_lock);
 		gk20a_bfree(vma, vaddr_start);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index a697e520..f87ba605 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -40,8 +40,13 @@
 		outer_flush_range(pa, pa + (size_t)(size));	\
 	} while (0)
 
+/*
+ * Real location of a buffer - gk20a_aperture_mask() will deduce what will be
+ * told to the gpu about the aperture, but this flag designates where the
+ * memory actually was allocated from.
+ */
 enum gk20a_aperture {
-	APERTURE_INVALID, /* e.g., unallocated */
+	APERTURE_INVALID, /* unallocated or N/A */
 	APERTURE_SYSMEM,
 	APERTURE_VIDMEM
 };
@@ -520,14 +525,16 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		u64 size,
 		u32 flags,
 		int rw_flag,
-		bool priv);
+		bool priv,
+		enum gk20a_aperture aperture);
 u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm,
 		struct sg_table **sgt,
 		u64 addr,
 		u64 size,
 		u32 flags,
 		int rw_flag,
-		bool priv);
+		bool priv,
+		enum gk20a_aperture aperture);
 
 int gk20a_gmmu_alloc_map(struct vm_gk20a *vm,
 		size_t size,
@@ -619,7 +626,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			bool clear_ctags,
 			bool sparse,
 			bool priv,
-			struct vm_gk20a_mapping_batch *batch);
+			struct vm_gk20a_mapping_batch *batch,
+			enum gk20a_aperture aperture);
 
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 		u64 vaddr,
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index b8f70ab3..25f9a8dd 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -130,9 +130,6 @@ int gk20a_tegra_secure_alloc(struct device *dev,
 	if (dma_mapping_error(&tegra_vpr_dev, iova))
 		return -ENOMEM;
 
-	desc->mem.size = size;
-	desc->destroy = gk20a_tegra_secure_destroy;
-
 	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
 	if (!sgt) {
 		gk20a_err(dev, "failed to allocate memory\n");
@@ -148,7 +145,11 @@ int gk20a_tegra_secure_alloc(struct device *dev,
 	/* This bypasses SMMU for VPR during gmmu_map. */
 	sg_dma_address(sgt->sgl) = 0;
 
+	desc->destroy = gk20a_tegra_secure_destroy;
+
 	desc->mem.sgt = sgt;
+	desc->mem.size = size;
+	desc->mem.aperture = APERTURE_SYSMEM;
 
 	return err;
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index aa375b24..113c59ef 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -187,7 +187,8 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
 	/* Map into the GPU... Doesn't need to be fixed. */
 	p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE,
-				   0, gk20a_mem_flag_none, false);
+				   0, gk20a_mem_flag_none, false,
+				   APERTURE_SYSMEM);
 	if (!p->gpu_va) {
 		err = -ENOMEM;
 		goto fail_unmap_sgt;
@@ -204,7 +205,8 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
 				p->sema_sea->gpu_va, p->sema_sea->map_size,
 				0, gk20a_mem_flag_read_only,
-				false);
+				false,
+				APERTURE_SYSMEM);
 	if (!addr) {
 		err = -ENOMEM;
 		BUG();
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index eb9ae08c..c503bc48 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -412,7 +412,8 @@ int prepare_ucode_blob(struct gk20a *g)
 	sg_dma_address(sgt->sgl) = 0;
 
 	g->pmu.wpr_buf.gpu_va = gk20a_gmmu_map(vm, &sgt, wprsize,
-					       0, gk20a_mem_flag_none, false);
+					       0, gk20a_mem_flag_none, false,
+					       APERTURE_SYSMEM);
 	gm20b_dbg_pmu("wpr mapped gpu va :%llx\n", g->pmu.wpr_buf.gpu_va);
 
 	/* Discover all managed falcons*/
@@ -1412,7 +1413,8 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 		acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, &acr->hsbl_ucode.sgt,
 					bl_sz,
 					0, /* flags */
-					gk20a_mem_flag_read_only, false);
+					gk20a_mem_flag_read_only, false,
+					acr->hsbl_ucode.aperture);
 		if (!acr->hsbl_ucode.gpu_va) {
 			gk20a_err(d, "failed to map pmu ucode memory!!");
 			goto err_free_ucode;
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 8af01158..2239fcbc 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -80,7 +80,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 			bool clear_ctags,
 			bool sparse,
 			bool priv,
-			struct vm_gk20a_mapping_batch *batch)
+			struct vm_gk20a_mapping_batch *batch,
+			enum gk20a_aperture aperture)
 {
 	int err = 0;
 	struct device *d = dev_from_vm(vm);
-- 
cgit v1.2.2
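Usage note (editorial illustration, not part of the change): with this
patch, every in-kernel mapping call states where its backing memory
really lives. Buffers wrapped in a mem_desc forward mem->aperture, as
the gr_gk20a.c and mm_gk20a.c hunks above do; buffers known to be in
system memory (the semaphore pool, the ACR WPR buffer) pass
APERTURE_SYSMEM explicitly; and gk20a_vm_alloc_space() passes
APERTURE_INVALID because a sparse VA reservation has no physical
backing. A minimal sketch of the mem_desc pattern against the new
gk20a_gmmu_map() signature follows -- the helper name example_map_mem()
is invented for illustration; only the gk20a_gmmu_map() call reflects
the interface introduced here:

	/* Hypothetical helper, not in the tree: map a mem_desc and forward
	 * its recorded backing aperture (sysmem or vidmem) so that the PTE
	 * update no longer hardcodes APERTURE_SYSMEM. */
	#include "gk20a/mm_gk20a.h"

	static u64 example_map_mem(struct vm_gk20a *vm, struct mem_desc *mem)
	{
		return gk20a_gmmu_map(vm, &mem->sgt, mem->size,
				      0, /* flags */
				      gk20a_mem_flag_none,
				      false, /* priv */
				      mem->aperture);
	}

Since a mem_desc records its aperture at allocation time, callers no
longer need to know whether a given buffer ended up in sysmem or vidmem
before mapping it.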