From e32f62fadfcde413bcd9b5af61ad884e27ba2bf1 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Thu, 6 Apr 2017 15:30:01 -0700
Subject: gpu: nvgpu: Move Linux nvgpu_mem fields

Hide the Linux-specific nvgpu_mem fields so that, in subsequent patches,
core code can use mem_desc instead of struct sg_table. Routines for
accessing system-specific fields will be added as needed.

This is the first step in a fairly major overhaul of the GMMU mapping
routines. There are numerous issues with the current design (or lack
thereof): massively coupled code, system dependencies, disorganization,
etc.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I2e7d3ae3a07468cfc17c1c642d28ed1b0952474d
Signed-off-by: Alex Waterman
Reviewed-on: http://git-master/r/1464076
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/linux/dma.c | 60 +++++++++++-----------
 drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 2 +-
 drivers/gpu/nvgpu/common/pramin.c | 2 +-
 drivers/gpu/nvgpu/common/semaphore.c | 4 +-
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 2 +-
 drivers/gpu/nvgpu/gk20a/fb_gk20a.c | 7 +--
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 4 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 33 ++++++------
 drivers/gpu/nvgpu/gk20a/ltc_common.c | 2 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 53 +++++++++----------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 2 +-
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 5 +-
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 2 +-
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 2 +-
 drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h | 29 +++++++++++
 drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 48 ++++++++++-------
 .../gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c | 10 ++--
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 4 +-
 drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 2 +-
 19 files changed, 160 insertions(+), 113 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h

diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index 2a75ad13..832d0f47 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c @@ -107,10 +107,10 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, nvgpu_dma_flags_to_attrs(&dma_attrs, flags); if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - mem->pages = dma_alloc_attrs(d, + mem->priv.pages = dma_alloc_attrs(d, size, &iova, GFP_KERNEL, __DMA_ATTR(dma_attrs)); - if (!mem->pages) + if (!mem->priv.pages) return -ENOMEM; } else { mem->cpu_va = dma_alloc_attrs(d, @@ -126,10 +126,12 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, } if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) - err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages, + err = gk20a_get_sgtable_from_pages(d, &mem->priv.sgt, + mem->priv.pages, iova, size); else { - err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size); + err = gk20a_get_sgtable(d, &mem->priv.sgt, mem->cpu_va, + iova, size); memset(mem->cpu_va, 0, size); } if (err) @@ -137,7 +139,7 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, mem->size = size; mem->aperture = APERTURE_SYSMEM; - mem->flags = flags; + mem->priv.flags = flags; gk20a_dbg_fn("done"); @@ -146,7 +148,7 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, fail_free: dma_free_coherent(d, size, mem->cpu_va, iova); mem->cpu_va = NULL; - mem->sgt = NULL; + mem->priv.sgt = NULL; return err; } @@ -204,23 +206,23 @@ int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, else mem->fixed = false; - mem->sgt =
nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!mem->sgt) { + mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!mem->priv.sgt) { err = -ENOMEM; goto fail_physfree; } - err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL); + err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL); if (err) goto fail_kfree; - set_vidmem_page_alloc(mem->sgt->sgl, addr); - sg_set_page(mem->sgt->sgl, NULL, size, 0); + set_vidmem_page_alloc(mem->priv.sgt->sgl, addr); + sg_set_page(mem->priv.sgt->sgl, NULL, size, 0); mem->size = size; mem->aperture = APERTURE_VIDMEM; mem->allocator = vidmem_alloc; - mem->flags = flags; + mem->priv.flags = flags; nvgpu_init_list_node(&mem->clear_list_entry); @@ -229,7 +231,7 @@ int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, return 0; fail_kfree: - nvgpu_kfree(g, mem->sgt); + nvgpu_kfree(g, mem->priv.sgt); fail_physfree: nvgpu_free(&g->mm.vidmem.allocator, addr); return err; @@ -283,7 +285,7 @@ int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, if (err) return err; - mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, + mem->gpu_va = gk20a_gmmu_map(vm, &mem->priv.sgt, size, 0, gk20a_mem_flag_none, false, mem->aperture); if (!mem->gpu_va) { @@ -313,7 +315,7 @@ int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, if (err) return err; - mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, + mem->gpu_va = gk20a_gmmu_map(vm, &mem->priv.sgt, size, 0, gk20a_mem_flag_none, false, mem->aperture); if (!mem->gpu_va) { @@ -332,31 +334,31 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) { struct device *d = dev_from_gk20a(g); - if (mem->cpu_va || mem->pages) { - if (mem->flags) { + if (mem->cpu_va || mem->priv.pages) { + if (mem->priv.flags) { DEFINE_DMA_ATTRS(dma_attrs); - nvgpu_dma_flags_to_attrs(&dma_attrs, mem->flags); + nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags); - if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - dma_free_attrs(d, mem->size, mem->pages, - sg_dma_address(mem->sgt->sgl), + if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + dma_free_attrs(d, mem->size, mem->priv.pages, + sg_dma_address(mem->priv.sgt->sgl), __DMA_ATTR(dma_attrs)); } else { dma_free_attrs(d, mem->size, mem->cpu_va, - sg_dma_address(mem->sgt->sgl), + sg_dma_address(mem->priv.sgt->sgl), __DMA_ATTR(dma_attrs)); } } else { dma_free_coherent(d, mem->size, mem->cpu_va, - sg_dma_address(mem->sgt->sgl)); + sg_dma_address(mem->priv.sgt->sgl)); } mem->cpu_va = NULL; - mem->pages = NULL; + mem->priv.pages = NULL; } - if (mem->sgt) - gk20a_free_sgtable(g, &mem->sgt); + if (mem->priv.sgt) + gk20a_free_sgtable(g, &mem->priv.sgt); mem->size = 0; mem->aperture = APERTURE_INVALID; @@ -368,7 +370,7 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) bool was_empty; /* Sanity check - only this supported when allocating. 
*/ - WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING); + WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING); if (mem->user_mem) { nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); @@ -385,8 +387,8 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) } else { nvgpu_memset(g, mem, 0, 0, mem->size); nvgpu_free(mem->allocator, - (u64)get_vidmem_page_alloc(mem->sgt->sgl)); - gk20a_free_sgtable(g, &mem->sgt); + (u64)get_vidmem_page_alloc(mem->priv.sgt->sgl)); + gk20a_free_sgtable(g, &mem->priv.sgt); mem->size = 0; mem->aperture = APERTURE_INVALID; diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index eb214aad..bb19dd61 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c @@ -57,7 +57,7 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) return -EBUSY; } - cpu_va = vmap(mem->pages, + cpu_va = vmap(mem->priv.pages, PAGE_ALIGN(mem->size) >> PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL)); diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c index 378711fc..688e5ce8 100644 --- a/drivers/gpu/nvgpu/common/pramin.c +++ b/drivers/gpu/nvgpu/common/pramin.c @@ -87,7 +87,7 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, struct page_alloc_chunk *chunk = NULL; u32 byteoff, start_reg, until_end, n; - alloc = get_vidmem_page_alloc(mem->sgt->sgl); + alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, page_alloc_chunk, list_entry) { if (offset >= chunk->length) diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index 9e437410..bf7b6348 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -60,7 +60,7 @@ static int __nvgpu_semaphore_sea_grow(struct nvgpu_semaphore_sea *sea) if (ret) goto out; - sea->ro_sg_table = sea->sea_mem.sgt; + sea->ro_sg_table = sea->sea_mem.priv.sgt; sea->size = SEMAPHORE_POOL_COUNT; sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE; @@ -154,7 +154,7 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc( page_idx = (unsigned long)ret; - p->page = sea->sea_mem.pages[page_idx]; + p->page = sea->sea_mem.priv.pages[page_idx]; p->ro_sg_table = sea->ro_sg_table; p->page_idx = page_idx; p->sema_sea = sea; diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 18432c55..391f6612 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -1239,7 +1239,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) } /* map backing store to gpu virtual space */ - vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.mem.sgt, + vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.mem.priv.sgt, g->gr.compbit_store.mem.size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_read_only, diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c index 214014ce..4a76bd6b 100644 --- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c @@ -44,9 +44,10 @@ void fb_gk20a_reset(struct gk20a *g) void gk20a_fb_init_hw(struct gk20a *g) { - gk20a_writel(g, fb_niso_flush_sysmem_addr_r(), - g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) - >> 8); + u32 addr = g->ops.mm.get_iova_addr(g, + g->mm.sysmem_flush.priv.sgt->sgl, 0) >> 8; + + gk20a_writel(g, fb_niso_flush_sysmem_addr_r(), addr); } static void gk20a_fb_set_mmu_page_size(struct gk20a *g) diff 
--git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 12bb3688..314d4551 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -954,7 +954,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) for (chid = 0; chid < f->num_channels; chid++) { f->channel[chid].userd_iova = - g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) + g->ops.mm.get_iova_addr(g, f->userd.priv.sgt->sgl, 0) + chid * f->userd_entry_size; f->channel[chid].userd_gpu_va = f->userd.gpu_va + chid * f->userd_entry_size; @@ -3148,7 +3148,7 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, new_buf = !runlist->cur_buffer; runlist_iova = g->ops.mm.get_iova_addr( - g, runlist->mem[new_buf].sgt->sgl, 0); + g, runlist->mem[new_buf].priv.sgt->sgl, 0); gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx", runlist_id, (u64)runlist_iova); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 22093a34..f47d3b12 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1943,7 +1943,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } pm_ctx->mem.gpu_va = gk20a_gmmu_map(c->vm, - &pm_ctx->mem.sgt, + &pm_ctx->mem.priv.sgt, pm_ctx->mem.size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, true, @@ -2205,7 +2205,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) /* Map ucode surface to GMMU */ ucode_info->surface_desc.gpu_va = gk20a_gmmu_map(vm, - &ucode_info->surface_desc.sgt, + &ucode_info->surface_desc.priv.sgt, ucode_info->surface_desc.size, 0, /* flags */ gk20a_mem_flag_read_only, @@ -2823,13 +2823,14 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, gk20a_dbg_fn(""); /* Circular Buffer */ - if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].mem.sgt == NULL)) { + if (!c->vpr || + (gr->global_ctx_buffer[CIRCULAR_VPR].mem.priv.sgt == NULL)) { mem = &gr->global_ctx_buffer[CIRCULAR].mem; } else { mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem; } - gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size, + gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, true, mem->aperture); if (!gpu_va) @@ -2838,13 +2839,14 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, g_bfr_size[CIRCULAR_VA] = mem->size; /* Attribute Buffer */ - if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.sgt == NULL)) { + if (!c->vpr || + (gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.priv.sgt == NULL)) { mem = &gr->global_ctx_buffer[ATTRIBUTE].mem; } else { mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem; } - gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size, + gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, false, mem->aperture); if (!gpu_va) @@ -2853,13 +2855,14 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, g_bfr_size[ATTRIBUTE_VA] = mem->size; /* Page Pool */ - if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].mem.sgt == NULL)) { + if (!c->vpr || + (gr->global_ctx_buffer[PAGEPOOL_VPR].mem.priv.sgt == NULL)) { mem = &gr->global_ctx_buffer[PAGEPOOL].mem; } else { mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem; } - gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size, + gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, true, mem->aperture); if (!gpu_va) @@ -2869,7 +2872,7 @@ static int 
gr_gk20a_map_global_ctx_buffers(struct gk20a *g, /* Golden Image */ mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; - gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size, 0, + gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 0, gk20a_mem_flag_none, true, mem->aperture); if (!gpu_va) goto clean_up; @@ -2878,7 +2881,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, /* Priv register Access Map */ mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; - gpu_va = gk20a_gmmu_map(ch_vm, &mem->sgt, mem->size, 0, + gpu_va = gk20a_gmmu_map(ch_vm, &mem->priv.sgt, mem->size, 0, gk20a_mem_flag_none, true, mem->aperture); if (!gpu_va) goto clean_up; @@ -2950,7 +2953,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, goto err_free_ctx; gr_ctx->mem.gpu_va = gk20a_gmmu_map(vm, - &gr_ctx->mem.sgt, + &gr_ctx->mem.priv.sgt, gr_ctx->mem.size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_FALSE, gk20a_mem_flag_none, true, @@ -3196,7 +3199,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, } /* allocate patch buffer */ - if (ch_ctx->patch_ctx.mem.sgt == NULL) { + if (ch_ctx->patch_ctx.mem.priv.sgt == NULL) { err = gr_gk20a_alloc_channel_patch_ctx(g, c); if (err) { nvgpu_err(g, @@ -4735,7 +4738,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) gk20a_dbg_fn(""); /* init mmu debug buffer */ - addr = g->ops.mm.get_iova_addr(g, gr->mmu_wr_mem.sgt->sgl, 0); + addr = g->ops.mm.get_iova_addr(g, gr->mmu_wr_mem.priv.sgt->sgl, 0); addr >>= fb_mmu_debug_wr_addr_alignment_v(); gk20a_writel(g, fb_mmu_debug_wr_r(), @@ -4745,7 +4748,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) fb_mmu_debug_wr_vol_false_f() | fb_mmu_debug_wr_addr_f(addr)); - addr = g->ops.mm.get_iova_addr(g, gr->mmu_rd_mem.sgt->sgl, 0); + addr = g->ops.mm.get_iova_addr(g, gr->mmu_rd_mem.priv.sgt->sgl, 0); addr >>= fb_mmu_debug_rd_addr_alignment_v(); gk20a_writel(g, fb_mmu_debug_rd_r(), @@ -8405,7 +8408,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, } if (!pm_ctx_ready) { /* Make sure ctx buffer was initialized */ - if (!ch_ctx->pm_ctx.mem.pages) { + if (!ch_ctx->pm_ctx.mem.priv.pages) { nvgpu_err(g, "Invalid ctx buffer"); err = -EINVAL; diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index b92dda6d..1958c11c 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c @@ -96,7 +96,7 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) compbit_store_iova = gk20a_mem_phys(&gr->compbit_store.mem); else compbit_store_iova = g->ops.mm.get_iova_addr(g, - gr->compbit_store.mem.sgt->sgl, 0); + gr->compbit_store.mem.priv.sgt->sgl, 0); compbit_base_post_divide64 = compbit_store_iova >> ltc_ltcs_ltss_cbc_base_alignment_shift_v(); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 1db52c85..69e00c5e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -817,27 +817,28 @@ static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, gk20a_dbg(gpu_dbg_pte, "alloc_pages failed"); goto err_out; } - entry->mem.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.sgt)); - if (!entry->mem.sgt) { + entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt)); + if (!entry->mem.priv.sgt) { gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table"); goto err_alloced; } - err = sg_alloc_table(entry->mem.sgt, 1, GFP_KERNEL); + err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL); if (err) { gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed"); goto err_sg_table; } - 
sg_set_page(entry->mem.sgt->sgl, pages, len, 0); + sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0); entry->mem.cpu_va = page_address(pages); memset(entry->mem.cpu_va, 0, len); entry->mem.size = len; entry->mem.aperture = APERTURE_SYSMEM; - FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); + FLUSH_CPU_DCACHE(entry->mem.cpu_va, + sg_phys(entry->mem.priv.sgt->sgl), len); return 0; err_sg_table: - nvgpu_kfree(vm->mm->g, entry->mem.sgt); + nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); err_alloced: __free_pages(pages, order); err_out: @@ -854,9 +855,9 @@ static void free_gmmu_phys_pages(struct vm_gk20a *vm, free_pages((unsigned long)entry->mem.cpu_va, get_order(entry->mem.size)); entry->mem.cpu_va = NULL; - sg_free_table(entry->mem.sgt); - nvgpu_kfree(vm->mm->g, entry->mem.sgt); - entry->mem.sgt = NULL; + sg_free_table(entry->mem.priv.sgt); + nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); + entry->mem.priv.sgt = NULL; entry->mem.size = 0; entry->mem.aperture = APERTURE_INVALID; } @@ -864,16 +865,16 @@ static void free_gmmu_phys_pages(struct vm_gk20a *vm, static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) { FLUSH_CPU_DCACHE(entry->mem.cpu_va, - sg_phys(entry->mem.sgt->sgl), - entry->mem.sgt->sgl->length); + sg_phys(entry->mem.priv.sgt->sgl), + entry->mem.priv.sgt->sgl->length); return 0; } static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry) { FLUSH_CPU_DCACHE(entry->mem.cpu_va, - sg_phys(entry->mem.sgt->sgl), - entry->mem.sgt->sgl->length); + sg_phys(entry->mem.priv.sgt->sgl), + entry->mem.priv.sgt->sgl->length); } static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, @@ -941,7 +942,7 @@ int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) return 0; FLUSH_CPU_DCACHE(entry->mem.cpu_va, - sg_phys(entry->mem.sgt->sgl), + sg_phys(entry->mem.priv.sgt->sgl), entry->mem.size); } else { int err = nvgpu_mem_begin(g, &entry->mem); @@ -967,7 +968,7 @@ void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry) return; FLUSH_CPU_DCACHE(entry->mem.cpu_va, - sg_phys(entry->mem.sgt->sgl), + sg_phys(entry->mem.priv.sgt->sgl), entry->mem.size); } else { nvgpu_mem_end(g, &entry->mem); @@ -1028,9 +1029,9 @@ static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm, gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x", entry, - (entry->mem.sgt && entry->mem.aperture == APERTURE_SYSMEM) ? - g->ops.mm.get_iova_addr(g, entry->mem.sgt->sgl, 0) - : 0, + (entry->mem.priv.sgt && + entry->mem.aperture == APERTURE_SYSMEM) ? + g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0, order, entry->woffset); if (err) return err; @@ -1726,7 +1727,7 @@ static struct sg_table *gk20a_vidbuf_map_dma_buf( { struct gk20a_vidmem_buf *buf = attach->dmabuf->priv; - return buf->mem->sgt; + return buf->mem->priv.sgt; } static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach, @@ -2398,7 +2399,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm, g->ops.mm.gmmu_map( vm, !fixed_mapping ? 
0 : *compbits_win_gva, /* va */ - g->gr.compbit_store.mem.sgt, + g->gr.compbit_store.mem.priv.sgt, cacheline_offset_start, /* sg offset */ mapped_buffer->ctag_map_win_size, /* size */ small_pgsz_index, @@ -2518,7 +2519,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) if (g->mm.vidmem.ce_ctx_id == (u32)~0) return -EINVAL; - alloc = get_vidmem_page_alloc(mem->sgt->sgl); + alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, page_alloc_chunk, list_entry) { @@ -2580,14 +2581,14 @@ u64 gk20a_mem_get_base_addr(struct gk20a *g, struct nvgpu_mem *mem, u64 addr; if (mem->aperture == APERTURE_VIDMEM) { - alloc = get_vidmem_page_alloc(mem->sgt->sgl); + alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); /* This API should not be used with > 1 chunks */ WARN_ON(alloc->nr_chunks != 1); addr = alloc->base; } else { - addr = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, flags); + addr = g->ops.mm.get_iova_addr(g, mem->priv.sgt->sgl, flags); } return addr; @@ -2619,8 +2620,8 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) while ((mem = get_pending_mem_desc(mm)) != NULL) { gk20a_gmmu_clear_vidmem_mem(g, mem); nvgpu_free(mem->allocator, - (u64)get_vidmem_page_alloc(mem->sgt->sgl)); - gk20a_free_sgtable(g, &mem->sgt); + (u64)get_vidmem_page_alloc(mem->priv.sgt->sgl)); + gk20a_free_sgtable(g, &mem->priv.sgt); WARN_ON(atomic64_sub_return(mem->size, &g->mm.vidmem.bytes_pending) < 0); @@ -2774,7 +2775,7 @@ u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry) u64 base; if (g->mm.has_physical_mode) - base = sg_phys(entry->mem.sgt->sgl); + base = sg_phys(entry->mem.priv.sgt->sgl); else base = gk20a_mem_get_base_addr(g, &entry->mem, 0); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7fac811e..94dc0b6f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -539,7 +539,7 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) { /* FIXME: the sgt/sgl may get null if this is accessed e.g. 
in an isr * during channel deletion - attempt to fix at least null derefs */ - struct sg_table *sgt = mem->sgt; + struct sg_table *sgt = mem->priv.sgt; if (sgt) { struct scatterlist *sgl = sgt->sgl; diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index b6afa748..7f0edbb2 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -1075,7 +1075,7 @@ static int gm20b_bootstrap_hs_flcn(struct gk20a *g) u32 *acr_ucode_header_t210_load; u32 *acr_ucode_data_t210_load; - start = g->ops.mm.get_iova_addr(g, acr->ucode_blob.sgt->sgl, 0); + start = g->ops.mm.get_iova_addr(g, acr->ucode_blob.priv.sgt->sgl, 0); size = acr->ucode_blob.size; gm20b_dbg_pmu(""); @@ -1419,7 +1419,8 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) goto err_done; } - acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, &acr->hsbl_ucode.sgt, + acr->hsbl_ucode.gpu_va = gk20a_gmmu_map(vm, + &acr->hsbl_ucode.priv.sgt, bl_sz, 0, /* flags */ gk20a_mem_flag_read_only, false, diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 24e7ffad..7ae6abc2 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -842,7 +842,7 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, return err; mem->gpu_va = gk20a_gmmu_map(vm, - &mem->sgt, + &mem->priv.sgt, size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index c2e0fddf..c5149f22 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -41,7 +41,7 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g) g->ops.fb.set_mmu_page_size(g); gk20a_writel(g, fb_niso_flush_sysmem_addr_r(), - (g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0) + (g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.priv.sgt->sgl, 0) >> 8ULL)); g->ops.bus.bar1_bind(g, inst_block); diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h new file mode 100644 index 00000000..8b1e646e --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef __NVGPU_LINUX_NVGPU_MEM_H__ +#define __NVGPU_LINUX_NVGPU_MEM_H__ + +struct page; +struct sg_table; + +struct nvgpu_mem_priv { + struct page **pages; + struct sg_table *sgt; + unsigned long flags; +}; + +#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index ae5dcc6e..1590ee7a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -17,10 +17,11 @@ #ifndef __NVGPU_NVGPU_MEM_H__ #define __NVGPU_NVGPU_MEM_H__ -#include - +#include #include +#include + struct page; struct sg_table; @@ -39,18 +40,32 @@ enum nvgpu_aperture { }; struct nvgpu_mem { - void *cpu_va; /* sysmem only */ - struct page **pages; /* sysmem only */ - struct sg_table *sgt; - enum nvgpu_aperture aperture; - size_t size; - u64 gpu_va; - bool fixed; /* vidmem only */ - bool user_mem; /* vidmem only */ - struct nvgpu_allocator *allocator; /* vidmem only */ - struct nvgpu_list_node clear_list_entry; /* vidmem only */ - bool skip_wmb; - unsigned long flags; + /* + * Populated for all nvgpu_mem structs - vidmem or system. + */ + enum nvgpu_aperture aperture; + size_t size; + u64 gpu_va; + bool skip_wmb; + + /* + * Only populated for a sysmem allocation. + */ + void *cpu_va; + + /* + * Fields only populated for vidmem allocations. + */ + bool fixed; + bool user_mem; + struct nvgpu_allocator *allocator; + struct nvgpu_list_node clear_list_entry; + + /* + * This is defined by the system specific header. It can be empty if + * there's no system specific stuff for a given system. + */ + struct nvgpu_mem_priv priv; }; static inline struct nvgpu_mem * @@ -61,11 +76,6 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node) clear_list_entry)); }; -struct nvgpu_mem_sub { - u32 offset; - u32 size; -}; - static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture) { switch (aperture) { diff --git a/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c index 7ce66707..852dcdf2 100644 --- a/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/tegra/linux/platform_gk20a_tegra.c @@ -138,13 +138,13 @@ static void gk20a_tegra_secure_destroy(struct gk20a *g, { DEFINE_DMA_ATTRS(attrs); - if (desc->mem.sgt) { - phys_addr_t pa = sg_phys(desc->mem.sgt->sgl); + if (desc->mem.priv.sgt) { + phys_addr_t pa = sg_phys(desc->mem.priv.sgt->sgl); dma_free_attrs(&tegra_vpr_dev, desc->mem.size, (void *)(uintptr_t)pa, pa, __DMA_ATTR(attrs)); - gk20a_free_sgtable(g, &desc->mem.sgt); - desc->mem.sgt = NULL; + gk20a_free_sgtable(g, &desc->mem.priv.sgt); + desc->mem.priv.sgt = NULL; } } @@ -184,7 +184,7 @@ int gk20a_tegra_secure_alloc(struct device *dev, desc->destroy = gk20a_tegra_secure_destroy; - desc->mem.sgt = sgt; + desc->mem.priv.sgt = sgt; desc->mem.size = size; desc->mem.aperture = APERTURE_SYSMEM; diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 67def777..9122e48b 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -270,7 +270,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) } /* bar1 va */ - f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd.size); + f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.priv.sgt, f->userd.size); if (!f->userd.gpu_va) { dev_err(d, "gmmu mapping failed\n"); goto clean_up; @@ -304,7 +304,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) for (chid = 0; chid < f->num_channels; chid++) { 
f->channel[chid].userd_iova = - g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) + g->ops.mm.get_iova_addr(g, f->userd.priv.sgt->sgl, 0) + chid * f->userd_entry_size; f->channel[chid].userd_gpu_va = f->userd.gpu_va + chid * f->userd_entry_size; diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 702ae97b..2a674feb 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -558,7 +558,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, } /* allocate patch buffer */ - if (ch_ctx->patch_ctx.mem.pages == NULL) { + if (ch_ctx->patch_ctx.mem.priv.pages == NULL) { err = vgpu_gr_alloc_channel_patch_ctx(g, c); if (err) { nvgpu_err(g, "fail to allocate patch buffer");
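
The net effect on call sites is mechanical: every direct access to mem->sgt, mem->pages, or mem->flags becomes mem->priv.sgt, mem->priv.pages, or mem->priv.flags, and core code stops depending on what sits behind priv. Below is a minimal, standalone C sketch of the resulting layout. It uses stand-in types rather than the real kernel definitions, and the nvgpu_mem_sgt() accessor is hypothetical, anticipating the system-specific access routines the commit message says will be added as needed; it is an illustration, not part of this patch.

/*
 * Standalone sketch of the new nvgpu_mem layout (userspace-compilable;
 * stand-in types, not the real kernel definitions).
 */
#include <stddef.h>
#include <stdio.h>

struct page;                      /* opaque, as in the real header */
struct sg_table { int nents; };   /* stand-in for the Linux type */

/* Mirrors the new include/nvgpu/linux/nvgpu_mem.h */
struct nvgpu_mem_priv {
	struct page **pages;
	struct sg_table *sgt;
	unsigned long flags;
};

/* Trimmed-down nvgpu_mem: common fields plus the embedded OS priv. */
struct nvgpu_mem {
	size_t size;
	unsigned long long gpu_va;
	void *cpu_va;                 /* sysmem only */
	struct nvgpu_mem_priv priv;   /* system-specific; opaque to core */
};

/*
 * Hypothetical accessor of the kind later patches would add so that
 * core code never touches priv directly. Not part of this patch.
 */
static struct sg_table *nvgpu_mem_sgt(struct nvgpu_mem *mem)
{
	return mem->priv.sgt;
}

int main(void)
{
	struct sg_table sgt = { .nents = 1 };
	struct nvgpu_mem mem = { .size = 4096, .priv = { .sgt = &sgt } };

	/* Before this patch: mem.sgt->nents. After: only via priv. */
	printf("nents = %d\n", nvgpu_mem_sgt(&mem)->nents);
	return 0;
}

Embedding struct nvgpu_mem_priv by value, rather than behind a pointer, keeps nvgpu_mem a single allocation while still letting each OS port define its own priv contents, which per the new header may be empty when a system has no system-specific state.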