From ee66559a0b3b82b3dc9be684261ddd0954731ff5 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Wed, 18 Jun 2014 18:32:03 +0530 Subject: gpu: nvgpu: add TSG support for engine context All channels in a TSG need to share the same engine context, i.e. pointer in RAMFC of all channels in a TSG must point to the same NV_RAMIN_GR_WFI_TARGET To get this, add a pointer to gr_ctx inside TSG struct so that TSG can maintain its own unique gr_ctx Also, change the type of gr_ctx in a channel to pointer variable so that if channel is part of TSG it can point to TSG's gr_ctx otherwise it will point to its own gr_ctx In gk20a_alloc_obj_ctx(), allocate gr_ctx as below: 1) If channel is not part of any TSG - allocate its own gr_ctx buffer if it is not already allocated 2) If channel is part of TSG - Check if TSG has already allocated gr_ctx (as part of TSG) - If yes, channel's gr_ctx will point to that of TSG's - If not, then it means the channel is the first to be bound to this TSG - And in this case we will allocate new gr_ctx on TSG first and then make channel's gr_ctx point to this gr_ctx Also, gr_ctx will be released as below: 1) If channel is not part of TSG, then it will be released when the channel is closed 2) Otherwise, it will be released when TSG itself is closed Bug 1470692 Change-Id: Id347217d5b462e0e972cd3d79d17795b37034a50 Signed-off-by: Deepak Nibade Reviewed-on: http://git-master/r/417065 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 155 +++++++++++++++++++++++--------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 + drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 10 +++ drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 4 + 5 files changed, 131 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 4d236a70..21949012 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -56,7 +56,7 
@@ struct fence { /* contexts associated with a channel */ struct channel_ctx_gk20a { - struct gr_ctx_desc gr_ctx; + struct gr_ctx_desc *gr_ctx; struct pm_ctx_desc pm_ctx; struct patch_desc patch_ctx; struct zcull_ctx_desc zcull_ctx; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 0e178e9e..4a6dd6c5 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -801,8 +801,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, gk20a_dbg_fn(""); - ctx_ptr = vmap(ch_ctx->gr_ctx.pages, - PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + ctx_ptr = vmap(ch_ctx->gr_ctx->pages, + PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); if (!ctx_ptr) return -ENOMEM; @@ -1562,8 +1562,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, if (!gold_ptr) goto clean_up; - ctx_ptr = vmap(ch_ctx->gr_ctx.pages, - PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + ctx_ptr = vmap(ch_ctx->gr_ctx->pages, + PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); if (!ctx_ptr) goto clean_up; @@ -1602,7 +1602,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, gk20a_mem_rd32(gold_ptr, i); } - gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va); + gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va); gr->ctx_vars.golden_image_initialized = true; @@ -1636,8 +1636,8 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, Flush and invalidate before cpu update. */ gk20a_mm_l2_flush(g, true); - ctx_ptr = vmap(ch_ctx->gr_ctx.pages, - PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + ctx_ptr = vmap(ch_ctx->gr_ctx->pages, + PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); if (!ctx_ptr) return -ENOMEM; @@ -1676,8 +1676,8 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, Flush and invalidate before cpu update. 
*/ gk20a_mm_l2_flush(g, true); - ctx_ptr = vmap(ch_ctx->gr_ctx.pages, - PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + ctx_ptr = vmap(ch_ctx->gr_ctx->pages, + PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); if (!ctx_ptr) return -ENOMEM; @@ -2521,12 +2521,11 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) c->ch_ctx.global_ctx_buffer_mapped = false; } -static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, - struct channel_gk20a *c) +static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm) { + struct gr_ctx_desc *gr_ctx = NULL; struct gr_gk20a *gr = &g->gr; - struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx; - struct vm_gk20a *ch_vm = c->vm; struct device *d = dev_from_gk20a(g); struct sg_table *sgt; DEFINE_DMA_ATTRS(attrs); @@ -2542,12 +2541,18 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; + gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL); + if (!gr_ctx) + return -ENOMEM; + gr_ctx->size = gr->ctx_vars.buffer_total_size; dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size, &iova, GFP_KERNEL, &attrs); - if (!gr_ctx->pages) - return -ENOMEM; + if (!gr_ctx->pages) { + err = -ENOMEM; + goto err_free_ctx; + } gr_ctx->iova = iova; err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages, @@ -2555,7 +2560,7 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, if (err) goto err_free; - gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size, + gr_ctx->gpu_va = gk20a_gmmu_map(vm, &sgt, gr_ctx->size, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none); if (!gr_ctx->gpu_va) @@ -2563,6 +2568,8 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, gk20a_free_sgtable(&sgt); + *__gr_ctx = gr_ctx; + return 0; err_free_sgt: @@ -2572,30 +2579,74 @@ static int 
gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, gr_ctx->pages, gr_ctx->iova, &attrs); gr_ctx->pages = NULL; gr_ctx->iova = 0; + err_free_ctx: + kfree(gr_ctx); + gr_ctx = NULL; return err; } -static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) +static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, + struct tsg_gk20a *tsg) +{ + struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; + int err; + + if (!tsg->vm) { + gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n"); + return -ENOMEM; + } + + err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm); + if (err) + return err; + + return 0; +} + +static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, + struct channel_gk20a *c) +{ + struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; + int err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, c->vm); + if (err) + return err; + + return 0; +} + +static void __gr_gk20a_free_gr_ctx(struct gk20a *g, + struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) { - struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - struct vm_gk20a *ch_vm = c->vm; - struct gk20a *g = c->g; struct device *d = dev_from_gk20a(g); DEFINE_DMA_ATTRS(attrs); gk20a_dbg_fn(""); - if (!ch_ctx->gr_ctx.gpu_va) + if (!gr_ctx || !gr_ctx->gpu_va) return; - gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va, - ch_ctx->gr_ctx.size, gk20a_mem_flag_none); + gk20a_gmmu_unmap(vm, gr_ctx->gpu_va, + gr_ctx->size, gk20a_mem_flag_none); dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); - dma_free_attrs(d, ch_ctx->gr_ctx.size, - ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs); - ch_ctx->gr_ctx.pages = NULL; - ch_ctx->gr_ctx.iova = 0; + dma_free_attrs(d, gr_ctx->size, + gr_ctx->pages, gr_ctx->iova, &attrs); + gr_ctx->pages = NULL; + gr_ctx->iova = 0; +} + +void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) +{ + if (!tsg->vm) { + gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n"); + return; + } + __gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); +} + +static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) +{ 
+ __gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); } static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, @@ -2684,7 +2735,8 @@ void gk20a_free_channel_ctx(struct channel_gk20a *c) { gr_gk20a_unmap_global_ctx_buffers(c); gr_gk20a_free_channel_patch_ctx(c); - gr_gk20a_free_channel_gr_ctx(c); + if (!gk20a_is_channel_marked_as_tsg(c)) + gr_gk20a_free_channel_gr_ctx(c); /* zcull_ctx, pm_ctx */ @@ -2717,7 +2769,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, struct nvhost_alloc_obj_ctx_args *args) { struct gk20a *g = c->g; + struct fifo_gk20a *f = &g->fifo; struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct tsg_gk20a *tsg = NULL; int err = 0; gk20a_dbg_fn(""); @@ -2736,27 +2790,44 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, err = -EINVAL; goto out; } + c->obj_class = args->class_num; + + if (gk20a_is_channel_marked_as_tsg(c)) + tsg = &f->tsg[c->tsgid]; /* allocate gr ctx buffer */ - if (ch_ctx->gr_ctx.pages == NULL) { - err = gr_gk20a_alloc_channel_gr_ctx(g, c); - if (err) { + if (!tsg) { + if (!ch_ctx->gr_ctx) { + err = gr_gk20a_alloc_channel_gr_ctx(g, c); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to allocate gr ctx buffer"); + goto out; + } + } else { + /*TBD: needs to be more subtle about which is + * being allocated as some are allowed to be + * allocated along same channel */ gk20a_err(dev_from_gk20a(g), - "fail to allocate gr ctx buffer"); + "too many classes alloc'd on same channel"); + err = -EINVAL; goto out; } - c->obj_class = args->class_num; } else { - /*TBD: needs to be more subtle about which is being allocated - * as some are allowed to be allocated along same channel */ - gk20a_err(dev_from_gk20a(g), - "too many classes alloc'd on same channel"); - err = -EINVAL; - goto out; + if (!tsg->tsg_gr_ctx) { + tsg->vm = c->vm; + err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg); + if (err) { + gk20a_err(dev_from_gk20a(g), + "fail to allocate TSG gr ctx buffer"); + goto out; + } + } + ch_ctx->gr_ctx = tsg->tsg_gr_ctx; } /* commit 
gr ctx buffer */ - err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va); + err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va); if (err) { gk20a_err(dev_from_gk20a(g), "fail to commit gr ctx buffer"); @@ -6657,8 +6728,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, /* would have been a variant of gr_gk20a_apply_instmem_overrides */ /* recoded in-place instead.*/ - ctx_ptr = vmap(ch_ctx->gr_ctx.pages, - PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, + ctx_ptr = vmap(ch_ctx->gr_ctx->pages, + PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT, 0, pgprot_dmacoherent(PAGE_KERNEL)); if (!ctx_ptr) { err = -ENOMEM; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 05c27ffd..cae69ba6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -20,6 +20,7 @@ #include +#include "tsg_gk20a.h" #include "gr_ctx_gk20a.h" #define GR_IDLE_CHECK_DEFAULT 100 /* usec */ @@ -414,4 +415,6 @@ void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, u32 **sm_dsm_perf_regs, u32 *perf_register_stride); int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); + +void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index 7c65c695..d4ece147 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c @@ -165,6 +165,9 @@ int gk20a_tsg_dev_open(struct inode *inode, struct file *filp) tsg->g = g; tsg->num_runnable_channels = 0; + tsg->tsg_gr_ctx = NULL; + tsg->vm = NULL; + filp->private_data = tsg; gk20a_dbg(gpu_dbg_fn, "tsg opened %d\n", tsg->tsgid); @@ -185,6 +188,13 @@ int gk20a_tsg_dev_release(struct inode *inode, struct file *filp) return -EBUSY; } + if (tsg->tsg_gr_ctx) { + gr_gk20a_free_tsg_gr_ctx(tsg); + tsg->tsg_gr_ctx = NULL; + } + if (tsg->vm) + tsg->vm = NULL; + release_used_tsg(&g->fifo, tsg); gk20a_dbg(gpu_dbg_fn, "tsg released %d\n", tsg->tsgid); diff 
--git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 2530a4bd..63113b60 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h @@ -39,6 +39,10 @@ struct tsg_gk20a { struct list_head ch_runnable_list; int num_runnable_channels; struct mutex ch_list_lock; + + struct gr_ctx_desc *tsg_gr_ctx; + + struct vm_gk20a *vm; }; #endif /* __TSG_GK20A_H_ */ -- cgit v1.2.2