| author | Deepak Nibade <dnibade@nvidia.com> | 2014-06-18 09:02:03 -0400 |
| --- | --- | --- |
| committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:10:17 -0400 |
| commit | ee66559a0b3b82b3dc9be684261ddd0954731ff5 (patch) | |
| tree | 34156c1d4f3393a5a5fe945185b8548ae2427a07 /drivers | |
| parent | b6466fbe07d28fcc1a2ea93715a1f88b48dd8550 (diff) | |
gpu: nvgpu: add TSG support for engine context
All channels in a TSG need to share the same engine context,
i.e. the RAMFC of every channel in a TSG must point to the
same NV_RAMIN_GR_WFI_TARGET.
To achieve this, add a gr_ctx pointer to the TSG struct so
that each TSG can maintain its own unique gr_ctx.
Also, change the channel's gr_ctx member to a pointer so that,
if the channel is part of a TSG, it can point to the TSG's
gr_ctx; otherwise it points to the channel's own gr_ctx.
In gk20a_alloc_obj_ctx(), allocate gr_ctx as below (see the
sketch after these lists):
1) If the channel is not part of any TSG
   - allocate its own gr_ctx buffer if one is not already allocated
2) If the channel is part of a TSG
   - check whether the TSG has already allocated a gr_ctx
   - if yes, make the channel's gr_ctx point to the TSG's
   - if not, the channel is the first to be bound to this TSG,
     so allocate a new gr_ctx on the TSG first and then make the
     channel's gr_ctx point to it
Also, gr_ctx is released as below:
1) If the channel is not part of a TSG, it is released when the
   channel is closed
2) Otherwise, it is released when the TSG itself is closed
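For reference, the allocation decision described above boils down to the following simplified sketch. This is illustrative pseudocode distilled from the patch below, not verbatim driver code; the helper name `alloc_obj_ctx_gr_ctx()` is hypothetical, while the structs and functions it calls are the ones touched by this change.

```c
/*
 * Simplified sketch of the gr_ctx selection in gk20a_alloc_obj_ctx().
 * Distilled from the patch below; the wrapper function itself is
 * hypothetical and only groups the relevant steps for illustration.
 */
static int alloc_obj_ctx_gr_ctx(struct gk20a *g, struct channel_gk20a *c)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct tsg_gk20a *tsg = NULL;
	int err;

	if (gk20a_is_channel_marked_as_tsg(c))
		tsg = &g->fifo.tsg[c->tsgid];

	if (!tsg) {
		/* standalone channel: allocate a private gr_ctx once */
		if (!ch_ctx->gr_ctx) {
			err = gr_gk20a_alloc_channel_gr_ctx(g, c);
			if (err)
				return err;
		}
	} else {
		/* first channel bound to the TSG allocates the shared gr_ctx */
		if (!tsg->tsg_gr_ctx) {
			tsg->vm = c->vm;
			err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg);
			if (err)
				return err;
		}
		ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
	}

	/* RAMFC of every channel in the TSG now points at the same context */
	return gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
}
```

The release path is the mirror image: gk20a_free_channel_ctx() skips the gr_ctx for TSG channels, and gk20a_tsg_dev_release() frees the shared context via gr_gk20a_free_tsg_gr_ctx().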
Bug 1470692
Change-Id: Id347217d5b462e0e972cd3d79d17795b37034a50
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/417065
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 |
| -rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 155 |
| -rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 |
| -rw-r--r-- | drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 10 |
| -rw-r--r-- | drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 4 |
5 files changed, 131 insertions, 43 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4d236a70..21949012 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -56,7 +56,7 @@ struct fence {
 
 /* contexts associated with a channel */
 struct channel_ctx_gk20a {
-        struct gr_ctx_desc gr_ctx;
+        struct gr_ctx_desc *gr_ctx;
         struct pm_ctx_desc pm_ctx;
         struct patch_desc patch_ctx;
         struct zcull_ctx_desc zcull_ctx;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 0e178e9e..4a6dd6c5 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -801,8 +801,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 
         gk20a_dbg_fn("");
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 return -ENOMEM;
@@ -1562,8 +1562,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
         if (!gold_ptr)
                 goto clean_up;
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 goto clean_up;
@@ -1602,7 +1602,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
                         gk20a_mem_rd32(gold_ptr, i);
         }
 
-        gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+        gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
 
         gr->ctx_vars.golden_image_initialized = true;
 
@@ -1636,8 +1636,8 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
            Flush and invalidate before cpu update. */
         gk20a_mm_l2_flush(g, true);
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 return -ENOMEM;
@@ -1676,8 +1676,8 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
            Flush and invalidate before cpu update. */
         gk20a_mm_l2_flush(g, true);
 
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr)
                 return -ENOMEM;
@@ -2521,12 +2521,11 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
         c->ch_ctx.global_ctx_buffer_mapped = false;
 }
 
-static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
-                        struct channel_gk20a *c)
+static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g,
+                        struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm)
 {
+        struct gr_ctx_desc *gr_ctx = NULL;
         struct gr_gk20a *gr = &g->gr;
-        struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
-        struct vm_gk20a *ch_vm = c->vm;
         struct device *d = dev_from_gk20a(g);
         struct sg_table *sgt;
         DEFINE_DMA_ATTRS(attrs);
@@ -2542,12 +2541,18 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
         gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
         gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
+        gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
+        if (!gr_ctx)
+                return -ENOMEM;
+
         gr_ctx->size = gr->ctx_vars.buffer_total_size;
         dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
         gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size,
                                 &iova, GFP_KERNEL, &attrs);
-        if (!gr_ctx->pages)
-                return -ENOMEM;
+        if (!gr_ctx->pages) {
+                err = -ENOMEM;
+                goto err_free_ctx;
+        }
 
         gr_ctx->iova = iova;
         err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages,
@@ -2555,7 +2560,7 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
         if (err)
                 goto err_free;
 
-        gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size,
+        gr_ctx->gpu_va = gk20a_gmmu_map(vm, &sgt, gr_ctx->size,
                                 NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
                                 gk20a_mem_flag_none);
         if (!gr_ctx->gpu_va)
@@ -2563,6 +2568,8 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
 
         gk20a_free_sgtable(&sgt);
 
+        *__gr_ctx = gr_ctx;
+
         return 0;
 
 err_free_sgt:
@@ -2572,30 +2579,74 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
                 gr_ctx->pages, gr_ctx->iova, &attrs);
         gr_ctx->pages = NULL;
         gr_ctx->iova = 0;
+err_free_ctx:
+        kfree(gr_ctx);
+        gr_ctx = NULL;
 
         return err;
 }
 
-static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
+static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
+                        struct tsg_gk20a *tsg)
+{
+        struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx;
+        int err;
+
+        if (!tsg->vm) {
+                gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n");
+                return -ENOMEM;
+        }
+
+        err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm);
+        if (err)
+                return err;
+
+        return 0;
+}
+
+static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
+                        struct channel_gk20a *c)
+{
+        struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx;
+        int err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, c->vm);
+        if (err)
+                return err;
+
+        return 0;
+}
+
+static void __gr_gk20a_free_gr_ctx(struct gk20a *g,
+                struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx)
 {
-        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-        struct vm_gk20a *ch_vm = c->vm;
-        struct gk20a *g = c->g;
         struct device *d = dev_from_gk20a(g);
         DEFINE_DMA_ATTRS(attrs);
 
         gk20a_dbg_fn("");
 
-        if (!ch_ctx->gr_ctx.gpu_va)
+        if (!gr_ctx || !gr_ctx->gpu_va)
                 return;
 
-        gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va,
-                        ch_ctx->gr_ctx.size, gk20a_mem_flag_none);
+        gk20a_gmmu_unmap(vm, gr_ctx->gpu_va,
+                        gr_ctx->size, gk20a_mem_flag_none);
         dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-        dma_free_attrs(d, ch_ctx->gr_ctx.size,
-                ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs);
-        ch_ctx->gr_ctx.pages = NULL;
-        ch_ctx->gr_ctx.iova = 0;
+        dma_free_attrs(d, gr_ctx->size,
+                gr_ctx->pages, gr_ctx->iova, &attrs);
+        gr_ctx->pages = NULL;
+        gr_ctx->iova = 0;
+}
+
+void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
+{
+        if (!tsg->vm) {
+                gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n");
+                return;
+        }
+        __gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx);
+}
+
+static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
+{
+        __gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
 }
 
 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
@@ -2684,7 +2735,8 @@ void gk20a_free_channel_ctx(struct channel_gk20a *c)
 {
         gr_gk20a_unmap_global_ctx_buffers(c);
         gr_gk20a_free_channel_patch_ctx(c);
-        gr_gk20a_free_channel_gr_ctx(c);
+        if (!gk20a_is_channel_marked_as_tsg(c))
+                gr_gk20a_free_channel_gr_ctx(c);
 
         /* zcull_ctx, pm_ctx */
 
@@ -2717,7 +2769,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
                         struct nvhost_alloc_obj_ctx_args *args)
 {
         struct gk20a *g = c->g;
+        struct fifo_gk20a *f = &g->fifo;
         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+        struct tsg_gk20a *tsg = NULL;
         int err = 0;
 
         gk20a_dbg_fn("");
@@ -2736,27 +2790,44 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
                 err = -EINVAL;
                 goto out;
         }
+        c->obj_class = args->class_num;
+
+        if (gk20a_is_channel_marked_as_tsg(c))
+                tsg = &f->tsg[c->tsgid];
 
         /* allocate gr ctx buffer */
-        if (ch_ctx->gr_ctx.pages == NULL) {
-                err = gr_gk20a_alloc_channel_gr_ctx(g, c);
-                if (err) {
+        if (!tsg) {
+                if (!ch_ctx->gr_ctx) {
+                        err = gr_gk20a_alloc_channel_gr_ctx(g, c);
+                        if (err) {
+                                gk20a_err(dev_from_gk20a(g),
+                                        "fail to allocate gr ctx buffer");
+                                goto out;
+                        }
+                } else {
+                        /*TBD: needs to be more subtle about which is
+                         * being allocated as some are allowed to be
+                         * allocated along same channel */
                         gk20a_err(dev_from_gk20a(g),
-                                "fail to allocate gr ctx buffer");
+                                "too many classes alloc'd on same channel");
+                        err = -EINVAL;
                         goto out;
                 }
-                c->obj_class = args->class_num;
         } else {
-                /*TBD: needs to be more subtle about which is being allocated
-                 * as some are allowed to be allocated along same channel */
-                gk20a_err(dev_from_gk20a(g),
-                        "too many classes alloc'd on same channel");
-                err = -EINVAL;
-                goto out;
+                if (!tsg->tsg_gr_ctx) {
+                        tsg->vm = c->vm;
+                        err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg);
+                        if (err) {
+                                gk20a_err(dev_from_gk20a(g),
+                                        "fail to allocate TSG gr ctx buffer");
+                                goto out;
+                        }
+                }
+                ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
         }
 
         /* commit gr ctx buffer */
-        err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+        err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
         if (err) {
                 gk20a_err(dev_from_gk20a(g),
                         "fail to commit gr ctx buffer");
@@ -6657,8 +6728,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 
         /* would have been a variant of gr_gk20a_apply_instmem_overrides */
         /* recoded in-place instead.*/
-        ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-                        PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+        ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+                        PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
                         0, pgprot_dmacoherent(PAGE_KERNEL));
         if (!ctx_ptr) {
                 err = -ENOMEM;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 05c27ffd..cae69ba6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -20,6 +20,7 @@
 
 #include <linux/slab.h>
 
+#include "tsg_gk20a.h"
 #include "gr_ctx_gk20a.h"
 
 #define GR_IDLE_CHECK_DEFAULT 100 /* usec */
@@ -414,4 +415,6 @@ void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
                         u32 **sm_dsm_perf_regs,
                         u32 *perf_register_stride);
 int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
+
+void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 7c65c695..d4ece147 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -165,6 +165,9 @@ int gk20a_tsg_dev_open(struct inode *inode, struct file *filp)
         tsg->g = g;
         tsg->num_runnable_channels = 0;
 
+        tsg->tsg_gr_ctx = NULL;
+        tsg->vm = NULL;
+
         filp->private_data = tsg;
 
         gk20a_dbg(gpu_dbg_fn, "tsg opened %d\n", tsg->tsgid);
@@ -185,6 +188,13 @@ int gk20a_tsg_dev_release(struct inode *inode, struct file *filp)
                 return -EBUSY;
         }
 
+        if (tsg->tsg_gr_ctx) {
+                gr_gk20a_free_tsg_gr_ctx(tsg);
+                tsg->tsg_gr_ctx = NULL;
+        }
+        if (tsg->vm)
+                tsg->vm = NULL;
+
         release_used_tsg(&g->fifo, tsg);
 
         gk20a_dbg(gpu_dbg_fn, "tsg released %d\n", tsg->tsgid);
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 2530a4bd..63113b60 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -39,6 +39,10 @@ struct tsg_gk20a {
         struct list_head ch_runnable_list;
         int num_runnable_channels;
         struct mutex ch_list_lock;
+
+        struct gr_ctx_desc *tsg_gr_ctx;
+
+        struct vm_gk20a *vm;
 };
 
 #endif /* __TSG_GK20A_H_ */