gpu: nvgpu: Per-chip context creation

Add HAL for context creation, and expose functions that T18x context
creation needs.

Bug 1517461
Bug 1521790
Bug 200063473

Change-Id: I63d1c52594e851570b677184a4585d402125a86d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/660237
author: Terje Bergstrom <tbergstrom@nvidia.com> 2014-12-03 09:13:39 -0500
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:12:27 -0400
commit: 0d9bb7f82e99a014d56d2662a67fc8efa86d398a (patch)
tree: e0ff8d4663dfc32fbdf061a6b04db5ec05459c72 /drivers/gpu
parent: 5477d0f4c226847fe030ad00425e00206118b0d6 (diff)
5 files changed, 83 insertions, 39 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ef43d29a..75c6ef89 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -144,6 +144,16 @@ struct gpu_ops {
                int (*add_zbc_depth)(struct gk20a *g, struct gr_gk20a *gr,
                                  struct zbc_entry *depth_val, u32 index);
                u32 (*pagepool_default_size)(struct gk20a *g);
+                int (*init_ctx_state)(struct gk20a *g);
+                int (*alloc_gr_ctx)(struct gk20a *g,
+                          struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
+                          u32 padding);
+                void (*free_gr_ctx)(struct gk20a *g,
+                          struct vm_gk20a *vm,
+                          struct gr_ctx_desc *gr_ctx);
+                void (*update_ctxsw_preemption_mode)(struct gk20a *g,
+                                struct channel_ctx_gk20a *ch_ctx,
+                                void *ctx_ptr);
        } gr;
        const char *name;
        struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 867e775a..e9b39487 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -67,7 +67,7 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);
 /* channel gr ctx buffer */
 static int  gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
-                                        struct channel_gk20a *c);
+                                        struct channel_gk20a *c, u32 padding);
 static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
 /* channel patch ctx buffer */
@@ -469,29 +469,7 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
 /* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
 * We should replace most, if not all, fecs method calls to this instead. */
-struct fecs_method_op_gk20a {
+int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
-        struct {
-                u32 addr;
-                u32 data;
-        } method;
-        struct {
-                u32 id;
-                u32 data;
-                u32 clr;
-                u32 *ret;
-                u32 ok;
-                u32 fail;
-        } mailbox;
-        struct {
-                u32 ok;
-                u32 fail;
-        } cond;
-};
-static int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
                                   struct fecs_method_op_gk20a op)
 {
        struct gr_gk20a *gr = &g->gr;
@@ -1649,6 +1627,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
        gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v);
+        if (g->ops.gr.update_ctxsw_preemption_mode)
+                g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr);
        vunmap(ctx_ptr);
@@ -2198,7 +2178,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
        return 0;
 }
-static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
+int gr_gk20a_init_ctx_state(struct gk20a *g)
 {
        u32 pm_ctx_image_size;
        u32 ret;
@@ -2515,8 +2495,9 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
        c->ch_ctx.global_ctx_buffer_mapped = false;
 }
-static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g,
+int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
-                struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm)
+                          struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
+                          u32 padding)
 {
        struct gr_ctx_desc *gr_ctx = NULL;
        struct gr_gk20a *gr = &g->gr;
@@ -2581,7 +2562,7 @@ static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 }
 static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
-                        struct tsg_gk20a *tsg)
+                        struct tsg_gk20a *tsg, u32 padding)
 {
        struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx;
        int err;
@@ -2591,7 +2572,7 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
                return -ENOMEM;
        }
-        err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm);
+        err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, padding);
        if (err)
                return err;
@@ -2599,18 +2580,19 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
 }
 static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
-                                struct channel_gk20a *c)
+                                struct channel_gk20a *c,
+                                u32 padding)
 {
        struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx;
-        int err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, c->vm);
+        int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, padding);
        if (err)
                return err;
        return 0;
 }
-static void __gr_gk20a_free_gr_ctx(struct gk20a *g,
+void gr_gk20a_free_gr_ctx(struct gk20a *g,
-        struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx)
+                          struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx)
 {
        struct device *d = dev_from_gk20a(g);
        DEFINE_DMA_ATTRS(attrs);
@@ -2636,12 +2618,14 @@ void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
                gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n");
                return;
        }
-        __gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx);
+        tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx);
+        tsg->tsg_gr_ctx = NULL;
 }
 static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
 {
-        __gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
+        c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
+        c->ch_ctx.gr_ctx = NULL;
 }
 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
@@ -2793,7 +2777,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a  *c,
        /* allocate gr ctx buffer */
        if (!tsg) {
                if (!ch_ctx->gr_ctx) {
-                        err = gr_gk20a_alloc_channel_gr_ctx(g, c);
+                        err = gr_gk20a_alloc_channel_gr_ctx(g, c,
+                                                            args->padding);
                        if (err) {
                                gk20a_err(dev_from_gk20a(g),
                                        "fail to allocate gr ctx buffer");
@@ -2812,7 +2797,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a  *c,
                if (!tsg->tsg_gr_ctx) {
                        tsg->vm = c->vm;
                        gk20a_vm_get(tsg->vm);
-                        err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg);
+                        err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, args->padding);
                        if (err) {
                                gk20a_err(dev_from_gk20a(g),
                                        "fail to allocate TSG gr ctx buffer");
@@ -4480,7 +4465,6 @@ static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g)
 static int gr_gk20a_init_ctxsw(struct gk20a *g)
 {
-        struct gr_gk20a *gr = &g->gr;
        u32 err = 0;
        err = g->ops.gr.load_ctxsw_ucode(g);
@@ -4493,7 +4477,7 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g)
        /* this appears query for sw states but fecs actually init
           ramchain, etc so this is hw init */
-        err = gr_gk20a_init_ctx_state(g, gr);
+        err = g->ops.gr.init_ctx_state(g);
        if (err)
                goto out;
@@ -7357,5 +7341,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
        gops->gr.add_zbc_color = gr_gk20a_add_zbc_color;
        gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth;
        gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size;
+        gops->gr.init_ctx_state = gr_gk20a_init_ctx_state;
+        gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx;
+        gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index e5d315e5..309faf3b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -19,6 +19,9 @@
 #define GR_GK20A_H
 #include <linux/slab.h>
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+#include "gr_t18x.h"
+#endif
 #include "tsg_gk20a.h"
 #include "gr_ctx_gk20a.h"
@@ -284,6 +287,10 @@ struct gr_gk20a {
        void (*remove_support)(struct gr_gk20a *gr);
        bool sw_ready;
        bool skip_ucode_init;
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+        struct gr_t18x t18x;
+#endif
 };
 void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
@@ -336,6 +343,28 @@ struct gk20a_ctxsw_bootloader_desc {
        u32 entry_point;
 };
+struct fecs_method_op_gk20a {
+        struct {
+                u32 addr;
+                u32 data;
+        } method;
+        struct {
+                u32 id;
+                u32 data;
+                u32 clr;
+                u32 *ret;
+                u32 ok;
+                u32 fail;
+        } mailbox;
+        struct {
+                u32 ok;
+                u32 fail;
+        } cond;
+};
 struct gpu_ops;
 int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
                                        struct channel_gk20a *c);
@@ -462,4 +491,12 @@ int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
                           struct zbc_entry *depth_val, u32 index);
 int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
                       u32 expect_delay);
+int gr_gk20a_init_ctx_state(struct gk20a *g);
+int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
+                                   struct fecs_method_op_gk20a op);
+int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
+                          struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
+                          u32 padding);
+void gr_gk20a_free_gr_ctx(struct gk20a *g,
+                          struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 041c7edf..4dbde580 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -141,11 +141,18 @@ struct gr_ctx_buffer_desc {
        void *priv;
 };
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+#include "gr_t18x.h"
+#endif
 struct gr_ctx_desc {
        struct page **pages;
        u64 iova;
        size_t size;
        u64 gpu_va;
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+        struct gr_ctx_desc_t18x t18x;
+#endif
 };
 struct compbit_store_desc {
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 66b94943..cba51cd6 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -813,4 +813,7 @@ void gm20b_init_gr(struct gpu_ops *gops)
        gops->gr.add_zbc_color = gr_gk20a_add_zbc_color;
        gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth;
        gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size;
+        gops->gr.init_ctx_state = gr_gk20a_init_ctx_state;
+        gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx;
+        gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx;
 }
author	Terje Bergstrom <tbergstrom@nvidia.com>	2014-12-03 09:13:39 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:12:27 -0400
commit	0d9bb7f82e99a014d56d2662a67fc8efa86d398a (patch)
tree	e0ff8d4663dfc32fbdf061a6b04db5ec05459c72 /drivers/gpu
parent	5477d0f4c226847fe030ad00425e00206118b0d6 (diff)