From 0d9bb7f82e99a014d56d2662a67fc8efa86d398a Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Wed, 3 Dec 2014 16:13:39 +0200 Subject: gpu: nvgpu: Per-chip context creation Add HAL for context creation, and expose functions that T18x context creation needs. Bug 1517461 Bug 1521790 Bug 200063473 Change-Id: I63d1c52594e851570b677184a4585d402125a86d Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/660237 --- drivers/gpu/nvgpu/gk20a/gk20a.h | 10 ++++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 65 +++++++++++++++----------------------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 37 ++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 7 ++++ drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 3 ++ 5 files changed, 83 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ef43d29a..75c6ef89 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -144,6 +144,16 @@ struct gpu_ops { int (*add_zbc_depth)(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *depth_val, u32 index); u32 (*pagepool_default_size)(struct gk20a *g); + int (*init_ctx_state)(struct gk20a *g); + int (*alloc_gr_ctx)(struct gk20a *g, + struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, + u32 padding); + void (*free_gr_ctx)(struct gk20a *g, + struct vm_gk20a *vm, + struct gr_ctx_desc *gr_ctx); + void (*update_ctxsw_preemption_mode)(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + void *ctx_ptr); } gr; const char *name; struct { diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 867e775a..e9b39487 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -67,7 +67,7 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); /* channel gr ctx buffer */ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, - struct channel_gk20a *c); + struct channel_gk20a *c, u32 padding); static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); /* channel patch ctx buffer */ @@ -469,29 +469,7 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, /* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...) * We should replace most, if not all, fecs method calls to this instead. */ -struct fecs_method_op_gk20a { - struct { - u32 addr; - u32 data; - } method; - - struct { - u32 id; - u32 data; - u32 clr; - u32 *ret; - u32 ok; - u32 fail; - } mailbox; - - struct { - u32 ok; - u32 fail; - } cond; - -}; - -static int gr_gk20a_submit_fecs_method_op(struct gk20a *g, +int gr_gk20a_submit_fecs_method_op(struct gk20a *g, struct fecs_method_op_gk20a op) { struct gr_gk20a *gr = &g->gr; @@ -1649,6 +1627,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); + if (g->ops.gr.update_ctxsw_preemption_mode) + g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); vunmap(ctx_ptr); @@ -2198,7 +2178,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) return 0; } -static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr) +int gr_gk20a_init_ctx_state(struct gk20a *g) { u32 pm_ctx_image_size; u32 ret; @@ -2515,8 +2495,9 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) c->ch_ctx.global_ctx_buffer_mapped = false; } -static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g, - struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm) +int gr_gk20a_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, + u32 padding) { struct gr_ctx_desc *gr_ctx = NULL; struct gr_gk20a *gr = &g->gr; @@ -2581,7 +2562,7 @@ static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g, } static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, - struct tsg_gk20a *tsg) + struct tsg_gk20a *tsg, u32 padding) { struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; int err; @@ -2591,7 +2572,7 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, return -ENOMEM; } - err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm); + err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, padding); if (err) return err; @@ -2599,18 +2580,19 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, } static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, - struct channel_gk20a *c) + struct channel_gk20a *c, + u32 padding) { struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; - int err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, c->vm); + int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, padding); if (err) return err; return 0; } -static void __gr_gk20a_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) +void gr_gk20a_free_gr_ctx(struct gk20a *g, + struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) { struct device *d = dev_from_gk20a(g); DEFINE_DMA_ATTRS(attrs); @@ -2636,12 +2618,14 @@ void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n"); return; } - __gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); + tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); + tsg->tsg_gr_ctx = NULL; } static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) { - __gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); + c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); + c->ch_ctx.gr_ctx = NULL; } static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, @@ -2793,7 +2777,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, /* allocate gr ctx buffer */ if (!tsg) { if (!ch_ctx->gr_ctx) { - err = gr_gk20a_alloc_channel_gr_ctx(g, c); + err = gr_gk20a_alloc_channel_gr_ctx(g, c, + args->padding); if (err) { gk20a_err(dev_from_gk20a(g), "fail to allocate gr ctx buffer"); @@ -2812,7 +2797,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, if (!tsg->tsg_gr_ctx) { tsg->vm = c->vm; gk20a_vm_get(tsg->vm); - err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg); + err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, args->padding); if (err) { gk20a_err(dev_from_gk20a(g), "fail to allocate TSG gr ctx buffer"); @@ -4480,7 +4465,6 @@ static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g) static int gr_gk20a_init_ctxsw(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; u32 err = 0; err = g->ops.gr.load_ctxsw_ucode(g); @@ -4493,7 +4477,7 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g) /* this appears query for sw states but fecs actually init ramchain, etc so this is hw init */ - err = gr_gk20a_init_ctx_state(g, gr); + err = g->ops.gr.init_ctx_state(g); if (err) goto out; @@ -7357,5 +7341,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size; + gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; + gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; + gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index e5d315e5..309faf3b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -19,6 +19,9 @@ #define GR_GK20A_H #include +#ifdef CONFIG_ARCH_TEGRA_18x_SOC +#include "gr_t18x.h" +#endif #include "tsg_gk20a.h" #include "gr_ctx_gk20a.h" @@ -284,6 +287,10 @@ struct gr_gk20a { void (*remove_support)(struct gr_gk20a *gr); bool sw_ready; bool skip_ucode_init; + +#ifdef CONFIG_ARCH_TEGRA_18x_SOC + struct gr_t18x t18x; +#endif }; void gk20a_fecs_dump_falcon_stats(struct gk20a *g); @@ -336,6 +343,28 @@ struct gk20a_ctxsw_bootloader_desc { u32 entry_point; }; +struct fecs_method_op_gk20a { + struct { + u32 addr; + u32 data; + } method; + + struct { + u32 id; + u32 data; + u32 clr; + u32 *ret; + u32 ok; + u32 fail; + } mailbox; + + struct { + u32 ok; + u32 fail; + } cond; + +}; + struct gpu_ops; int gr_gk20a_load_golden_ctx_image(struct gk20a *g, struct channel_gk20a *c); @@ -462,4 +491,12 @@ int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, struct zbc_entry *depth_val, u32 index); int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, u32 expect_delay); +int gr_gk20a_init_ctx_state(struct gk20a *g); +int gr_gk20a_submit_fecs_method_op(struct gk20a *g, + struct fecs_method_op_gk20a op); +int gr_gk20a_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, + u32 padding); +void gr_gk20a_free_gr_ctx(struct gk20a *g, + struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 041c7edf..4dbde580 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -141,11 +141,18 @@ struct gr_ctx_buffer_desc { void *priv; }; +#ifdef CONFIG_ARCH_TEGRA_18x_SOC +#include "gr_t18x.h" +#endif + struct gr_ctx_desc { struct page **pages; u64 iova; size_t size; u64 gpu_va; +#ifdef CONFIG_ARCH_TEGRA_18x_SOC + struct gr_ctx_desc_t18x t18x; +#endif }; struct compbit_store_desc { diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 66b94943..cba51cd6 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -813,4 +813,7 @@ void gm20b_init_gr(struct gpu_ops *gops) gops->gr.add_zbc_color = gr_gk20a_add_zbc_color; gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; + gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; + gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; + gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; } -- cgit v1.2.2