From 5df3d09e16c9d2f413cea53d16bc8ca42ae42d6e Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Tue, 9 Dec 2014 10:04:05 +0200 Subject: gpu: nvgpu: gm20b: Enable CTA preemption CTA preemption needs to be enabled by setting a value in context. Set it for gm20b. Bug 200063473 Bug 1517461 Change-Id: I080cd71b348d08f834fd23ebbe7443dba79224db Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/661299 --- drivers/gpu/nvgpu/gk20a/gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 16 +++++++++----- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 ++++ drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 44 +++++++++++++++++++++++++++++++++++++- 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 9bb890ca..184ef168 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -151,7 +151,7 @@ struct gpu_ops { int (*init_ctx_state)(struct gk20a *g); int (*alloc_gr_ctx)(struct gk20a *g, struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, - u32 padding); + u32 class, u32 padding); void (*free_gr_ctx)(struct gk20a *g, struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4f6c885c..37cccba3 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -67,7 +67,8 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); /* channel gr ctx buffer */ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, - struct channel_gk20a *c, u32 padding); + struct channel_gk20a *c, + u32 class, u32 padding); static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); /* channel patch ctx buffer */ @@ -2486,6 +2487,7 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) int gr_gk20a_alloc_gr_ctx(struct gk20a *g, struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, + u32 class, u32 padding) { struct gr_ctx_desc *gr_ctx = NULL; @@ -2551,7 +2553,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, } static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, - struct tsg_gk20a *tsg, u32 padding) + struct tsg_gk20a *tsg, u32 class, u32 padding) { struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; int err; @@ -2561,7 +2563,7 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, return -ENOMEM; } - err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, padding); + err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, tsg->vm, class, padding); if (err) return err; @@ -2570,10 +2572,11 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, struct channel_gk20a *c, + u32 class, u32 padding) { struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; - int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, padding); + int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class, padding); if (err) return err; @@ -2767,6 +2770,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, if (!tsg) { if (!ch_ctx->gr_ctx) { err = gr_gk20a_alloc_channel_gr_ctx(g, c, + args->class_num, args->padding); if (err) { gk20a_err(dev_from_gk20a(g), @@ -2786,7 +2790,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, if (!tsg->tsg_gr_ctx) { tsg->vm = c->vm; gk20a_vm_get(tsg->vm); - err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, args->padding); + err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, + args->class_num, + args->padding); if (err) { gk20a_err(dev_from_gk20a(g), "fail to allocate TSG gr ctx buffer"); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 309faf3b..f130b830 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -496,7 +496,7 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, struct fecs_method_op_gk20a op); int gr_gk20a_alloc_gr_ctx(struct gk20a *g, struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, - u32 padding); + u32 class, u32 padding); void gr_gk20a_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d3ee8670..04f9446b 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -150,11 +150,15 @@ struct gr_ctx_desc { u64 iova; size_t size; u64 gpu_va; + int preempt_mode; #ifdef CONFIG_ARCH_TEGRA_18x_SOC struct gr_ctx_desc_t18x t18x; #endif }; +#define NVGPU_GR_PREEMPTION_MODE_WFI 0 +#define NVGPU_GR_PREEMPTION_MODE_CTA 2 + struct compbit_store_desc { struct page **pages; struct sg_table *sgt; diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index cba51cd6..5f544819 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -775,6 +775,46 @@ static u32 gr_gm20b_pagepool_default_size(struct gk20a *g) return gr_scc_pagepool_total_pages_hwmax_value_v(); } +int gr_gm20b_alloc_gr_ctx(struct gk20a *g, + struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, + u32 class, + u32 flags) +{ + int err; + + gk20a_dbg_fn(""); + + err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, vm, class, flags); + if (err) + return err; + + if (class == MAXWELL_COMPUTE_B) + (*gr_ctx)->preempt_mode = NVGPU_GR_PREEMPTION_MODE_CTA; + + gk20a_dbg_fn("done"); + + return 0; +} + +static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, + struct channel_ctx_gk20a *ch_ctx, + void *ctx_ptr) +{ + struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; + u32 cta_preempt_option = + ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); + + gk20a_dbg_fn(""); + + if (gr_ctx->preempt_mode == NVGPU_GR_PREEMPTION_MODE_CTA) { + gk20a_dbg_info("CTA: %x", cta_preempt_option); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_preemption_options_o(), 0, + cta_preempt_option); + } + + gk20a_dbg_fn("done"); +} + void gm20b_init_gr(struct gpu_ops *gops) { gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; @@ -814,6 +854,8 @@ void gm20b_init_gr(struct gpu_ops *gops) gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; - gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; + gops->gr.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx; gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; + gops->gr.update_ctxsw_preemption_mode = + gr_gm20b_update_ctxsw_preemption_mode; } -- cgit v1.2.2