From ab93322b25c9dd6058fac6523f41571d77eeaeb9 Mon Sep 17 00:00:00 2001 From: sujeet baranwal Date: Mon, 28 Sep 2015 15:26:23 -0700 Subject: gpu: nvgpu: Add CDE bits in FECS header For a CDE channel, accesses from the T1 (Tex) unit need to be promoted to 128B-aligned accesses; otherwise a HW deadlock occurs. The GPU driver modifies the FECS header, which FECS uses to configure the T1 promotion to 128B-aligned accesses. Bug 200096226 Change-Id: I8a8deaf6fb91f4bbceacd491db7eb6f7bca5001b Signed-off-by: sujeet baranwal Reviewed-by: Terje Bergstrom Signed-off-by: sujeet baranwal Reviewed-on: http://git-master/r/804625 Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 3 +++ drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 3 +++ drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | 2 +- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 10 ++++++++++ drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h | 12 ++++++++++++ 7 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index ddca39f3..cd4e71bf 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -493,6 +493,9 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, alloc_obj_ctx.class_num = required_class; alloc_obj_ctx.flags = 0; + /* CDE enabled */ + cde_ctx->ch->cde = true; + err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); if (err) { gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. 
err=%d", diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 70930291..9d74b412 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -100,6 +100,7 @@ struct channel_gk20a { bool bound; bool first_init; bool vpr; + bool cde; pid_t pid; struct mutex ioctl_lock; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d734c21f..207239d1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -180,6 +180,7 @@ struct gpu_ops { int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, u32 expect_delay); void (*init_cyclestats)(struct gk20a *g); + void (*enable_cde_in_fecs)(void *ctx_ptr); } gr; const char *name; struct { diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index fdd18d23..672ea521 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1632,6 +1632,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, if (!ctx_ptr) return -ENOMEM; + if (g->ops.gr.enable_cde_in_fecs && c->cde) + g->ops.gr.enable_cde_in_fecs(ctx_ptr); + for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h index 3d9095a8..4e3fae13 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 17b4b8ea..df7f2af9 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -1052,6 +1052,15 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g) #endif } +void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) +{ + u32 cde_v; + + cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); + cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); + gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); +} + void gm20b_init_gr(struct gpu_ops *gops) { gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; @@ -1107,4 +1116,5 @@ void gm20b_init_gr(struct gpu_ops *gops) gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info; gops->gr.wait_empty = gr_gk20a_wait_idle; gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; + gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs; } diff --git a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h index cefd91e1..34f8a6a4 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h @@ -58,6 +58,18 @@ static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) { return 0x00000008; } +static inline u32 ctxsw_prog_main_image_ctl_o(void) +{ + return 0x0000000c; +} +static inline u32 ctxsw_prog_main_image_ctl_cde_enabled_f(void) +{ + return 0x400; +} +static inline u32 ctxsw_prog_main_image_ctl_cde_disabled_f(void) +{ + return 0x0; +} static inline u32 ctxsw_prog_main_image_patch_count_o(void) { return 0x00000010; -- cgit v1.2.2