diff options
author | sujeet baranwal <sbaranwal@nvidia.com> | 2015-09-28 18:26:23 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-09-29 16:15:15 -0400 |
commit | ab93322b25c9dd6058fac6523f41571d77eeaeb9 (patch) | |
tree | ad403ae2dea3fe8842d0c60076ee59c4f5bcb95c /drivers | |
parent | 39e8bff2fc02b4037dc925076e5f42f6519101eb (diff) |
gpu: nvgpu: Add CDE bits in FECS header
For a CDE channel, accesses from the T1 (Tex) unit need to be promoted to
128B-aligned accesses; otherwise a HW deadlock occurs. The GPU driver sets a
bit in the FECS header, which FECS uses to configure the T1 promotion to
128B-aligned accesses.
Bug 200096226
Change-Id: I8a8deaf6fb91f4bbceacd491db7eb6f7bca5001b
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Reviewed-on: http://git-master/r/804625
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h | 12 |
7 files changed, 31 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index ddca39f3..cd4e71bf 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -493,6 +493,9 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, | |||
493 | alloc_obj_ctx.class_num = required_class; | 493 | alloc_obj_ctx.class_num = required_class; |
494 | alloc_obj_ctx.flags = 0; | 494 | alloc_obj_ctx.flags = 0; |
495 | 495 | ||
496 | /* CDE enabled */ | ||
497 | cde_ctx->ch->cde = true; | ||
498 | |||
496 | err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); | 499 | err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); |
497 | if (err) { | 500 | if (err) { |
498 | gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. err=%d", | 501 | gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. err=%d", |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 70930291..9d74b412 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -100,6 +100,7 @@ struct channel_gk20a { | |||
100 | bool bound; | 100 | bool bound; |
101 | bool first_init; | 101 | bool first_init; |
102 | bool vpr; | 102 | bool vpr; |
103 | bool cde; | ||
103 | pid_t pid; | 104 | pid_t pid; |
104 | struct mutex ioctl_lock; | 105 | struct mutex ioctl_lock; |
105 | 106 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d734c21f..207239d1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -180,6 +180,7 @@ struct gpu_ops { | |||
180 | int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, | 180 | int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, |
181 | u32 expect_delay); | 181 | u32 expect_delay); |
182 | void (*init_cyclestats)(struct gk20a *g); | 182 | void (*init_cyclestats)(struct gk20a *g); |
183 | void (*enable_cde_in_fecs)(void *ctx_ptr); | ||
183 | } gr; | 184 | } gr; |
184 | const char *name; | 185 | const char *name; |
185 | struct { | 186 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index fdd18d23..672ea521 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1632,6 +1632,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1632 | if (!ctx_ptr) | 1632 | if (!ctx_ptr) |
1633 | return -ENOMEM; | 1633 | return -ENOMEM; |
1634 | 1634 | ||
1635 | if (g->ops.gr.enable_cde_in_fecs && c->cde) | ||
1636 | g->ops.gr.enable_cde_in_fecs(ctx_ptr); | ||
1637 | |||
1635 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) | 1638 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) |
1636 | gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); | 1639 | gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); |
1637 | 1640 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h index 3d9095a8..4e3fae13 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_ctxsw_prog_gk20a.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms and conditions of the GNU General Public License, | 5 | * under the terms and conditions of the GNU General Public License, |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 17b4b8ea..df7f2af9 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1052,6 +1052,15 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g) | |||
1052 | #endif | 1052 | #endif |
1053 | } | 1053 | } |
1054 | 1054 | ||
/*
 * gr_gm20b_enable_cde_in_fecs - set the CDE-enabled flag in a context image.
 *
 * @ctx_ptr: CPU pointer to the start of the context image to modify.
 *
 * Performs a read-modify-write of the "ctl" word located at byte offset
 * ctxsw_prog_main_image_ctl_o() from ctx_ptr: the current value is read,
 * ctxsw_prog_main_image_ctl_cde_enabled_f() is OR-ed in, and the result is
 * written back, so other bits in the word are kept.  Per the commit message,
 * FECS uses this header bit to promote T1 (Tex) accesses to 128B alignment.
 *
 * The offset is applied with arithmetic on a void pointer, i.e. in bytes
 * (GNU C extension); the second argument 0 passed to gk20a_mem_rd32() /
 * gk20a_mem_wr32() is presumably a 32-bit word index relative to that
 * address -- TODO confirm against the helper definitions.
 *
 * NOTE(review): in the gr_gk20a_load_golden_ctx_image() hunk this hook is
 * called before the loop that writes local_golden_image[] into ctx_ptr
 * starting at word 0.  That loop looks like it overwrites the word modified
 * here, which would drop the CDE bit -- confirm the intended ordering.
 */
1055 | void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) | ||
1056 | { | ||
1057 | u32 cde_v; | ||
1058 | |||
1059 | cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); | ||
1060 | cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); | ||
1061 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); | ||
1062 | } | ||
1063 | |||
1055 | void gm20b_init_gr(struct gpu_ops *gops) | 1064 | void gm20b_init_gr(struct gpu_ops *gops) |
1056 | { | 1065 | { |
1057 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 1066 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -1107,4 +1116,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1107 | gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info; | 1116 | gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info; |
1108 | gops->gr.wait_empty = gr_gk20a_wait_idle; | 1117 | gops->gr.wait_empty = gr_gk20a_wait_idle; |
1109 | gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; | 1118 | gops->gr.init_cyclestats = gr_gm20b_init_cyclestats; |
1119 | gops->gr.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs; | ||
1110 | } | 1120 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h index cefd91e1..34f8a6a4 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ctxsw_prog_gm20b.h | |||
@@ -58,6 +58,18 @@ static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) | |||
58 | { | 58 | { |
59 | return 0x00000008; | 59 | return 0x00000008; |
60 | } | 60 | } |
/*
 * Returns 0x0000000c: the offset of the "ctl" word in the context-switch
 * main image.  gr_gm20b_enable_cde_in_fecs() adds this value to a void
 * pointer, so it is used as a byte offset from the start of the image.
 */
61 | static inline u32 ctxsw_prog_main_image_ctl_o(void) | ||
62 | { | ||
63 | return 0x0000000c; | ||
64 | } | ||
/*
 * Returns 0x400 (bit 10 set): the value OR-ed into the main image "ctl" word
 * by gr_gm20b_enable_cde_in_fecs() to mark the context as CDE-enabled.
 */
65 | static inline u32 ctxsw_prog_main_image_ctl_cde_enabled_f(void) | ||
66 | { | ||
67 | return 0x400; | ||
68 | } | ||
/*
 * Returns 0x0: the counterpart of ctxsw_prog_main_image_ctl_cde_enabled_f(),
 * i.e. the CDE field value with the enable bit clear.  No caller is visible
 * in this diff.
 */
69 | static inline u32 ctxsw_prog_main_image_ctl_cde_disabled_f(void) | ||
70 | { | ||
71 | return 0x0; | ||
72 | } | ||
61 | static inline u32 ctxsw_prog_main_image_patch_count_o(void) | 73 | static inline u32 ctxsw_prog_main_image_patch_count_o(void) |
62 | { | 74 | { |
63 | return 0x00000010; | 75 | return 0x00000010; |