summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Terje Bergstrom <tbergstrom@nvidia.com> 2017-09-07 13:43:47 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com> 2017-09-11 18:10:52 -0400
commit c37c9baae65bcf0ef08a319488c09f57131026cc (patch)
tree e3b4252bfff7436574a909dd625de49229d538da
parent 17451138cf60f5d64eed88cc5defd44981926d9d (diff)
gpu: nvgpu: Move CDE code to Linux module
CDE is only used on Linux platforms, and the code is highly dependent on Linux APIs. Move the common CDE code to the Linux module and leave only the chip-specific parts to the HAL. Change-Id: I507fe7eceaf7607303dfdddcf438449a5f582ea7 Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1554755 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/Makefile.nvgpu | 2
-rw-r--r-- drivers/gpu/nvgpu/common/linux/cde.c (renamed from drivers/gpu/nvgpu/gk20a/cde_gk20a.c) | 238
-rw-r--r-- drivers/gpu/nvgpu/common/linux/cde.h (renamed from drivers/gpu/nvgpu/gk20a/cde_gk20a.h) | 16
-rw-r--r-- drivers/gpu/nvgpu/common/linux/debug_cde.c | 14
-rw-r--r-- drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 3
-rw-r--r-- drivers/gpu/nvgpu/common/linux/module.c | 14
-rw-r--r-- drivers/gpu/nvgpu/common/linux/os_linux.h | 2
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c | 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h | 3
-rw-r--r-- drivers/gpu/nvgpu/gm20b/cde_gm20b.c | 5
-rw-r--r-- drivers/gpu/nvgpu/gm20b/cde_gm20b.h | 1
-rw-r--r-- drivers/gpu/nvgpu/gp10b/cde_gp10b.c | 5
-rw-r--r-- drivers/gpu/nvgpu/gp10b/cde_gp10b.h | 1
13 files changed, 171 insertions, 136 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 25545f29..87199316 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -45,6 +45,7 @@ nvgpu-y := \
45 common/linux/vm.o \ 45 common/linux/vm.o \
46 common/linux/intr.o \ 46 common/linux/intr.o \
47 common/linux/sysfs.o \ 47 common/linux/sysfs.o \
48 common/linux/cde.o \
48 common/mm/nvgpu_allocator.o \ 49 common/mm/nvgpu_allocator.o \
49 common/mm/bitmap_allocator.o \ 50 common/mm/bitmap_allocator.o \
50 common/mm/buddy_allocator.o \ 51 common/mm/buddy_allocator.o \
@@ -92,7 +93,6 @@ nvgpu-y := \
92 gk20a/ltc_gk20a.o \ 93 gk20a/ltc_gk20a.o \
93 gk20a/fb_gk20a.o \ 94 gk20a/fb_gk20a.o \
94 gk20a/hal.o \ 95 gk20a/hal.o \
95 gk20a/cde_gk20a.o \
96 gk20a/tsg_gk20a.o \ 96 gk20a/tsg_gk20a.o \
97 gk20a/ctxsw_trace_gk20a.o \ 97 gk20a/ctxsw_trace_gk20a.o \
98 gk20a/fecs_trace_gk20a.o \ 98 gk20a/fecs_trace_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/common/linux/cde.c
index 506207f2..5b0fb910 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -31,13 +31,14 @@
31#include <nvgpu/bug.h> 31#include <nvgpu/bug.h>
32#include <nvgpu/firmware.h> 32#include <nvgpu/firmware.h>
33 33
34#include "gk20a.h" 34#include "gk20a/gk20a.h"
35#include "channel_gk20a.h" 35#include "gk20a/channel_gk20a.h"
36#include "mm_gk20a.h" 36#include "gk20a/mm_gk20a.h"
37#include "cde_gk20a.h" 37#include "gk20a/fence_gk20a.h"
38#include "fence_gk20a.h" 38#include "gk20a/gr_gk20a.h"
39#include "gr_gk20a.h" 39
40#include "common/linux/os_linux.h" 40#include "cde.h"
41#include "os_linux.h"
41 42
42#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> 43#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> 44#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -49,7 +50,7 @@
49#include "common/linux/vm_priv.h" 50#include "common/linux/vm_priv.h"
50 51
51static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); 52static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
52static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g); 53static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
53 54
54#define CTX_DELETE_TIME 1000 55#define CTX_DELETE_TIME 1000
55 56
@@ -65,7 +66,7 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
65 nvgpu_dma_unmap_free(cde_ctx->vm, mem); 66 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
66 } 67 }
67 68
68 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd); 69 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
69 70
70 cde_ctx->convert_cmd = NULL; 71 cde_ctx->convert_cmd = NULL;
71 cde_ctx->init_convert_cmd = NULL; 72 cde_ctx->init_convert_cmd = NULL;
@@ -79,7 +80,8 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
79static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx) 80static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
80__must_hold(&cde_app->mutex) 81__must_hold(&cde_app->mutex)
81{ 82{
82 struct gk20a *g = cde_ctx->g; 83 struct nvgpu_os_linux *l = cde_ctx->l;
84 struct gk20a *g = &l->g;
83 struct channel_gk20a *ch = cde_ctx->ch; 85 struct channel_gk20a *ch = cde_ctx->ch;
84 struct vm_gk20a *vm = ch->vm; 86 struct vm_gk20a *vm = ch->vm;
85 87
@@ -95,7 +97,7 @@ __must_hold(&cde_app->mutex)
95 97
96 /* housekeeping on app */ 98 /* housekeeping on app */
97 nvgpu_list_del(&cde_ctx->list); 99 nvgpu_list_del(&cde_ctx->list);
98 cde_ctx->g->cde_app.ctx_count--; 100 l->cde_app.ctx_count--;
99 nvgpu_kfree(g, cde_ctx); 101 nvgpu_kfree(g, cde_ctx);
100} 102}
101 103
@@ -104,7 +106,7 @@ static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
104__releases(&cde_app->mutex) 106__releases(&cde_app->mutex)
105__acquires(&cde_app->mutex) 107__acquires(&cde_app->mutex)
106{ 108{
107 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 109 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
108 110
109 /* permanent contexts do not have deleter works */ 111 /* permanent contexts do not have deleter works */
110 if (!cde_ctx->is_temporary) 112 if (!cde_ctx->is_temporary)
@@ -119,10 +121,10 @@ __acquires(&cde_app->mutex)
119 } 121 }
120} 122}
121 123
122static void gk20a_cde_remove_contexts(struct gk20a *g) 124static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
123__must_hold(&cde_app->mutex) 125__must_hold(&l->cde_app->mutex)
124{ 126{
125 struct gk20a_cde_app *cde_app = &g->cde_app; 127 struct gk20a_cde_app *cde_app = &l->cde_app;
126 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; 128 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
127 129
128 /* safe to go off the mutex in cancel_deleter since app is 130 /* safe to go off the mutex in cancel_deleter since app is
@@ -142,38 +144,38 @@ __must_hold(&cde_app->mutex)
142 } 144 }
143} 145}
144 146
145static void gk20a_cde_stop(struct gk20a *g) 147static void gk20a_cde_stop(struct nvgpu_os_linux *l)
146__must_hold(&cde_app->mutex) 148__must_hold(&l->cde_app->mutex)
147{ 149{
148 struct gk20a_cde_app *cde_app = &g->cde_app; 150 struct gk20a_cde_app *cde_app = &l->cde_app;
149 151
150 /* prevent further conversions and delayed works from working */ 152 /* prevent further conversions and delayed works from working */
151 cde_app->initialised = false; 153 cde_app->initialised = false;
152 /* free all data, empty the list */ 154 /* free all data, empty the list */
153 gk20a_cde_remove_contexts(g); 155 gk20a_cde_remove_contexts(l);
154} 156}
155 157
156void gk20a_cde_destroy(struct gk20a *g) 158void gk20a_cde_destroy(struct nvgpu_os_linux *l)
157__acquires(&cde_app->mutex) 159__acquires(&l->cde_app->mutex)
158__releases(&cde_app->mutex) 160__releases(&l->cde_app->mutex)
159{ 161{
160 struct gk20a_cde_app *cde_app = &g->cde_app; 162 struct gk20a_cde_app *cde_app = &l->cde_app;
161 163
162 if (!cde_app->initialised) 164 if (!cde_app->initialised)
163 return; 165 return;
164 166
165 nvgpu_mutex_acquire(&cde_app->mutex); 167 nvgpu_mutex_acquire(&cde_app->mutex);
166 gk20a_cde_stop(g); 168 gk20a_cde_stop(l);
167 nvgpu_mutex_release(&cde_app->mutex); 169 nvgpu_mutex_release(&cde_app->mutex);
168 170
169 nvgpu_mutex_destroy(&cde_app->mutex); 171 nvgpu_mutex_destroy(&cde_app->mutex);
170} 172}
171 173
172void gk20a_cde_suspend(struct gk20a *g) 174void gk20a_cde_suspend(struct nvgpu_os_linux *l)
173__acquires(&cde_app->mutex) 175__acquires(&l->cde_app->mutex)
174__releases(&cde_app->mutex) 176__releases(&l->cde_app->mutex)
175{ 177{
176 struct gk20a_cde_app *cde_app = &g->cde_app; 178 struct gk20a_cde_app *cde_app = &l->cde_app;
177 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; 179 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
178 180
179 if (!cde_app->initialised) 181 if (!cde_app->initialised)
@@ -195,13 +197,13 @@ __releases(&cde_app->mutex)
195 197
196} 198}
197 199
198static int gk20a_cde_create_context(struct gk20a *g) 200static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
199__must_hold(&cde_app->mutex) 201__must_hold(&l->cde_app->mutex)
200{ 202{
201 struct gk20a_cde_app *cde_app = &g->cde_app; 203 struct gk20a_cde_app *cde_app = &l->cde_app;
202 struct gk20a_cde_ctx *cde_ctx; 204 struct gk20a_cde_ctx *cde_ctx;
203 205
204 cde_ctx = gk20a_cde_allocate_context(g); 206 cde_ctx = gk20a_cde_allocate_context(l);
205 if (IS_ERR(cde_ctx)) 207 if (IS_ERR(cde_ctx))
206 return PTR_ERR(cde_ctx); 208 return PTR_ERR(cde_ctx);
207 209
@@ -213,21 +215,21 @@ __must_hold(&cde_app->mutex)
213 return 0; 215 return 0;
214} 216}
215 217
216static int gk20a_cde_create_contexts(struct gk20a *g) 218static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
217__must_hold(&g->cde_app->mutex) 219__must_hold(&l->cde_app->mutex)
218{ 220{
219 int err; 221 int err;
220 int i; 222 int i;
221 223
222 for (i = 0; i < NUM_CDE_CONTEXTS; i++) { 224 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
223 err = gk20a_cde_create_context(g); 225 err = gk20a_cde_create_context(l);
224 if (err) 226 if (err)
225 goto out; 227 goto out;
226 } 228 }
227 229
228 return 0; 230 return 0;
229out: 231out:
230 gk20a_cde_remove_contexts(g); 232 gk20a_cde_remove_contexts(l);
231 return err; 233 return err;
232} 234}
233 235
@@ -236,7 +238,8 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
236 struct gk20a_cde_hdr_buf *buf) 238 struct gk20a_cde_hdr_buf *buf)
237{ 239{
238 struct nvgpu_mem *mem; 240 struct nvgpu_mem *mem;
239 struct gk20a *g = cde_ctx->g; 241 struct nvgpu_os_linux *l = cde_ctx->l;
242 struct gk20a *g = &l->g;
240 int err; 243 int err;
241 244
242 /* check that the file can hold the buf */ 245 /* check that the file can hold the buf */
@@ -276,7 +279,8 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
276static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, 279static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
277 int type, s32 shift, u64 mask, u64 value) 280 int type, s32 shift, u64 mask, u64 value)
278{ 281{
279 struct gk20a *g = cde_ctx->g; 282 struct nvgpu_os_linux *l = cde_ctx->l;
283 struct gk20a *g = &l->g;
280 u32 *target_mem_ptr = target; 284 u32 *target_mem_ptr = target;
281 u64 *target_mem_ptr_u64 = target; 285 u64 *target_mem_ptr_u64 = target;
282 u64 current_value, new_value; 286 u64 current_value, new_value;
@@ -325,7 +329,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
325{ 329{
326 struct nvgpu_mem *source_mem; 330 struct nvgpu_mem *source_mem;
327 struct nvgpu_mem *target_mem; 331 struct nvgpu_mem *target_mem;
328 struct gk20a *g = cde_ctx->g; 332 struct nvgpu_os_linux *l = cde_ctx->l;
333 struct gk20a *g = &l->g;
329 u32 *target_mem_ptr; 334 u32 *target_mem_ptr;
330 u64 vaddr; 335 u64 vaddr;
331 int err; 336 int err;
@@ -373,7 +378,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
373 378
374static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) 379static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
375{ 380{
376 struct gk20a *g = cde_ctx->g; 381 struct nvgpu_os_linux *l = cde_ctx->l;
382 struct gk20a *g = &l->g;
377 struct nvgpu_mem *target_mem; 383 struct nvgpu_mem *target_mem;
378 u32 *target_mem_ptr; 384 u32 *target_mem_ptr;
379 u64 new_data; 385 u64 new_data;
@@ -464,7 +470,8 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
464 struct gk20a_cde_hdr_param *param) 470 struct gk20a_cde_hdr_param *param)
465{ 471{
466 struct nvgpu_mem *target_mem; 472 struct nvgpu_mem *target_mem;
467 struct gk20a *g = cde_ctx->g; 473 struct nvgpu_os_linux *l = cde_ctx->l;
474 struct gk20a *g = &l->g;
468 475
469 if (param->target_buf >= cde_ctx->num_bufs) { 476 if (param->target_buf >= cde_ctx->num_bufs) {
470 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", 477 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
@@ -506,7 +513,8 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
506 struct nvgpu_firmware *img, 513 struct nvgpu_firmware *img,
507 u32 required_class) 514 u32 required_class)
508{ 515{
509 struct gk20a *g = cde_ctx->g; 516 struct nvgpu_os_linux *l = cde_ctx->l;
517 struct gk20a *g = &l->g;
510 struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx; 518 struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx;
511 int err; 519 int err;
512 520
@@ -532,7 +540,8 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
532 struct gk20a_cde_cmd_elem *cmd_elem, 540 struct gk20a_cde_cmd_elem *cmd_elem,
533 u32 num_elems) 541 u32 num_elems)
534{ 542{
535 struct gk20a *g = cde_ctx->g; 543 struct nvgpu_os_linux *l = cde_ctx->l;
544 struct gk20a *g = &l->g;
536 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem; 545 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
537 u32 *num_entries; 546 u32 *num_entries;
538 unsigned int i; 547 unsigned int i;
@@ -551,7 +560,7 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
551 } 560 }
552 561
553 /* allocate gpfifo entries to be pushed */ 562 /* allocate gpfifo entries to be pushed */
554 *gpfifo = nvgpu_kzalloc(cde_ctx->g, 563 *gpfifo = nvgpu_kzalloc(g,
555 sizeof(struct nvgpu_gpfifo) * num_elems); 564 sizeof(struct nvgpu_gpfifo) * num_elems);
556 if (!*gpfifo) { 565 if (!*gpfifo) {
557 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); 566 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
@@ -596,7 +605,8 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
596 605
597static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) 606static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
598{ 607{
599 struct gk20a *g = cde_ctx->g; 608 struct nvgpu_os_linux *l = cde_ctx->l;
609 struct gk20a *g = &l->g;
600 unsigned long init_bytes = cde_ctx->init_cmd_num_entries * 610 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
601 sizeof(struct nvgpu_gpfifo); 611 sizeof(struct nvgpu_gpfifo);
602 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * 612 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
@@ -605,7 +615,7 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
605 struct nvgpu_gpfifo *combined_cmd; 615 struct nvgpu_gpfifo *combined_cmd;
606 616
607 /* allocate buffer that has space for both */ 617 /* allocate buffer that has space for both */
608 combined_cmd = nvgpu_kzalloc(cde_ctx->g, total_bytes); 618 combined_cmd = nvgpu_kzalloc(g, total_bytes);
609 if (!combined_cmd) { 619 if (!combined_cmd) {
610 nvgpu_warn(g, 620 nvgpu_warn(g,
611 "cde: could not allocate memory for gpfifo entries"); 621 "cde: could not allocate memory for gpfifo entries");
@@ -617,8 +627,8 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
617 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, 627 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
618 cde_ctx->convert_cmd, conv_bytes); 628 cde_ctx->convert_cmd, conv_bytes);
619 629
620 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd); 630 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
621 nvgpu_kfree(cde_ctx->g, cde_ctx->convert_cmd); 631 nvgpu_kfree(g, cde_ctx->convert_cmd);
622 632
623 cde_ctx->init_convert_cmd = combined_cmd; 633 cde_ctx->init_convert_cmd = combined_cmd;
624 cde_ctx->convert_cmd = combined_cmd 634 cde_ctx->convert_cmd = combined_cmd
@@ -630,8 +640,9 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
630static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, 640static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
631 struct nvgpu_firmware *img) 641 struct nvgpu_firmware *img)
632{ 642{
633 struct gk20a *g = cde_ctx->g; 643 struct nvgpu_os_linux *l = cde_ctx->l;
634 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 644 struct gk20a *g = &l->g;
645 struct gk20a_cde_app *cde_app = &l->cde_app;
635 u32 *data = (u32 *)img->data; 646 u32 *data = (u32 *)img->data;
636 u32 num_of_elems; 647 u32 num_of_elems;
637 struct gk20a_cde_hdr_elem *elem; 648 struct gk20a_cde_hdr_elem *elem;
@@ -724,7 +735,8 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
724 u32 op, struct nvgpu_fence *fence, 735 u32 op, struct nvgpu_fence *fence,
725 u32 flags, struct gk20a_fence **fence_out) 736 u32 flags, struct gk20a_fence **fence_out)
726{ 737{
727 struct gk20a *g = cde_ctx->g; 738 struct nvgpu_os_linux *l = cde_ctx->l;
739 struct gk20a *g = &l->g;
728 struct nvgpu_gpfifo *gpfifo = NULL; 740 struct nvgpu_gpfifo *gpfifo = NULL;
729 int num_entries = 0; 741 int num_entries = 0;
730 742
@@ -756,7 +768,7 @@ static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
756__acquires(&cde_app->mutex) 768__acquires(&cde_app->mutex)
757__releases(&cde_app->mutex) 769__releases(&cde_app->mutex)
758{ 770{
759 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 771 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
760 772
761 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); 773 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
762 trace_gk20a_cde_release(cde_ctx); 774 trace_gk20a_cde_release(cde_ctx);
@@ -781,8 +793,9 @@ __releases(&cde_app->mutex)
781 struct delayed_work *delay_work = to_delayed_work(work); 793 struct delayed_work *delay_work = to_delayed_work(work);
782 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work, 794 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
783 struct gk20a_cde_ctx, ctx_deleter_work); 795 struct gk20a_cde_ctx, ctx_deleter_work);
784 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 796 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
785 struct gk20a *g = cde_ctx->g; 797 struct nvgpu_os_linux *l = cde_ctx->l;
798 struct gk20a *g = &l->g;
786 int err; 799 int err;
787 800
788 /* someone has just taken it? engine deletion started? */ 801 /* someone has just taken it? engine deletion started? */
@@ -823,10 +836,11 @@ out:
823 gk20a_idle(g); 836 gk20a_idle(g);
824} 837}
825 838
826static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct gk20a *g) 839static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
827__must_hold(&cde_app->mutex) 840__must_hold(&cde_app->mutex)
828{ 841{
829 struct gk20a_cde_app *cde_app = &g->cde_app; 842 struct gk20a *g = &l->g;
843 struct gk20a_cde_app *cde_app = &l->cde_app;
830 struct gk20a_cde_ctx *cde_ctx; 844 struct gk20a_cde_ctx *cde_ctx;
831 845
832 /* exhausted? */ 846 /* exhausted? */
@@ -862,7 +876,7 @@ __must_hold(&cde_app->mutex)
862 "cde: no free contexts, count=%d", 876 "cde: no free contexts, count=%d",
863 cde_app->ctx_count); 877 cde_app->ctx_count);
864 878
865 cde_ctx = gk20a_cde_allocate_context(g); 879 cde_ctx = gk20a_cde_allocate_context(l);
866 if (IS_ERR(cde_ctx)) { 880 if (IS_ERR(cde_ctx)) {
867 nvgpu_warn(g, "cde: cannot allocate context: %ld", 881 nvgpu_warn(g, "cde: cannot allocate context: %ld",
868 PTR_ERR(cde_ctx)); 882 PTR_ERR(cde_ctx));
@@ -881,11 +895,12 @@ __must_hold(&cde_app->mutex)
881 return cde_ctx; 895 return cde_ctx;
882} 896}
883 897
884static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g) 898static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
885__releases(&cde_app->mutex) 899__releases(&cde_app->mutex)
886__acquires(&cde_app->mutex) 900__acquires(&cde_app->mutex)
887{ 901{
888 struct gk20a_cde_app *cde_app = &g->cde_app; 902 struct gk20a *g = &l->g;
903 struct gk20a_cde_app *cde_app = &l->cde_app;
889 struct gk20a_cde_ctx *cde_ctx = NULL; 904 struct gk20a_cde_ctx *cde_ctx = NULL;
890 struct nvgpu_timeout timeout; 905 struct nvgpu_timeout timeout;
891 906
@@ -893,7 +908,7 @@ __acquires(&cde_app->mutex)
893 NVGPU_TIMER_CPU_TIMER); 908 NVGPU_TIMER_CPU_TIMER);
894 909
895 do { 910 do {
896 cde_ctx = gk20a_cde_do_get_context(g); 911 cde_ctx = gk20a_cde_do_get_context(l);
897 if (PTR_ERR(cde_ctx) != -EAGAIN) 912 if (PTR_ERR(cde_ctx) != -EAGAIN)
898 break; 913 break;
899 914
@@ -906,8 +921,9 @@ __acquires(&cde_app->mutex)
906 return cde_ctx; 921 return cde_ctx;
907} 922}
908 923
909static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g) 924static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
910{ 925{
926 struct gk20a *g = &l->g;
911 struct gk20a_cde_ctx *cde_ctx; 927 struct gk20a_cde_ctx *cde_ctx;
912 int ret; 928 int ret;
913 929
@@ -915,7 +931,7 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
915 if (!cde_ctx) 931 if (!cde_ctx)
916 return ERR_PTR(-ENOMEM); 932 return ERR_PTR(-ENOMEM);
917 933
918 cde_ctx->g = g; 934 cde_ctx->l = l;
919 cde_ctx->dev = dev_from_gk20a(g); 935 cde_ctx->dev = dev_from_gk20a(g);
920 936
921 ret = gk20a_cde_load(cde_ctx); 937 ret = gk20a_cde_load(cde_ctx);
@@ -935,16 +951,17 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
935 return cde_ctx; 951 return cde_ctx;
936} 952}
937 953
938int gk20a_cde_convert(struct gk20a *g, 954int gk20a_cde_convert(struct nvgpu_os_linux *l,
939 struct dma_buf *compbits_scatter_buf, 955 struct dma_buf *compbits_scatter_buf,
940 u64 compbits_byte_offset, 956 u64 compbits_byte_offset,
941 u64 scatterbuffer_byte_offset, 957 u64 scatterbuffer_byte_offset,
942 struct nvgpu_fence *fence, 958 struct nvgpu_fence *fence,
943 u32 __flags, struct gk20a_cde_param *params, 959 u32 __flags, struct gk20a_cde_param *params,
944 int num_params, struct gk20a_fence **fence_out) 960 int num_params, struct gk20a_fence **fence_out)
945__acquires(&cde_app->mutex) 961__acquires(&l->cde_app->mutex)
946__releases(&cde_app->mutex) 962__releases(&l->cde_app->mutex)
947{ 963{
964 struct gk20a *g = &l->g;
948 struct gk20a_cde_ctx *cde_ctx = NULL; 965 struct gk20a_cde_ctx *cde_ctx = NULL;
949 struct gk20a_comptags comptags; 966 struct gk20a_comptags comptags;
950 u64 mapped_compbits_offset = 0; 967 u64 mapped_compbits_offset = 0;
@@ -972,9 +989,9 @@ __releases(&cde_app->mutex)
972 if (err) 989 if (err)
973 return err; 990 return err;
974 991
975 nvgpu_mutex_acquire(&g->cde_app.mutex); 992 nvgpu_mutex_acquire(&l->cde_app.mutex);
976 cde_ctx = gk20a_cde_get_context(g); 993 cde_ctx = gk20a_cde_get_context(l);
977 nvgpu_mutex_release(&g->cde_app.mutex); 994 nvgpu_mutex_release(&l->cde_app.mutex);
978 if (IS_ERR(cde_ctx)) { 995 if (IS_ERR(cde_ctx)) {
979 err = PTR_ERR(cde_ctx); 996 err = PTR_ERR(cde_ctx);
980 goto exit_idle; 997 goto exit_idle;
@@ -1158,8 +1175,9 @@ __acquires(&cde_app->mutex)
1158__releases(&cde_app->mutex) 1175__releases(&cde_app->mutex)
1159{ 1176{
1160 struct gk20a_cde_ctx *cde_ctx = data; 1177 struct gk20a_cde_ctx *cde_ctx = data;
1161 struct gk20a *g = cde_ctx->g; 1178 struct nvgpu_os_linux *l = cde_ctx->l;
1162 struct gk20a_cde_app *cde_app = &g->cde_app; 1179 struct gk20a *g = &l->g;
1180 struct gk20a_cde_app *cde_app = &l->cde_app;
1163 bool channel_idle; 1181 bool channel_idle;
1164 1182
1165 channel_gk20a_joblist_lock(ch); 1183 channel_gk20a_joblist_lock(ch);
@@ -1188,7 +1206,7 @@ __releases(&cde_app->mutex)
1188 /* mark it to be deleted, replace with a new one */ 1206 /* mark it to be deleted, replace with a new one */
1189 nvgpu_mutex_acquire(&cde_app->mutex); 1207 nvgpu_mutex_acquire(&cde_app->mutex);
1190 cde_ctx->is_temporary = true; 1208 cde_ctx->is_temporary = true;
1191 if (gk20a_cde_create_context(g)) { 1209 if (gk20a_cde_create_context(l)) {
1192 nvgpu_err(g, "cde: can't replace context"); 1210 nvgpu_err(g, "cde: can't replace context");
1193 } 1211 }
1194 nvgpu_mutex_release(&cde_app->mutex); 1212 nvgpu_mutex_release(&cde_app->mutex);
@@ -1208,7 +1226,8 @@ __releases(&cde_app->mutex)
1208 1226
1209static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) 1227static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1210{ 1228{
1211 struct gk20a *g = cde_ctx->g; 1229 struct nvgpu_os_linux *l = cde_ctx->l;
1230 struct gk20a *g = &l->g;
1212 struct nvgpu_firmware *img; 1231 struct nvgpu_firmware *img;
1213 struct channel_gk20a *ch; 1232 struct channel_gk20a *ch;
1214 struct gr_gk20a *gr = &g->gr; 1233 struct gr_gk20a *gr = &g->gr;
@@ -1288,11 +1307,12 @@ err_get_gk20a_channel:
1288 return err; 1307 return err;
1289} 1308}
1290 1309
1291int gk20a_cde_reload(struct gk20a *g) 1310int gk20a_cde_reload(struct nvgpu_os_linux *l)
1292__acquires(&cde_app->mutex) 1311__acquires(&l->cde_app->mutex)
1293__releases(&cde_app->mutex) 1312__releases(&l->cde_app->mutex)
1294{ 1313{
1295 struct gk20a_cde_app *cde_app = &g->cde_app; 1314 struct gk20a *g = &l->g;
1315 struct gk20a_cde_app *cde_app = &l->cde_app;
1296 int err; 1316 int err;
1297 1317
1298 if (!cde_app->initialised) 1318 if (!cde_app->initialised)
@@ -1304,9 +1324,9 @@ __releases(&cde_app->mutex)
1304 1324
1305 nvgpu_mutex_acquire(&cde_app->mutex); 1325 nvgpu_mutex_acquire(&cde_app->mutex);
1306 1326
1307 gk20a_cde_stop(g); 1327 gk20a_cde_stop(l);
1308 1328
1309 err = gk20a_cde_create_contexts(g); 1329 err = gk20a_cde_create_contexts(l);
1310 if (!err) 1330 if (!err)
1311 cde_app->initialised = true; 1331 cde_app->initialised = true;
1312 1332
@@ -1316,11 +1336,11 @@ __releases(&cde_app->mutex)
1316 return err; 1336 return err;
1317} 1337}
1318 1338
1319int gk20a_init_cde_support(struct gk20a *g) 1339int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1320__acquires(&cde_app->mutex) 1340__acquires(&cde_app->mutex)
1321__releases(&cde_app->mutex) 1341__releases(&cde_app->mutex)
1322{ 1342{
1323 struct gk20a_cde_app *cde_app = &g->cde_app; 1343 struct gk20a_cde_app *cde_app = &l->cde_app;
1324 int err; 1344 int err;
1325 1345
1326 if (cde_app->initialised) 1346 if (cde_app->initialised)
@@ -1340,7 +1360,7 @@ __releases(&cde_app->mutex)
1340 cde_app->ctx_count_top = 0; 1360 cde_app->ctx_count_top = 0;
1341 cde_app->ctx_usecount = 0; 1361 cde_app->ctx_usecount = 0;
1342 1362
1343 err = gk20a_cde_create_contexts(g); 1363 err = gk20a_cde_create_contexts(l);
1344 if (!err) 1364 if (!err)
1345 cde_app->initialised = true; 1365 cde_app->initialised = true;
1346 1366
@@ -1393,7 +1413,7 @@ enum cde_launch_patch_id {
1393#define MAX_CDE_LAUNCH_PATCHES 32 1413#define MAX_CDE_LAUNCH_PATCHES 32
1394 1414
1395static int gk20a_buffer_convert_gpu_to_cde_v1( 1415static int gk20a_buffer_convert_gpu_to_cde_v1(
1396 struct gk20a *g, 1416 struct nvgpu_os_linux *l,
1397 struct dma_buf *dmabuf, u32 consumer, 1417 struct dma_buf *dmabuf, u32 consumer,
1398 u64 offset, u64 compbits_hoffset, u64 compbits_voffset, 1418 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1399 u64 scatterbuffer_offset, 1419 u64 scatterbuffer_offset,
@@ -1401,6 +1421,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1401 u32 submit_flags, struct nvgpu_fence *fence_in, 1421 u32 submit_flags, struct nvgpu_fence *fence_in,
1402 struct gk20a_buffer_state *state) 1422 struct gk20a_buffer_state *state)
1403{ 1423{
1424 struct gk20a *g = &l->g;
1404 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; 1425 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1405 int param = 0; 1426 int param = 0;
1406 int err = 0; 1427 int err = 0;
@@ -1426,6 +1447,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1426 1447
1427 if (g->ops.cde.get_program_numbers) 1448 if (g->ops.cde.get_program_numbers)
1428 g->ops.cde.get_program_numbers(g, block_height_log2, 1449 g->ops.cde.get_program_numbers(g, block_height_log2,
1450 l->cde_app.shader_parameter,
1429 &hprog, &vprog); 1451 &hprog, &vprog);
1430 else { 1452 else {
1431 nvgpu_warn(g, "cde: chip not supported"); 1453 nvgpu_warn(g, "cde: chip not supported");
@@ -1450,11 +1472,11 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1450 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); 1472 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1451 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", 1473 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1452 hprog, 1474 hprog,
1453 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], 1475 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1454 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], 1476 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1455 vprog, 1477 vprog,
1456 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], 1478 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1457 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); 1479 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1458 1480
1459 /* Write parameters */ 1481 /* Write parameters */
1460#define WRITE_PATCH(NAME, VALUE) \ 1482#define WRITE_PATCH(NAME, VALUE) \
@@ -1483,40 +1505,40 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1483 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); 1505 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1484 1506
1485 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, 1507 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1486 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); 1508 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1487 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, 1509 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1488 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); 1510 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1489 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, 1511 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1490 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); 1512 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1491 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, 1513 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1492 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); 1514 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1493 1515
1494 if (consumer & NVGPU_GPU_COMPBITS_CDEH) { 1516 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1495 WRITE_PATCH(PATCH_H_LAUNCH_WORD1, 1517 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1496 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); 1518 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1497 WRITE_PATCH(PATCH_H_LAUNCH_WORD2, 1519 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1498 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); 1520 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1499 } else { 1521 } else {
1500 WRITE_PATCH(PATCH_H_LAUNCH_WORD1, 1522 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1501 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); 1523 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1502 WRITE_PATCH(PATCH_H_LAUNCH_WORD2, 1524 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1503 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); 1525 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1504 } 1526 }
1505 1527
1506 if (consumer & NVGPU_GPU_COMPBITS_CDEV) { 1528 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1507 WRITE_PATCH(PATCH_V_LAUNCH_WORD1, 1529 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1508 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); 1530 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1509 WRITE_PATCH(PATCH_V_LAUNCH_WORD2, 1531 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1510 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); 1532 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1511 } else { 1533 } else {
1512 WRITE_PATCH(PATCH_V_LAUNCH_WORD1, 1534 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1513 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); 1535 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1514 WRITE_PATCH(PATCH_V_LAUNCH_WORD2, 1536 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1515 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); 1537 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1516 } 1538 }
1517#undef WRITE_PATCH 1539#undef WRITE_PATCH
1518 1540
1519 err = gk20a_cde_convert(g, dmabuf, 1541 err = gk20a_cde_convert(l, dmabuf,
1520 compbits_hoffset, 1542 compbits_hoffset,
1521 scatterbuffer_offset, 1543 scatterbuffer_offset,
1522 fence_in, submit_flags, 1544 fence_in, submit_flags,
@@ -1534,30 +1556,31 @@ out:
1534} 1556}
1535 1557
1536static int gk20a_buffer_convert_gpu_to_cde( 1558static int gk20a_buffer_convert_gpu_to_cde(
1537 struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, 1559 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1538 u64 offset, u64 compbits_hoffset, u64 compbits_voffset, 1560 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1539 u64 scatterbuffer_offset, 1561 u64 scatterbuffer_offset,
1540 u32 width, u32 height, u32 block_height_log2, 1562 u32 width, u32 height, u32 block_height_log2,
1541 u32 submit_flags, struct nvgpu_fence *fence_in, 1563 u32 submit_flags, struct nvgpu_fence *fence_in,
1542 struct gk20a_buffer_state *state) 1564 struct gk20a_buffer_state *state)
1543{ 1565{
1566 struct gk20a *g = &l->g;
1544 int err = 0; 1567 int err = 0;
1545 1568
1546 if (!g->cde_app.initialised) 1569 if (!l->cde_app.initialised)
1547 return -ENOSYS; 1570 return -ENOSYS;
1548 1571
1549 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", 1572 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1550 g->cde_app.firmware_version); 1573 l->cde_app.firmware_version);
1551 1574
1552 if (g->cde_app.firmware_version == 1) { 1575 if (l->cde_app.firmware_version == 1) {
1553 err = gk20a_buffer_convert_gpu_to_cde_v1( 1576 err = gk20a_buffer_convert_gpu_to_cde_v1(
1554 g, dmabuf, consumer, offset, compbits_hoffset, 1577 l, dmabuf, consumer, offset, compbits_hoffset,
1555 compbits_voffset, scatterbuffer_offset, 1578 compbits_voffset, scatterbuffer_offset,
1556 width, height, block_height_log2, 1579 width, height, block_height_log2,
1557 submit_flags, fence_in, state); 1580 submit_flags, fence_in, state);
1558 } else { 1581 } else {
1559 nvgpu_err(g, "unsupported CDE firmware version %d", 1582 nvgpu_err(g, "unsupported CDE firmware version %d",
1560 g->cde_app.firmware_version); 1583 l->cde_app.firmware_version);
1561 err = -EINVAL; 1584 err = -EINVAL;
1562 } 1585 }
1563 1586
@@ -1565,7 +1588,7 @@ static int gk20a_buffer_convert_gpu_to_cde(
1565} 1588}
1566 1589
1567int gk20a_prepare_compressible_read( 1590int gk20a_prepare_compressible_read(
1568 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, 1591 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1569 u64 compbits_hoffset, u64 compbits_voffset, 1592 u64 compbits_hoffset, u64 compbits_voffset,
1570 u64 scatterbuffer_offset, 1593 u64 scatterbuffer_offset,
1571 u32 width, u32 height, u32 block_height_log2, 1594 u32 width, u32 height, u32 block_height_log2,
@@ -1573,6 +1596,7 @@ int gk20a_prepare_compressible_read(
1573 u32 *valid_compbits, u32 *zbc_color, 1596 u32 *valid_compbits, u32 *zbc_color,
1574 struct gk20a_fence **fence_out) 1597 struct gk20a_fence **fence_out)
1575{ 1598{
1599 struct gk20a *g = &l->g;
1576 int err = 0; 1600 int err = 0;
1577 struct gk20a_buffer_state *state; 1601 struct gk20a_buffer_state *state;
1578 struct dma_buf *dmabuf; 1602 struct dma_buf *dmabuf;
@@ -1606,7 +1630,7 @@ int gk20a_prepare_compressible_read(
1606 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && 1630 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1607 missing_cde_bits) { 1631 missing_cde_bits) {
1608 err = gk20a_buffer_convert_gpu_to_cde( 1632 err = gk20a_buffer_convert_gpu_to_cde(
1609 g, dmabuf, 1633 l, dmabuf,
1610 missing_cde_bits, 1634 missing_cde_bits,
1611 offset, compbits_hoffset, 1635 offset, compbits_hoffset,
1612 compbits_voffset, scatterbuffer_offset, 1636 compbits_voffset, scatterbuffer_offset,
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/common/linux/cde.h
index 4f400bf3..22732a2a 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/common/linux/cde.h
@@ -19,8 +19,6 @@
19#ifndef _CDE_GK20A_H_ 19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_ 20#define _CDE_GK20A_H_
21 21
22#include "mm_gk20a.h"
23
24#define MAX_CDE_BUFS 10 22#define MAX_CDE_BUFS 10
25#define MAX_CDE_PARAMS 64 23#define MAX_CDE_PARAMS 64
26#define MAX_CDE_USER_PARAMS 40 24#define MAX_CDE_USER_PARAMS 40
@@ -214,7 +212,7 @@ struct gk20a_cde_param {
214}; 212};
215 213
216struct gk20a_cde_ctx { 214struct gk20a_cde_ctx {
217 struct gk20a *g; 215 struct nvgpu_os_linux *l;
218 struct device *dev; 216 struct device *dev;
219 217
220 /* channel related data */ 218 /* channel related data */
@@ -284,11 +282,11 @@ struct gk20a_cde_app {
284 u32 shader_parameter; 282 u32 shader_parameter;
285}; 283};
286 284
287void gk20a_cde_destroy(struct gk20a *g); 285void gk20a_cde_destroy(struct nvgpu_os_linux *l);
288void gk20a_cde_suspend(struct gk20a *g); 286void gk20a_cde_suspend(struct nvgpu_os_linux *l);
289int gk20a_init_cde_support(struct gk20a *g); 287int gk20a_init_cde_support(struct nvgpu_os_linux *l);
290int gk20a_cde_reload(struct gk20a *g); 288int gk20a_cde_reload(struct nvgpu_os_linux *l);
291int gk20a_cde_convert(struct gk20a *g, 289int gk20a_cde_convert(struct nvgpu_os_linux *l,
292 struct dma_buf *compbits_buf, 290 struct dma_buf *compbits_buf,
293 u64 compbits_byte_offset, 291 u64 compbits_byte_offset,
294 u64 scatterbuffer_byte_offset, 292 u64 scatterbuffer_byte_offset,
@@ -297,7 +295,7 @@ int gk20a_cde_convert(struct gk20a *g,
297 int num_params, struct gk20a_fence **fence_out); 295 int num_params, struct gk20a_fence **fence_out);
298 296
299int gk20a_prepare_compressible_read( 297int gk20a_prepare_compressible_read(
300 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, 298 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
301 u64 compbits_hoffset, u64 compbits_voffset, 299 u64 compbits_hoffset, u64 compbits_voffset,
302 u64 scatterbuffer_offset, 300 u64 scatterbuffer_offset,
303 u32 width, u32 height, u32 block_height_log2, 301 u32 width, u32 height, u32 block_height_log2,
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
index 40cc64a4..cbea83b9 100644
--- a/drivers/gpu/nvgpu/common/linux/debug_cde.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -22,8 +22,8 @@
22static ssize_t gk20a_cde_reload_write(struct file *file, 22static ssize_t gk20a_cde_reload_write(struct file *file,
23 const char __user *userbuf, size_t count, loff_t *ppos) 23 const char __user *userbuf, size_t count, loff_t *ppos)
24{ 24{
25 struct gk20a *g = file->private_data; 25 struct nvgpu_os_linux *l = file->private_data;
26 gk20a_cde_reload(g); 26 gk20a_cde_reload(l);
27 return count; 27 return count;
28} 28}
29 29
@@ -41,13 +41,13 @@ void gk20a_cde_debugfs_init(struct gk20a *g)
41 return; 41 return;
42 42
43 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, 43 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
44 l->debugfs, &g->cde_app.shader_parameter); 44 l->debugfs, &l->cde_app.shader_parameter);
45 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, 45 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
46 l->debugfs, &g->cde_app.ctx_count); 46 l->debugfs, &l->cde_app.ctx_count);
47 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, 47 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
48 l->debugfs, &g->cde_app.ctx_usecount); 48 l->debugfs, &l->cde_app.ctx_usecount);
49 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, 49 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
50 l->debugfs, &g->cde_app.ctx_count_top); 50 l->debugfs, &l->cde_app.ctx_count_top);
51 debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, 51 debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
52 g, &gk20a_cde_reload_fops); 52 l, &gk20a_cde_reload_fops);
53} 53}
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index 0d79b143..0357f098 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -138,6 +138,7 @@ static int gk20a_ctrl_prepare_compressible_read(
138 struct gk20a *g, 138 struct gk20a *g,
139 struct nvgpu_gpu_prepare_compressible_read_args *args) 139 struct nvgpu_gpu_prepare_compressible_read_args *args)
140{ 140{
141 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
141 struct nvgpu_fence fence; 142 struct nvgpu_fence fence;
142 struct gk20a_fence *fence_out = NULL; 143 struct gk20a_fence *fence_out = NULL;
143 int ret = 0; 144 int ret = 0;
@@ -146,7 +147,7 @@ static int gk20a_ctrl_prepare_compressible_read(
146 fence.id = args->fence.syncpt_id; 147 fence.id = args->fence.syncpt_id;
147 fence.value = args->fence.syncpt_value; 148 fence.value = args->fence.syncpt_value;
148 149
149 ret = gk20a_prepare_compressible_read(g, args->handle, 150 ret = gk20a_prepare_compressible_read(l, args->handle,
150 args->request_compbits, args->offset, 151 args->request_compbits, args->offset,
151 args->compbits_hoffset, args->compbits_voffset, 152 args->compbits_hoffset, args->compbits_voffset,
152 args->scatterbuffer_offset, 153 args->scatterbuffer_offset,
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 6a590baa..509930c7 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -39,6 +39,7 @@
39#include "pci.h" 39#include "pci.h"
40#include "module.h" 40#include "module.h"
41#include "intr.h" 41#include "intr.h"
42#include "cde.h"
42#ifdef CONFIG_TEGRA_19x_GPU 43#ifdef CONFIG_TEGRA_19x_GPU
43#include "nvgpu_gpuid_t19x.h" 44#include "nvgpu_gpuid_t19x.h"
44#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION 45#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
@@ -185,7 +186,7 @@ int gk20a_pm_finalize_poweron(struct device *dev)
185 gk20a_scale_resume(dev_from_gk20a(g)); 186 gk20a_scale_resume(dev_from_gk20a(g));
186 187
187 if (platform->has_cde) 188 if (platform->has_cde)
188 gk20a_init_cde_support(g); 189 gk20a_init_cde_support(l);
189 190
190done: 191done:
191 if (err) 192 if (err)
@@ -197,6 +198,7 @@ done:
197static int gk20a_pm_prepare_poweroff(struct device *dev) 198static int gk20a_pm_prepare_poweroff(struct device *dev)
198{ 199{
199 struct gk20a *g = get_gk20a(dev); 200 struct gk20a *g = get_gk20a(dev);
201 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
200 int ret = 0; 202 int ret = 0;
201 struct gk20a_platform *platform = gk20a_get_platform(dev); 203 struct gk20a_platform *platform = gk20a_get_platform(dev);
202 204
@@ -207,8 +209,15 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
207 if (!g->power_on) 209 if (!g->power_on)
208 goto done; 210 goto done;
209 211
212 if (gk20a_fifo_is_engine_busy(g)) {
213 ret = -EBUSY;
214 goto done;
215 }
216
210 gk20a_scale_suspend(dev); 217 gk20a_scale_suspend(dev);
211 218
219 gk20a_cde_suspend(l);
220
212 ret = gk20a_prepare_poweroff(g); 221 ret = gk20a_prepare_poweroff(g);
213 if (ret) 222 if (ret)
214 goto error; 223 goto error;
@@ -974,6 +983,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
974{ 983{
975 struct device *dev = &pdev->dev; 984 struct device *dev = &pdev->dev;
976 struct gk20a *g = get_gk20a(dev); 985 struct gk20a *g = get_gk20a(dev);
986 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
977 struct gk20a_platform *platform = gk20a_get_platform(dev); 987 struct gk20a_platform *platform = gk20a_get_platform(dev);
978 988
979 gk20a_dbg_fn(""); 989 gk20a_dbg_fn("");
@@ -982,7 +992,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
982 return vgpu_remove(pdev); 992 return vgpu_remove(pdev);
983 993
984 if (platform->has_cde) 994 if (platform->has_cde)
985 gk20a_cde_destroy(g); 995 gk20a_cde_destroy(l);
986 996
987 gk20a_ctxsw_trace_cleanup(g); 997 gk20a_ctxsw_trace_cleanup(g);
988 998
diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h
index ed8364a9..160a5738 100644
--- a/drivers/gpu/nvgpu/common/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/os_linux.h
@@ -19,6 +19,7 @@
19#include <linux/cdev.h> 19#include <linux/cdev.h>
20 20
21#include "gk20a/gk20a.h" 21#include "gk20a/gk20a.h"
22#include "cde.h"
22 23
23struct nvgpu_os_linux { 24struct nvgpu_os_linux {
24 struct gk20a g; 25 struct gk20a g;
@@ -108,6 +109,7 @@ struct nvgpu_os_linux {
108 struct dentry *debugfs_force_preemption_gfxp; 109 struct dentry *debugfs_force_preemption_gfxp;
109 struct dentry *debugfs_dump_ctxsw_stats; 110 struct dentry *debugfs_dump_ctxsw_stats;
110#endif 111#endif
112 struct gk20a_cde_app cde_app;
111}; 113};
112 114
113static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) 115static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0cd314d6..63ea5bc4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -117,9 +117,6 @@ int gk20a_prepare_poweroff(struct gk20a *g)
117 if (gk20a_fifo_is_engine_busy(g)) 117 if (gk20a_fifo_is_engine_busy(g))
118 return -EBUSY; 118 return -EBUSY;
119 119
120 /* cancel any pending cde work */
121 gk20a_cde_suspend(g);
122
123 gk20a_ce_suspend(g); 120 gk20a_ce_suspend(g);
124 121
125 ret = gk20a_channel_suspend(g); 122 ret = gk20a_channel_suspend(g);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ab715bdc..69cb2253 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -60,7 +60,6 @@ struct nvgpu_cpu_time_correlation_sample;
60#include "pmu_gk20a.h" 60#include "pmu_gk20a.h"
61#include "priv_ring_gk20a.h" 61#include "priv_ring_gk20a.h"
62#include "therm_gk20a.h" 62#include "therm_gk20a.h"
63#include "cde_gk20a.h"
64#include "sched_gk20a.h" 63#include "sched_gk20a.h"
65#ifdef CONFIG_ARCH_TEGRA_18x_SOC 64#ifdef CONFIG_ARCH_TEGRA_18x_SOC
66#include "clk/clk.h" 65#include "clk/clk.h"
@@ -928,6 +927,7 @@ struct gpu_ops {
928 struct { 927 struct {
929 void (*get_program_numbers)(struct gk20a *g, 928 void (*get_program_numbers)(struct gk20a *g,
930 u32 block_height_log2, 929 u32 block_height_log2,
930 u32 shader_parameter,
931 int *hprog, int *vprog); 931 int *hprog, int *vprog);
932 bool (*need_scatter_buffer)(struct gk20a *g); 932 bool (*need_scatter_buffer)(struct gk20a *g);
933 int (*populate_scatter_buffer)(struct gk20a *g, 933 int (*populate_scatter_buffer)(struct gk20a *g,
@@ -1217,7 +1217,6 @@ struct gk20a {
1217 1217
1218 struct gk20a_sched_ctrl sched_ctrl; 1218 struct gk20a_sched_ctrl sched_ctrl;
1219 1219
1220 struct gk20a_cde_app cde_app;
1221 bool mmu_debug_ctrl; 1220 bool mmu_debug_ctrl;
1222 1221
1223 u32 tpc_fs_mask_user; 1222 u32 tpc_fs_mask_user;
diff --git a/drivers/gpu/nvgpu/gm20b/cde_gm20b.c b/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
index f8267d1d..de7cf872 100644
--- a/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
@@ -28,15 +28,16 @@ enum programs {
28 28
29void gm20b_cde_get_program_numbers(struct gk20a *g, 29void gm20b_cde_get_program_numbers(struct gk20a *g,
30 u32 block_height_log2, 30 u32 block_height_log2,
31 u32 shader_parameter,
31 int *hprog_out, int *vprog_out) 32 int *hprog_out, int *vprog_out)
32{ 33{
33 int hprog = PROG_HPASS; 34 int hprog = PROG_HPASS;
34 int vprog = (block_height_log2 >= 2) ? 35 int vprog = (block_height_log2 >= 2) ?
35 PROG_VPASS_LARGE : PROG_VPASS_SMALL; 36 PROG_VPASS_LARGE : PROG_VPASS_SMALL;
36 if (g->cde_app.shader_parameter == 1) { 37 if (shader_parameter == 1) {
37 hprog = PROG_PASSTHROUGH; 38 hprog = PROG_PASSTHROUGH;
38 vprog = PROG_PASSTHROUGH; 39 vprog = PROG_PASSTHROUGH;
39 } else if (g->cde_app.shader_parameter == 2) { 40 } else if (shader_parameter == 2) {
40 hprog = PROG_HPASS_DEBUG; 41 hprog = PROG_HPASS_DEBUG;
41 vprog = (block_height_log2 >= 2) ? 42 vprog = (block_height_log2 >= 2) ?
42 PROG_VPASS_LARGE_DEBUG : 43 PROG_VPASS_LARGE_DEBUG :
diff --git a/drivers/gpu/nvgpu/gm20b/cde_gm20b.h b/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
index f2ea20a0..0ea423ad 100644
--- a/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
@@ -20,6 +20,7 @@ struct gk20a;
20 20
21void gm20b_cde_get_program_numbers(struct gk20a *g, 21void gm20b_cde_get_program_numbers(struct gk20a *g,
22 u32 block_height_log2, 22 u32 block_height_log2,
23 u32 shader_parameter,
23 int *hprog_out, int *vprog_out); 24 int *hprog_out, int *vprog_out);
24 25
25#endif 26#endif
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
index 685ddbc4..1ddbcba6 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -32,17 +32,18 @@ enum gp10b_programs {
32 32
33void gp10b_cde_get_program_numbers(struct gk20a *g, 33void gp10b_cde_get_program_numbers(struct gk20a *g,
34 u32 block_height_log2, 34 u32 block_height_log2,
35 u32 shader_parameter,
35 int *hprog_out, int *vprog_out) 36 int *hprog_out, int *vprog_out)
36{ 37{
37 int hprog, vprog; 38 int hprog, vprog;
38 39
39 if (g->cde_app.shader_parameter == 1) { 40 if (shader_parameter == 1) {
40 hprog = GP10B_PROG_PASSTHROUGH; 41 hprog = GP10B_PROG_PASSTHROUGH;
41 vprog = GP10B_PROG_PASSTHROUGH; 42 vprog = GP10B_PROG_PASSTHROUGH;
42 } else { 43 } else {
43 hprog = GP10B_PROG_HPASS; 44 hprog = GP10B_PROG_HPASS;
44 vprog = GP10B_PROG_VPASS; 45 vprog = GP10B_PROG_VPASS;
45 if (g->cde_app.shader_parameter == 2) { 46 if (shader_parameter == 2) {
46 hprog = GP10B_PROG_HPASS_DEBUG; 47 hprog = GP10B_PROG_HPASS_DEBUG;
47 vprog = GP10B_PROG_VPASS_DEBUG; 48 vprog = GP10B_PROG_VPASS_DEBUG;
48 } 49 }
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.h b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
index 3ee6027c..7ccfe560 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
@@ -21,6 +21,7 @@ struct sg_table;
21 21
22void gp10b_cde_get_program_numbers(struct gk20a *g, 22void gp10b_cde_get_program_numbers(struct gk20a *g,
23 u32 block_height_log2, 23 u32 block_height_log2,
24 u32 shader_parameter,
24 int *hprog_out, int *vprog_out); 25 int *hprog_out, int *vprog_out);
25bool gp10b_need_scatter_buffer(struct gk20a *g); 26bool gp10b_need_scatter_buffer(struct gk20a *g);
26int gp10b_populate_scatter_buffer(struct gk20a *g, 27int gp10b_populate_scatter_buffer(struct gk20a *g,