summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/Makefile.nvgpu2
-rw-r--r--drivers/gpu/nvgpu/common/linux/cde.c (renamed from drivers/gpu/nvgpu/gk20a/cde_gk20a.c)238
-rw-r--r--drivers/gpu/nvgpu/common/linux/cde.h (renamed from drivers/gpu/nvgpu/gk20a/cde_gk20a.h)16
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_cde.c14
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c3
-rw-r--r--drivers/gpu/nvgpu/common/linux/module.c14
-rw-r--r--drivers/gpu/nvgpu/common/linux/os_linux.h2
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h3
-rw-r--r--drivers/gpu/nvgpu/gm20b/cde_gm20b.c5
-rw-r--r--drivers/gpu/nvgpu/gm20b/cde_gm20b.h1
-rw-r--r--drivers/gpu/nvgpu/gp10b/cde_gp10b.c5
-rw-r--r--drivers/gpu/nvgpu/gp10b/cde_gp10b.h1
13 files changed, 171 insertions, 136 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 25545f29..87199316 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -45,6 +45,7 @@ nvgpu-y := \
45 common/linux/vm.o \ 45 common/linux/vm.o \
46 common/linux/intr.o \ 46 common/linux/intr.o \
47 common/linux/sysfs.o \ 47 common/linux/sysfs.o \
48 common/linux/cde.o \
48 common/mm/nvgpu_allocator.o \ 49 common/mm/nvgpu_allocator.o \
49 common/mm/bitmap_allocator.o \ 50 common/mm/bitmap_allocator.o \
50 common/mm/buddy_allocator.o \ 51 common/mm/buddy_allocator.o \
@@ -92,7 +93,6 @@ nvgpu-y := \
92 gk20a/ltc_gk20a.o \ 93 gk20a/ltc_gk20a.o \
93 gk20a/fb_gk20a.o \ 94 gk20a/fb_gk20a.o \
94 gk20a/hal.o \ 95 gk20a/hal.o \
95 gk20a/cde_gk20a.o \
96 gk20a/tsg_gk20a.o \ 96 gk20a/tsg_gk20a.o \
97 gk20a/ctxsw_trace_gk20a.o \ 97 gk20a/ctxsw_trace_gk20a.o \
98 gk20a/fecs_trace_gk20a.o \ 98 gk20a/fecs_trace_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/common/linux/cde.c
index 506207f2..5b0fb910 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -31,13 +31,14 @@
31#include <nvgpu/bug.h> 31#include <nvgpu/bug.h>
32#include <nvgpu/firmware.h> 32#include <nvgpu/firmware.h>
33 33
34#include "gk20a.h" 34#include "gk20a/gk20a.h"
35#include "channel_gk20a.h" 35#include "gk20a/channel_gk20a.h"
36#include "mm_gk20a.h" 36#include "gk20a/mm_gk20a.h"
37#include "cde_gk20a.h" 37#include "gk20a/fence_gk20a.h"
38#include "fence_gk20a.h" 38#include "gk20a/gr_gk20a.h"
39#include "gr_gk20a.h" 39
40#include "common/linux/os_linux.h" 40#include "cde.h"
41#include "os_linux.h"
41 42
42#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> 43#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> 44#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -49,7 +50,7 @@
49#include "common/linux/vm_priv.h" 50#include "common/linux/vm_priv.h"
50 51
51static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); 52static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
52static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g); 53static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
53 54
54#define CTX_DELETE_TIME 1000 55#define CTX_DELETE_TIME 1000
55 56
@@ -65,7 +66,7 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
65 nvgpu_dma_unmap_free(cde_ctx->vm, mem); 66 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
66 } 67 }
67 68
68 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd); 69 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
69 70
70 cde_ctx->convert_cmd = NULL; 71 cde_ctx->convert_cmd = NULL;
71 cde_ctx->init_convert_cmd = NULL; 72 cde_ctx->init_convert_cmd = NULL;
@@ -79,7 +80,8 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
79static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx) 80static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
80__must_hold(&cde_app->mutex) 81__must_hold(&cde_app->mutex)
81{ 82{
82 struct gk20a *g = cde_ctx->g; 83 struct nvgpu_os_linux *l = cde_ctx->l;
84 struct gk20a *g = &l->g;
83 struct channel_gk20a *ch = cde_ctx->ch; 85 struct channel_gk20a *ch = cde_ctx->ch;
84 struct vm_gk20a *vm = ch->vm; 86 struct vm_gk20a *vm = ch->vm;
85 87
@@ -95,7 +97,7 @@ __must_hold(&cde_app->mutex)
95 97
96 /* housekeeping on app */ 98 /* housekeeping on app */
97 nvgpu_list_del(&cde_ctx->list); 99 nvgpu_list_del(&cde_ctx->list);
98 cde_ctx->g->cde_app.ctx_count--; 100 l->cde_app.ctx_count--;
99 nvgpu_kfree(g, cde_ctx); 101 nvgpu_kfree(g, cde_ctx);
100} 102}
101 103
@@ -104,7 +106,7 @@ static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
104__releases(&cde_app->mutex) 106__releases(&cde_app->mutex)
105__acquires(&cde_app->mutex) 107__acquires(&cde_app->mutex)
106{ 108{
107 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 109 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
108 110
109 /* permanent contexts do not have deleter works */ 111 /* permanent contexts do not have deleter works */
110 if (!cde_ctx->is_temporary) 112 if (!cde_ctx->is_temporary)
@@ -119,10 +121,10 @@ __acquires(&cde_app->mutex)
119 } 121 }
120} 122}
121 123
122static void gk20a_cde_remove_contexts(struct gk20a *g) 124static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
123__must_hold(&cde_app->mutex) 125__must_hold(&l->cde_app->mutex)
124{ 126{
125 struct gk20a_cde_app *cde_app = &g->cde_app; 127 struct gk20a_cde_app *cde_app = &l->cde_app;
126 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; 128 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
127 129
128 /* safe to go off the mutex in cancel_deleter since app is 130 /* safe to go off the mutex in cancel_deleter since app is
@@ -142,38 +144,38 @@ __must_hold(&cde_app->mutex)
142 } 144 }
143} 145}
144 146
145static void gk20a_cde_stop(struct gk20a *g) 147static void gk20a_cde_stop(struct nvgpu_os_linux *l)
146__must_hold(&cde_app->mutex) 148__must_hold(&l->cde_app->mutex)
147{ 149{
148 struct gk20a_cde_app *cde_app = &g->cde_app; 150 struct gk20a_cde_app *cde_app = &l->cde_app;
149 151
150 /* prevent further conversions and delayed works from working */ 152 /* prevent further conversions and delayed works from working */
151 cde_app->initialised = false; 153 cde_app->initialised = false;
152 /* free all data, empty the list */ 154 /* free all data, empty the list */
153 gk20a_cde_remove_contexts(g); 155 gk20a_cde_remove_contexts(l);
154} 156}
155 157
156void gk20a_cde_destroy(struct gk20a *g) 158void gk20a_cde_destroy(struct nvgpu_os_linux *l)
157__acquires(&cde_app->mutex) 159__acquires(&l->cde_app->mutex)
158__releases(&cde_app->mutex) 160__releases(&l->cde_app->mutex)
159{ 161{
160 struct gk20a_cde_app *cde_app = &g->cde_app; 162 struct gk20a_cde_app *cde_app = &l->cde_app;
161 163
162 if (!cde_app->initialised) 164 if (!cde_app->initialised)
163 return; 165 return;
164 166
165 nvgpu_mutex_acquire(&cde_app->mutex); 167 nvgpu_mutex_acquire(&cde_app->mutex);
166 gk20a_cde_stop(g); 168 gk20a_cde_stop(l);
167 nvgpu_mutex_release(&cde_app->mutex); 169 nvgpu_mutex_release(&cde_app->mutex);
168 170
169 nvgpu_mutex_destroy(&cde_app->mutex); 171 nvgpu_mutex_destroy(&cde_app->mutex);
170} 172}
171 173
172void gk20a_cde_suspend(struct gk20a *g) 174void gk20a_cde_suspend(struct nvgpu_os_linux *l)
173__acquires(&cde_app->mutex) 175__acquires(&l->cde_app->mutex)
174__releases(&cde_app->mutex) 176__releases(&l->cde_app->mutex)
175{ 177{
176 struct gk20a_cde_app *cde_app = &g->cde_app; 178 struct gk20a_cde_app *cde_app = &l->cde_app;
177 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; 179 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
178 180
179 if (!cde_app->initialised) 181 if (!cde_app->initialised)
@@ -195,13 +197,13 @@ __releases(&cde_app->mutex)
195 197
196} 198}
197 199
198static int gk20a_cde_create_context(struct gk20a *g) 200static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
199__must_hold(&cde_app->mutex) 201__must_hold(&l->cde_app->mutex)
200{ 202{
201 struct gk20a_cde_app *cde_app = &g->cde_app; 203 struct gk20a_cde_app *cde_app = &l->cde_app;
202 struct gk20a_cde_ctx *cde_ctx; 204 struct gk20a_cde_ctx *cde_ctx;
203 205
204 cde_ctx = gk20a_cde_allocate_context(g); 206 cde_ctx = gk20a_cde_allocate_context(l);
205 if (IS_ERR(cde_ctx)) 207 if (IS_ERR(cde_ctx))
206 return PTR_ERR(cde_ctx); 208 return PTR_ERR(cde_ctx);
207 209
@@ -213,21 +215,21 @@ __must_hold(&cde_app->mutex)
213 return 0; 215 return 0;
214} 216}
215 217
216static int gk20a_cde_create_contexts(struct gk20a *g) 218static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
217__must_hold(&g->cde_app->mutex) 219__must_hold(&l->cde_app->mutex)
218{ 220{
219 int err; 221 int err;
220 int i; 222 int i;
221 223
222 for (i = 0; i < NUM_CDE_CONTEXTS; i++) { 224 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
223 err = gk20a_cde_create_context(g); 225 err = gk20a_cde_create_context(l);
224 if (err) 226 if (err)
225 goto out; 227 goto out;
226 } 228 }
227 229
228 return 0; 230 return 0;
229out: 231out:
230 gk20a_cde_remove_contexts(g); 232 gk20a_cde_remove_contexts(l);
231 return err; 233 return err;
232} 234}
233 235
@@ -236,7 +238,8 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
236 struct gk20a_cde_hdr_buf *buf) 238 struct gk20a_cde_hdr_buf *buf)
237{ 239{
238 struct nvgpu_mem *mem; 240 struct nvgpu_mem *mem;
239 struct gk20a *g = cde_ctx->g; 241 struct nvgpu_os_linux *l = cde_ctx->l;
242 struct gk20a *g = &l->g;
240 int err; 243 int err;
241 244
242 /* check that the file can hold the buf */ 245 /* check that the file can hold the buf */
@@ -276,7 +279,8 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
276static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, 279static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
277 int type, s32 shift, u64 mask, u64 value) 280 int type, s32 shift, u64 mask, u64 value)
278{ 281{
279 struct gk20a *g = cde_ctx->g; 282 struct nvgpu_os_linux *l = cde_ctx->l;
283 struct gk20a *g = &l->g;
280 u32 *target_mem_ptr = target; 284 u32 *target_mem_ptr = target;
281 u64 *target_mem_ptr_u64 = target; 285 u64 *target_mem_ptr_u64 = target;
282 u64 current_value, new_value; 286 u64 current_value, new_value;
@@ -325,7 +329,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
325{ 329{
326 struct nvgpu_mem *source_mem; 330 struct nvgpu_mem *source_mem;
327 struct nvgpu_mem *target_mem; 331 struct nvgpu_mem *target_mem;
328 struct gk20a *g = cde_ctx->g; 332 struct nvgpu_os_linux *l = cde_ctx->l;
333 struct gk20a *g = &l->g;
329 u32 *target_mem_ptr; 334 u32 *target_mem_ptr;
330 u64 vaddr; 335 u64 vaddr;
331 int err; 336 int err;
@@ -373,7 +378,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
373 378
374static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) 379static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
375{ 380{
376 struct gk20a *g = cde_ctx->g; 381 struct nvgpu_os_linux *l = cde_ctx->l;
382 struct gk20a *g = &l->g;
377 struct nvgpu_mem *target_mem; 383 struct nvgpu_mem *target_mem;
378 u32 *target_mem_ptr; 384 u32 *target_mem_ptr;
379 u64 new_data; 385 u64 new_data;
@@ -464,7 +470,8 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
464 struct gk20a_cde_hdr_param *param) 470 struct gk20a_cde_hdr_param *param)
465{ 471{
466 struct nvgpu_mem *target_mem; 472 struct nvgpu_mem *target_mem;
467 struct gk20a *g = cde_ctx->g; 473 struct nvgpu_os_linux *l = cde_ctx->l;
474 struct gk20a *g = &l->g;
468 475
469 if (param->target_buf >= cde_ctx->num_bufs) { 476 if (param->target_buf >= cde_ctx->num_bufs) {
470 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", 477 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
@@ -506,7 +513,8 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
506 struct nvgpu_firmware *img, 513 struct nvgpu_firmware *img,
507 u32 required_class) 514 u32 required_class)
508{ 515{
509 struct gk20a *g = cde_ctx->g; 516 struct nvgpu_os_linux *l = cde_ctx->l;
517 struct gk20a *g = &l->g;
510 struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx; 518 struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx;
511 int err; 519 int err;
512 520
@@ -532,7 +540,8 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
532 struct gk20a_cde_cmd_elem *cmd_elem, 540 struct gk20a_cde_cmd_elem *cmd_elem,
533 u32 num_elems) 541 u32 num_elems)
534{ 542{
535 struct gk20a *g = cde_ctx->g; 543 struct nvgpu_os_linux *l = cde_ctx->l;
544 struct gk20a *g = &l->g;
536 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem; 545 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
537 u32 *num_entries; 546 u32 *num_entries;
538 unsigned int i; 547 unsigned int i;
@@ -551,7 +560,7 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
551 } 560 }
552 561
553 /* allocate gpfifo entries to be pushed */ 562 /* allocate gpfifo entries to be pushed */
554 *gpfifo = nvgpu_kzalloc(cde_ctx->g, 563 *gpfifo = nvgpu_kzalloc(g,
555 sizeof(struct nvgpu_gpfifo) * num_elems); 564 sizeof(struct nvgpu_gpfifo) * num_elems);
556 if (!*gpfifo) { 565 if (!*gpfifo) {
557 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); 566 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
@@ -596,7 +605,8 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
596 605
597static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) 606static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
598{ 607{
599 struct gk20a *g = cde_ctx->g; 608 struct nvgpu_os_linux *l = cde_ctx->l;
609 struct gk20a *g = &l->g;
600 unsigned long init_bytes = cde_ctx->init_cmd_num_entries * 610 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
601 sizeof(struct nvgpu_gpfifo); 611 sizeof(struct nvgpu_gpfifo);
602 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * 612 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
@@ -605,7 +615,7 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
605 struct nvgpu_gpfifo *combined_cmd; 615 struct nvgpu_gpfifo *combined_cmd;
606 616
607 /* allocate buffer that has space for both */ 617 /* allocate buffer that has space for both */
608 combined_cmd = nvgpu_kzalloc(cde_ctx->g, total_bytes); 618 combined_cmd = nvgpu_kzalloc(g, total_bytes);
609 if (!combined_cmd) { 619 if (!combined_cmd) {
610 nvgpu_warn(g, 620 nvgpu_warn(g,
611 "cde: could not allocate memory for gpfifo entries"); 621 "cde: could not allocate memory for gpfifo entries");
@@ -617,8 +627,8 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
617 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, 627 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
618 cde_ctx->convert_cmd, conv_bytes); 628 cde_ctx->convert_cmd, conv_bytes);
619 629
620 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd); 630 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
621 nvgpu_kfree(cde_ctx->g, cde_ctx->convert_cmd); 631 nvgpu_kfree(g, cde_ctx->convert_cmd);
622 632
623 cde_ctx->init_convert_cmd = combined_cmd; 633 cde_ctx->init_convert_cmd = combined_cmd;
624 cde_ctx->convert_cmd = combined_cmd 634 cde_ctx->convert_cmd = combined_cmd
@@ -630,8 +640,9 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
630static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, 640static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
631 struct nvgpu_firmware *img) 641 struct nvgpu_firmware *img)
632{ 642{
633 struct gk20a *g = cde_ctx->g; 643 struct nvgpu_os_linux *l = cde_ctx->l;
634 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 644 struct gk20a *g = &l->g;
645 struct gk20a_cde_app *cde_app = &l->cde_app;
635 u32 *data = (u32 *)img->data; 646 u32 *data = (u32 *)img->data;
636 u32 num_of_elems; 647 u32 num_of_elems;
637 struct gk20a_cde_hdr_elem *elem; 648 struct gk20a_cde_hdr_elem *elem;
@@ -724,7 +735,8 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
724 u32 op, struct nvgpu_fence *fence, 735 u32 op, struct nvgpu_fence *fence,
725 u32 flags, struct gk20a_fence **fence_out) 736 u32 flags, struct gk20a_fence **fence_out)
726{ 737{
727 struct gk20a *g = cde_ctx->g; 738 struct nvgpu_os_linux *l = cde_ctx->l;
739 struct gk20a *g = &l->g;
728 struct nvgpu_gpfifo *gpfifo = NULL; 740 struct nvgpu_gpfifo *gpfifo = NULL;
729 int num_entries = 0; 741 int num_entries = 0;
730 742
@@ -756,7 +768,7 @@ static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
756__acquires(&cde_app->mutex) 768__acquires(&cde_app->mutex)
757__releases(&cde_app->mutex) 769__releases(&cde_app->mutex)
758{ 770{
759 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 771 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
760 772
761 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); 773 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
762 trace_gk20a_cde_release(cde_ctx); 774 trace_gk20a_cde_release(cde_ctx);
@@ -781,8 +793,9 @@ __releases(&cde_app->mutex)
781 struct delayed_work *delay_work = to_delayed_work(work); 793 struct delayed_work *delay_work = to_delayed_work(work);
782 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work, 794 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
783 struct gk20a_cde_ctx, ctx_deleter_work); 795 struct gk20a_cde_ctx, ctx_deleter_work);
784 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; 796 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
785 struct gk20a *g = cde_ctx->g; 797 struct nvgpu_os_linux *l = cde_ctx->l;
798 struct gk20a *g = &l->g;
786 int err; 799 int err;
787 800
788 /* someone has just taken it? engine deletion started? */ 801 /* someone has just taken it? engine deletion started? */
@@ -823,10 +836,11 @@ out:
823 gk20a_idle(g); 836 gk20a_idle(g);
824} 837}
825 838
826static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct gk20a *g) 839static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
827__must_hold(&cde_app->mutex) 840__must_hold(&cde_app->mutex)
828{ 841{
829 struct gk20a_cde_app *cde_app = &g->cde_app; 842 struct gk20a *g = &l->g;
843 struct gk20a_cde_app *cde_app = &l->cde_app;
830 struct gk20a_cde_ctx *cde_ctx; 844 struct gk20a_cde_ctx *cde_ctx;
831 845
832 /* exhausted? */ 846 /* exhausted? */
@@ -862,7 +876,7 @@ __must_hold(&cde_app->mutex)
862 "cde: no free contexts, count=%d", 876 "cde: no free contexts, count=%d",
863 cde_app->ctx_count); 877 cde_app->ctx_count);
864 878
865 cde_ctx = gk20a_cde_allocate_context(g); 879 cde_ctx = gk20a_cde_allocate_context(l);
866 if (IS_ERR(cde_ctx)) { 880 if (IS_ERR(cde_ctx)) {
867 nvgpu_warn(g, "cde: cannot allocate context: %ld", 881 nvgpu_warn(g, "cde: cannot allocate context: %ld",
868 PTR_ERR(cde_ctx)); 882 PTR_ERR(cde_ctx));
@@ -881,11 +895,12 @@ __must_hold(&cde_app->mutex)
881 return cde_ctx; 895 return cde_ctx;
882} 896}
883 897
884static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g) 898static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
885__releases(&cde_app->mutex) 899__releases(&cde_app->mutex)
886__acquires(&cde_app->mutex) 900__acquires(&cde_app->mutex)
887{ 901{
888 struct gk20a_cde_app *cde_app = &g->cde_app; 902 struct gk20a *g = &l->g;
903 struct gk20a_cde_app *cde_app = &l->cde_app;
889 struct gk20a_cde_ctx *cde_ctx = NULL; 904 struct gk20a_cde_ctx *cde_ctx = NULL;
890 struct nvgpu_timeout timeout; 905 struct nvgpu_timeout timeout;
891 906
@@ -893,7 +908,7 @@ __acquires(&cde_app->mutex)
893 NVGPU_TIMER_CPU_TIMER); 908 NVGPU_TIMER_CPU_TIMER);
894 909
895 do { 910 do {
896 cde_ctx = gk20a_cde_do_get_context(g); 911 cde_ctx = gk20a_cde_do_get_context(l);
897 if (PTR_ERR(cde_ctx) != -EAGAIN) 912 if (PTR_ERR(cde_ctx) != -EAGAIN)
898 break; 913 break;
899 914
@@ -906,8 +921,9 @@ __acquires(&cde_app->mutex)
906 return cde_ctx; 921 return cde_ctx;
907} 922}
908 923
909static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g) 924static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
910{ 925{
926 struct gk20a *g = &l->g;
911 struct gk20a_cde_ctx *cde_ctx; 927 struct gk20a_cde_ctx *cde_ctx;
912 int ret; 928 int ret;
913 929
@@ -915,7 +931,7 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
915 if (!cde_ctx) 931 if (!cde_ctx)
916 return ERR_PTR(-ENOMEM); 932 return ERR_PTR(-ENOMEM);
917 933
918 cde_ctx->g = g; 934 cde_ctx->l = l;
919 cde_ctx->dev = dev_from_gk20a(g); 935 cde_ctx->dev = dev_from_gk20a(g);
920 936
921 ret = gk20a_cde_load(cde_ctx); 937 ret = gk20a_cde_load(cde_ctx);
@@ -935,16 +951,17 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
935 return cde_ctx; 951 return cde_ctx;
936} 952}
937 953
938int gk20a_cde_convert(struct gk20a *g, 954int gk20a_cde_convert(struct nvgpu_os_linux *l,
939 struct dma_buf *compbits_scatter_buf, 955 struct dma_buf *compbits_scatter_buf,
940 u64 compbits_byte_offset, 956 u64 compbits_byte_offset,
941 u64 scatterbuffer_byte_offset, 957 u64 scatterbuffer_byte_offset,
942 struct nvgpu_fence *fence, 958 struct nvgpu_fence *fence,
943 u32 __flags, struct gk20a_cde_param *params, 959 u32 __flags, struct gk20a_cde_param *params,
944 int num_params, struct gk20a_fence **fence_out) 960 int num_params, struct gk20a_fence **fence_out)
945__acquires(&cde_app->mutex) 961__acquires(&l->cde_app->mutex)
946__releases(&cde_app->mutex) 962__releases(&l->cde_app->mutex)
947{ 963{
964 struct gk20a *g = &l->g;
948 struct gk20a_cde_ctx *cde_ctx = NULL; 965 struct gk20a_cde_ctx *cde_ctx = NULL;
949 struct gk20a_comptags comptags; 966 struct gk20a_comptags comptags;
950 u64 mapped_compbits_offset = 0; 967 u64 mapped_compbits_offset = 0;
@@ -972,9 +989,9 @@ __releases(&cde_app->mutex)
972 if (err) 989 if (err)
973 return err; 990 return err;
974 991
975 nvgpu_mutex_acquire(&g->cde_app.mutex); 992 nvgpu_mutex_acquire(&l->cde_app.mutex);
976 cde_ctx = gk20a_cde_get_context(g); 993 cde_ctx = gk20a_cde_get_context(l);
977 nvgpu_mutex_release(&g->cde_app.mutex); 994 nvgpu_mutex_release(&l->cde_app.mutex);
978 if (IS_ERR(cde_ctx)) { 995 if (IS_ERR(cde_ctx)) {
979 err = PTR_ERR(cde_ctx); 996 err = PTR_ERR(cde_ctx);
980 goto exit_idle; 997 goto exit_idle;
@@ -1158,8 +1175,9 @@ __acquires(&cde_app->mutex)
1158__releases(&cde_app->mutex) 1175__releases(&cde_app->mutex)
1159{ 1176{
1160 struct gk20a_cde_ctx *cde_ctx = data; 1177 struct gk20a_cde_ctx *cde_ctx = data;
1161 struct gk20a *g = cde_ctx->g; 1178 struct nvgpu_os_linux *l = cde_ctx->l;
1162 struct gk20a_cde_app *cde_app = &g->cde_app; 1179 struct gk20a *g = &l->g;
1180 struct gk20a_cde_app *cde_app = &l->cde_app;
1163 bool channel_idle; 1181 bool channel_idle;
1164 1182
1165 channel_gk20a_joblist_lock(ch); 1183 channel_gk20a_joblist_lock(ch);
@@ -1188,7 +1206,7 @@ __releases(&cde_app->mutex)
1188 /* mark it to be deleted, replace with a new one */ 1206 /* mark it to be deleted, replace with a new one */
1189 nvgpu_mutex_acquire(&cde_app->mutex); 1207 nvgpu_mutex_acquire(&cde_app->mutex);
1190 cde_ctx->is_temporary = true; 1208 cde_ctx->is_temporary = true;
1191 if (gk20a_cde_create_context(g)) { 1209 if (gk20a_cde_create_context(l)) {
1192 nvgpu_err(g, "cde: can't replace context"); 1210 nvgpu_err(g, "cde: can't replace context");
1193 } 1211 }
1194 nvgpu_mutex_release(&cde_app->mutex); 1212 nvgpu_mutex_release(&cde_app->mutex);
@@ -1208,7 +1226,8 @@ __releases(&cde_app->mutex)
1208 1226
1209static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) 1227static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1210{ 1228{
1211 struct gk20a *g = cde_ctx->g; 1229 struct nvgpu_os_linux *l = cde_ctx->l;
1230 struct gk20a *g = &l->g;
1212 struct nvgpu_firmware *img; 1231 struct nvgpu_firmware *img;
1213 struct channel_gk20a *ch; 1232 struct channel_gk20a *ch;
1214 struct gr_gk20a *gr = &g->gr; 1233 struct gr_gk20a *gr = &g->gr;
@@ -1288,11 +1307,12 @@ err_get_gk20a_channel:
1288 return err; 1307 return err;
1289} 1308}
1290 1309
1291int gk20a_cde_reload(struct gk20a *g) 1310int gk20a_cde_reload(struct nvgpu_os_linux *l)
1292__acquires(&cde_app->mutex) 1311__acquires(&l->cde_app->mutex)
1293__releases(&cde_app->mutex) 1312__releases(&l->cde_app->mutex)
1294{ 1313{
1295 struct gk20a_cde_app *cde_app = &g->cde_app; 1314 struct gk20a *g = &l->g;
1315 struct gk20a_cde_app *cde_app = &l->cde_app;
1296 int err; 1316 int err;
1297 1317
1298 if (!cde_app->initialised) 1318 if (!cde_app->initialised)
@@ -1304,9 +1324,9 @@ __releases(&cde_app->mutex)
1304 1324
1305 nvgpu_mutex_acquire(&cde_app->mutex); 1325 nvgpu_mutex_acquire(&cde_app->mutex);
1306 1326
1307 gk20a_cde_stop(g); 1327 gk20a_cde_stop(l);
1308 1328
1309 err = gk20a_cde_create_contexts(g); 1329 err = gk20a_cde_create_contexts(l);
1310 if (!err) 1330 if (!err)
1311 cde_app->initialised = true; 1331 cde_app->initialised = true;
1312 1332
@@ -1316,11 +1336,11 @@ __releases(&cde_app->mutex)
1316 return err; 1336 return err;
1317} 1337}
1318 1338
1319int gk20a_init_cde_support(struct gk20a *g) 1339int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1320__acquires(&cde_app->mutex) 1340__acquires(&cde_app->mutex)
1321__releases(&cde_app->mutex) 1341__releases(&cde_app->mutex)
1322{ 1342{
1323 struct gk20a_cde_app *cde_app = &g->cde_app; 1343 struct gk20a_cde_app *cde_app = &l->cde_app;
1324 int err; 1344 int err;
1325 1345
1326 if (cde_app->initialised) 1346 if (cde_app->initialised)
@@ -1340,7 +1360,7 @@ __releases(&cde_app->mutex)
1340 cde_app->ctx_count_top = 0; 1360 cde_app->ctx_count_top = 0;
1341 cde_app->ctx_usecount = 0; 1361 cde_app->ctx_usecount = 0;
1342 1362
1343 err = gk20a_cde_create_contexts(g); 1363 err = gk20a_cde_create_contexts(l);
1344 if (!err) 1364 if (!err)
1345 cde_app->initialised = true; 1365 cde_app->initialised = true;
1346 1366
@@ -1393,7 +1413,7 @@ enum cde_launch_patch_id {
1393#define MAX_CDE_LAUNCH_PATCHES 32 1413#define MAX_CDE_LAUNCH_PATCHES 32
1394 1414
1395static int gk20a_buffer_convert_gpu_to_cde_v1( 1415static int gk20a_buffer_convert_gpu_to_cde_v1(
1396 struct gk20a *g, 1416 struct nvgpu_os_linux *l,
1397 struct dma_buf *dmabuf, u32 consumer, 1417 struct dma_buf *dmabuf, u32 consumer,
1398 u64 offset, u64 compbits_hoffset, u64 compbits_voffset, 1418 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1399 u64 scatterbuffer_offset, 1419 u64 scatterbuffer_offset,
@@ -1401,6 +1421,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1401 u32 submit_flags, struct nvgpu_fence *fence_in, 1421 u32 submit_flags, struct nvgpu_fence *fence_in,
1402 struct gk20a_buffer_state *state) 1422 struct gk20a_buffer_state *state)
1403{ 1423{
1424 struct gk20a *g = &l->g;
1404 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; 1425 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1405 int param = 0; 1426 int param = 0;
1406 int err = 0; 1427 int err = 0;
@@ -1426,6 +1447,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1426 1447
1427 if (g->ops.cde.get_program_numbers) 1448 if (g->ops.cde.get_program_numbers)
1428 g->ops.cde.get_program_numbers(g, block_height_log2, 1449 g->ops.cde.get_program_numbers(g, block_height_log2,
1450 l->cde_app.shader_parameter,
1429 &hprog, &vprog); 1451 &hprog, &vprog);
1430 else { 1452 else {
1431 nvgpu_warn(g, "cde: chip not supported"); 1453 nvgpu_warn(g, "cde: chip not supported");
@@ -1450,11 +1472,11 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1450 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); 1472 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1451 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", 1473 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1452 hprog, 1474 hprog,
1453 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], 1475 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1454 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], 1476 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1455 vprog, 1477 vprog,
1456 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], 1478 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1457 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); 1479 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1458 1480
1459 /* Write parameters */ 1481 /* Write parameters */
1460#define WRITE_PATCH(NAME, VALUE) \ 1482#define WRITE_PATCH(NAME, VALUE) \
@@ -1483,40 +1505,40 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
1483 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); 1505 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1484 1506
1485 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, 1507 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1486 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); 1508 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1487 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, 1509 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1488 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); 1510 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1489 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, 1511 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1490 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); 1512 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1491 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, 1513 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1492 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); 1514 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1493 1515
1494 if (consumer & NVGPU_GPU_COMPBITS_CDEH) { 1516 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1495 WRITE_PATCH(PATCH_H_LAUNCH_WORD1, 1517 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1496 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); 1518 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1497 WRITE_PATCH(PATCH_H_LAUNCH_WORD2, 1519 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1498 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); 1520 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1499 } else { 1521 } else {
1500 WRITE_PATCH(PATCH_H_LAUNCH_WORD1, 1522 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1501 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); 1523 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1502 WRITE_PATCH(PATCH_H_LAUNCH_WORD2, 1524 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1503 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); 1525 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1504 } 1526 }
1505 1527
1506 if (consumer & NVGPU_GPU_COMPBITS_CDEV) { 1528 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1507 WRITE_PATCH(PATCH_V_LAUNCH_WORD1, 1529 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1508 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); 1530 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1509 WRITE_PATCH(PATCH_V_LAUNCH_WORD2, 1531 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1510 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); 1532 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1511 } else { 1533 } else {
1512 WRITE_PATCH(PATCH_V_LAUNCH_WORD1, 1534 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1513 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); 1535 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1514 WRITE_PATCH(PATCH_V_LAUNCH_WORD2, 1536 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1515 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); 1537 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1516 } 1538 }
1517#undef WRITE_PATCH 1539#undef WRITE_PATCH
1518 1540
1519 err = gk20a_cde_convert(g, dmabuf, 1541 err = gk20a_cde_convert(l, dmabuf,
1520 compbits_hoffset, 1542 compbits_hoffset,
1521 scatterbuffer_offset, 1543 scatterbuffer_offset,
1522 fence_in, submit_flags, 1544 fence_in, submit_flags,
@@ -1534,30 +1556,31 @@ out:
1534} 1556}
1535 1557
1536static int gk20a_buffer_convert_gpu_to_cde( 1558static int gk20a_buffer_convert_gpu_to_cde(
1537 struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, 1559 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1538 u64 offset, u64 compbits_hoffset, u64 compbits_voffset, 1560 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1539 u64 scatterbuffer_offset, 1561 u64 scatterbuffer_offset,
1540 u32 width, u32 height, u32 block_height_log2, 1562 u32 width, u32 height, u32 block_height_log2,
1541 u32 submit_flags, struct nvgpu_fence *fence_in, 1563 u32 submit_flags, struct nvgpu_fence *fence_in,
1542 struct gk20a_buffer_state *state) 1564 struct gk20a_buffer_state *state)
1543{ 1565{
1566 struct gk20a *g = &l->g;
1544 int err = 0; 1567 int err = 0;
1545 1568
1546 if (!g->cde_app.initialised) 1569 if (!l->cde_app.initialised)
1547 return -ENOSYS; 1570 return -ENOSYS;
1548 1571
1549 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", 1572 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1550 g->cde_app.firmware_version); 1573 l->cde_app.firmware_version);
1551 1574
1552 if (g->cde_app.firmware_version == 1) { 1575 if (l->cde_app.firmware_version == 1) {
1553 err = gk20a_buffer_convert_gpu_to_cde_v1( 1576 err = gk20a_buffer_convert_gpu_to_cde_v1(
1554 g, dmabuf, consumer, offset, compbits_hoffset, 1577 l, dmabuf, consumer, offset, compbits_hoffset,
1555 compbits_voffset, scatterbuffer_offset, 1578 compbits_voffset, scatterbuffer_offset,
1556 width, height, block_height_log2, 1579 width, height, block_height_log2,
1557 submit_flags, fence_in, state); 1580 submit_flags, fence_in, state);
1558 } else { 1581 } else {
1559 nvgpu_err(g, "unsupported CDE firmware version %d", 1582 nvgpu_err(g, "unsupported CDE firmware version %d",
1560 g->cde_app.firmware_version); 1583 l->cde_app.firmware_version);
1561 err = -EINVAL; 1584 err = -EINVAL;
1562 } 1585 }
1563 1586
@@ -1565,7 +1588,7 @@ static int gk20a_buffer_convert_gpu_to_cde(
1565} 1588}
1566 1589
1567int gk20a_prepare_compressible_read( 1590int gk20a_prepare_compressible_read(
1568 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, 1591 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1569 u64 compbits_hoffset, u64 compbits_voffset, 1592 u64 compbits_hoffset, u64 compbits_voffset,
1570 u64 scatterbuffer_offset, 1593 u64 scatterbuffer_offset,
1571 u32 width, u32 height, u32 block_height_log2, 1594 u32 width, u32 height, u32 block_height_log2,
@@ -1573,6 +1596,7 @@ int gk20a_prepare_compressible_read(
1573 u32 *valid_compbits, u32 *zbc_color, 1596 u32 *valid_compbits, u32 *zbc_color,
1574 struct gk20a_fence **fence_out) 1597 struct gk20a_fence **fence_out)
1575{ 1598{
1599 struct gk20a *g = &l->g;
1576 int err = 0; 1600 int err = 0;
1577 struct gk20a_buffer_state *state; 1601 struct gk20a_buffer_state *state;
1578 struct dma_buf *dmabuf; 1602 struct dma_buf *dmabuf;
@@ -1606,7 +1630,7 @@ int gk20a_prepare_compressible_read(
1606 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && 1630 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1607 missing_cde_bits) { 1631 missing_cde_bits) {
1608 err = gk20a_buffer_convert_gpu_to_cde( 1632 err = gk20a_buffer_convert_gpu_to_cde(
1609 g, dmabuf, 1633 l, dmabuf,
1610 missing_cde_bits, 1634 missing_cde_bits,
1611 offset, compbits_hoffset, 1635 offset, compbits_hoffset,
1612 compbits_voffset, scatterbuffer_offset, 1636 compbits_voffset, scatterbuffer_offset,
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/common/linux/cde.h
index 4f400bf3..22732a2a 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/common/linux/cde.h
@@ -19,8 +19,6 @@
19#ifndef _CDE_GK20A_H_ 19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_ 20#define _CDE_GK20A_H_
21 21
22#include "mm_gk20a.h"
23
24#define MAX_CDE_BUFS 10 22#define MAX_CDE_BUFS 10
25#define MAX_CDE_PARAMS 64 23#define MAX_CDE_PARAMS 64
26#define MAX_CDE_USER_PARAMS 40 24#define MAX_CDE_USER_PARAMS 40
@@ -214,7 +212,7 @@ struct gk20a_cde_param {
214}; 212};
215 213
216struct gk20a_cde_ctx { 214struct gk20a_cde_ctx {
217 struct gk20a *g; 215 struct nvgpu_os_linux *l;
218 struct device *dev; 216 struct device *dev;
219 217
220 /* channel related data */ 218 /* channel related data */
@@ -284,11 +282,11 @@ struct gk20a_cde_app {
284 u32 shader_parameter; 282 u32 shader_parameter;
285}; 283};
286 284
287void gk20a_cde_destroy(struct gk20a *g); 285void gk20a_cde_destroy(struct nvgpu_os_linux *l);
288void gk20a_cde_suspend(struct gk20a *g); 286void gk20a_cde_suspend(struct nvgpu_os_linux *l);
289int gk20a_init_cde_support(struct gk20a *g); 287int gk20a_init_cde_support(struct nvgpu_os_linux *l);
290int gk20a_cde_reload(struct gk20a *g); 288int gk20a_cde_reload(struct nvgpu_os_linux *l);
291int gk20a_cde_convert(struct gk20a *g, 289int gk20a_cde_convert(struct nvgpu_os_linux *l,
292 struct dma_buf *compbits_buf, 290 struct dma_buf *compbits_buf,
293 u64 compbits_byte_offset, 291 u64 compbits_byte_offset,
294 u64 scatterbuffer_byte_offset, 292 u64 scatterbuffer_byte_offset,
@@ -297,7 +295,7 @@ int gk20a_cde_convert(struct gk20a *g,
297 int num_params, struct gk20a_fence **fence_out); 295 int num_params, struct gk20a_fence **fence_out);
298 296
299int gk20a_prepare_compressible_read( 297int gk20a_prepare_compressible_read(
300 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, 298 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
301 u64 compbits_hoffset, u64 compbits_voffset, 299 u64 compbits_hoffset, u64 compbits_voffset,
302 u64 scatterbuffer_offset, 300 u64 scatterbuffer_offset,
303 u32 width, u32 height, u32 block_height_log2, 301 u32 width, u32 height, u32 block_height_log2,
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
index 40cc64a4..cbea83b9 100644
--- a/drivers/gpu/nvgpu/common/linux/debug_cde.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -22,8 +22,8 @@
22static ssize_t gk20a_cde_reload_write(struct file *file, 22static ssize_t gk20a_cde_reload_write(struct file *file,
23 const char __user *userbuf, size_t count, loff_t *ppos) 23 const char __user *userbuf, size_t count, loff_t *ppos)
24{ 24{
25 struct gk20a *g = file->private_data; 25 struct nvgpu_os_linux *l = file->private_data;
26 gk20a_cde_reload(g); 26 gk20a_cde_reload(l);
27 return count; 27 return count;
28} 28}
29 29
@@ -41,13 +41,13 @@ void gk20a_cde_debugfs_init(struct gk20a *g)
41 return; 41 return;
42 42
43 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, 43 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
44 l->debugfs, &g->cde_app.shader_parameter); 44 l->debugfs, &l->cde_app.shader_parameter);
45 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, 45 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
46 l->debugfs, &g->cde_app.ctx_count); 46 l->debugfs, &l->cde_app.ctx_count);
47 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, 47 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
48 l->debugfs, &g->cde_app.ctx_usecount); 48 l->debugfs, &l->cde_app.ctx_usecount);
49 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, 49 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
50 l->debugfs, &g->cde_app.ctx_count_top); 50 l->debugfs, &l->cde_app.ctx_count_top);
51 debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, 51 debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
52 g, &gk20a_cde_reload_fops); 52 l, &gk20a_cde_reload_fops);
53} 53}
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index 0d79b143..0357f098 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -138,6 +138,7 @@ static int gk20a_ctrl_prepare_compressible_read(
138 struct gk20a *g, 138 struct gk20a *g,
139 struct nvgpu_gpu_prepare_compressible_read_args *args) 139 struct nvgpu_gpu_prepare_compressible_read_args *args)
140{ 140{
141 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
141 struct nvgpu_fence fence; 142 struct nvgpu_fence fence;
142 struct gk20a_fence *fence_out = NULL; 143 struct gk20a_fence *fence_out = NULL;
143 int ret = 0; 144 int ret = 0;
@@ -146,7 +147,7 @@ static int gk20a_ctrl_prepare_compressible_read(
146 fence.id = args->fence.syncpt_id; 147 fence.id = args->fence.syncpt_id;
147 fence.value = args->fence.syncpt_value; 148 fence.value = args->fence.syncpt_value;
148 149
149 ret = gk20a_prepare_compressible_read(g, args->handle, 150 ret = gk20a_prepare_compressible_read(l, args->handle,
150 args->request_compbits, args->offset, 151 args->request_compbits, args->offset,
151 args->compbits_hoffset, args->compbits_voffset, 152 args->compbits_hoffset, args->compbits_voffset,
152 args->scatterbuffer_offset, 153 args->scatterbuffer_offset,
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 6a590baa..509930c7 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -39,6 +39,7 @@
39#include "pci.h" 39#include "pci.h"
40#include "module.h" 40#include "module.h"
41#include "intr.h" 41#include "intr.h"
42#include "cde.h"
42#ifdef CONFIG_TEGRA_19x_GPU 43#ifdef CONFIG_TEGRA_19x_GPU
43#include "nvgpu_gpuid_t19x.h" 44#include "nvgpu_gpuid_t19x.h"
44#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION 45#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
@@ -185,7 +186,7 @@ int gk20a_pm_finalize_poweron(struct device *dev)
185 gk20a_scale_resume(dev_from_gk20a(g)); 186 gk20a_scale_resume(dev_from_gk20a(g));
186 187
187 if (platform->has_cde) 188 if (platform->has_cde)
188 gk20a_init_cde_support(g); 189 gk20a_init_cde_support(l);
189 190
190done: 191done:
191 if (err) 192 if (err)
@@ -197,6 +198,7 @@ done:
197static int gk20a_pm_prepare_poweroff(struct device *dev) 198static int gk20a_pm_prepare_poweroff(struct device *dev)
198{ 199{
199 struct gk20a *g = get_gk20a(dev); 200 struct gk20a *g = get_gk20a(dev);
201 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
200 int ret = 0; 202 int ret = 0;
201 struct gk20a_platform *platform = gk20a_get_platform(dev); 203 struct gk20a_platform *platform = gk20a_get_platform(dev);
202 204
@@ -207,8 +209,15 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
207 if (!g->power_on) 209 if (!g->power_on)
208 goto done; 210 goto done;
209 211
212 if (gk20a_fifo_is_engine_busy(g)) {
213 ret = -EBUSY;
214 goto done;
215 }
216
210 gk20a_scale_suspend(dev); 217 gk20a_scale_suspend(dev);
211 218
219 gk20a_cde_suspend(l);
220
212 ret = gk20a_prepare_poweroff(g); 221 ret = gk20a_prepare_poweroff(g);
213 if (ret) 222 if (ret)
214 goto error; 223 goto error;
@@ -974,6 +983,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
974{ 983{
975 struct device *dev = &pdev->dev; 984 struct device *dev = &pdev->dev;
976 struct gk20a *g = get_gk20a(dev); 985 struct gk20a *g = get_gk20a(dev);
986 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
977 struct gk20a_platform *platform = gk20a_get_platform(dev); 987 struct gk20a_platform *platform = gk20a_get_platform(dev);
978 988
979 gk20a_dbg_fn(""); 989 gk20a_dbg_fn("");
@@ -982,7 +992,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
982 return vgpu_remove(pdev); 992 return vgpu_remove(pdev);
983 993
984 if (platform->has_cde) 994 if (platform->has_cde)
985 gk20a_cde_destroy(g); 995 gk20a_cde_destroy(l);
986 996
987 gk20a_ctxsw_trace_cleanup(g); 997 gk20a_ctxsw_trace_cleanup(g);
988 998
diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h
index ed8364a9..160a5738 100644
--- a/drivers/gpu/nvgpu/common/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/os_linux.h
@@ -19,6 +19,7 @@
19#include <linux/cdev.h> 19#include <linux/cdev.h>
20 20
21#include "gk20a/gk20a.h" 21#include "gk20a/gk20a.h"
22#include "cde.h"
22 23
23struct nvgpu_os_linux { 24struct nvgpu_os_linux {
24 struct gk20a g; 25 struct gk20a g;
@@ -108,6 +109,7 @@ struct nvgpu_os_linux {
108 struct dentry *debugfs_force_preemption_gfxp; 109 struct dentry *debugfs_force_preemption_gfxp;
109 struct dentry *debugfs_dump_ctxsw_stats; 110 struct dentry *debugfs_dump_ctxsw_stats;
110#endif 111#endif
112 struct gk20a_cde_app cde_app;
111}; 113};
112 114
113static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) 115static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0cd314d6..63ea5bc4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -117,9 +117,6 @@ int gk20a_prepare_poweroff(struct gk20a *g)
117 if (gk20a_fifo_is_engine_busy(g)) 117 if (gk20a_fifo_is_engine_busy(g))
118 return -EBUSY; 118 return -EBUSY;
119 119
120 /* cancel any pending cde work */
121 gk20a_cde_suspend(g);
122
123 gk20a_ce_suspend(g); 120 gk20a_ce_suspend(g);
124 121
125 ret = gk20a_channel_suspend(g); 122 ret = gk20a_channel_suspend(g);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ab715bdc..69cb2253 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -60,7 +60,6 @@ struct nvgpu_cpu_time_correlation_sample;
60#include "pmu_gk20a.h" 60#include "pmu_gk20a.h"
61#include "priv_ring_gk20a.h" 61#include "priv_ring_gk20a.h"
62#include "therm_gk20a.h" 62#include "therm_gk20a.h"
63#include "cde_gk20a.h"
64#include "sched_gk20a.h" 63#include "sched_gk20a.h"
65#ifdef CONFIG_ARCH_TEGRA_18x_SOC 64#ifdef CONFIG_ARCH_TEGRA_18x_SOC
66#include "clk/clk.h" 65#include "clk/clk.h"
@@ -928,6 +927,7 @@ struct gpu_ops {
928 struct { 927 struct {
929 void (*get_program_numbers)(struct gk20a *g, 928 void (*get_program_numbers)(struct gk20a *g,
930 u32 block_height_log2, 929 u32 block_height_log2,
930 u32 shader_parameter,
931 int *hprog, int *vprog); 931 int *hprog, int *vprog);
932 bool (*need_scatter_buffer)(struct gk20a *g); 932 bool (*need_scatter_buffer)(struct gk20a *g);
933 int (*populate_scatter_buffer)(struct gk20a *g, 933 int (*populate_scatter_buffer)(struct gk20a *g,
@@ -1217,7 +1217,6 @@ struct gk20a {
1217 1217
1218 struct gk20a_sched_ctrl sched_ctrl; 1218 struct gk20a_sched_ctrl sched_ctrl;
1219 1219
1220 struct gk20a_cde_app cde_app;
1221 bool mmu_debug_ctrl; 1220 bool mmu_debug_ctrl;
1222 1221
1223 u32 tpc_fs_mask_user; 1222 u32 tpc_fs_mask_user;
diff --git a/drivers/gpu/nvgpu/gm20b/cde_gm20b.c b/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
index f8267d1d..de7cf872 100644
--- a/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
@@ -28,15 +28,16 @@ enum programs {
28 28
29void gm20b_cde_get_program_numbers(struct gk20a *g, 29void gm20b_cde_get_program_numbers(struct gk20a *g,
30 u32 block_height_log2, 30 u32 block_height_log2,
31 u32 shader_parameter,
31 int *hprog_out, int *vprog_out) 32 int *hprog_out, int *vprog_out)
32{ 33{
33 int hprog = PROG_HPASS; 34 int hprog = PROG_HPASS;
34 int vprog = (block_height_log2 >= 2) ? 35 int vprog = (block_height_log2 >= 2) ?
35 PROG_VPASS_LARGE : PROG_VPASS_SMALL; 36 PROG_VPASS_LARGE : PROG_VPASS_SMALL;
36 if (g->cde_app.shader_parameter == 1) { 37 if (shader_parameter == 1) {
37 hprog = PROG_PASSTHROUGH; 38 hprog = PROG_PASSTHROUGH;
38 vprog = PROG_PASSTHROUGH; 39 vprog = PROG_PASSTHROUGH;
39 } else if (g->cde_app.shader_parameter == 2) { 40 } else if (shader_parameter == 2) {
40 hprog = PROG_HPASS_DEBUG; 41 hprog = PROG_HPASS_DEBUG;
41 vprog = (block_height_log2 >= 2) ? 42 vprog = (block_height_log2 >= 2) ?
42 PROG_VPASS_LARGE_DEBUG : 43 PROG_VPASS_LARGE_DEBUG :
diff --git a/drivers/gpu/nvgpu/gm20b/cde_gm20b.h b/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
index f2ea20a0..0ea423ad 100644
--- a/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
@@ -20,6 +20,7 @@ struct gk20a;
20 20
21void gm20b_cde_get_program_numbers(struct gk20a *g, 21void gm20b_cde_get_program_numbers(struct gk20a *g,
22 u32 block_height_log2, 22 u32 block_height_log2,
23 u32 shader_parameter,
23 int *hprog_out, int *vprog_out); 24 int *hprog_out, int *vprog_out);
24 25
25#endif 26#endif
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
index 685ddbc4..1ddbcba6 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -32,17 +32,18 @@ enum gp10b_programs {
32 32
33void gp10b_cde_get_program_numbers(struct gk20a *g, 33void gp10b_cde_get_program_numbers(struct gk20a *g,
34 u32 block_height_log2, 34 u32 block_height_log2,
35 u32 shader_parameter,
35 int *hprog_out, int *vprog_out) 36 int *hprog_out, int *vprog_out)
36{ 37{
37 int hprog, vprog; 38 int hprog, vprog;
38 39
39 if (g->cde_app.shader_parameter == 1) { 40 if (shader_parameter == 1) {
40 hprog = GP10B_PROG_PASSTHROUGH; 41 hprog = GP10B_PROG_PASSTHROUGH;
41 vprog = GP10B_PROG_PASSTHROUGH; 42 vprog = GP10B_PROG_PASSTHROUGH;
42 } else { 43 } else {
43 hprog = GP10B_PROG_HPASS; 44 hprog = GP10B_PROG_HPASS;
44 vprog = GP10B_PROG_VPASS; 45 vprog = GP10B_PROG_VPASS;
45 if (g->cde_app.shader_parameter == 2) { 46 if (shader_parameter == 2) {
46 hprog = GP10B_PROG_HPASS_DEBUG; 47 hprog = GP10B_PROG_HPASS_DEBUG;
47 vprog = GP10B_PROG_VPASS_DEBUG; 48 vprog = GP10B_PROG_VPASS_DEBUG;
48 } 49 }
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.h b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
index 3ee6027c..7ccfe560 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
@@ -21,6 +21,7 @@ struct sg_table;
21 21
22void gp10b_cde_get_program_numbers(struct gk20a *g, 22void gp10b_cde_get_program_numbers(struct gk20a *g,
23 u32 block_height_log2, 23 u32 block_height_log2,
24 u32 shader_parameter,
24 int *hprog_out, int *vprog_out); 25 int *hprog_out, int *vprog_out);
25bool gp10b_need_scatter_buffer(struct gk20a *g); 26bool gp10b_need_scatter_buffer(struct gk20a *g);
26int gp10b_populate_scatter_buffer(struct gk20a *g, 27int gp10b_populate_scatter_buffer(struct gk20a *g,