author		Arto Merilainen <amerilainen@nvidia.com>	2014-07-21 03:21:09 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:10:41 -0400
commit		b3e023a8055d4346b30924a03a99286926e76a15 (patch)
tree		8e798c4d6ad8335616df558ec820fb71608c2980 /drivers/gpu
parent		c60a300c4ac903dd7e0b53f2542a081fa4c334cb (diff)
gpu: nvgpu: CDE support

This patch adds support for executing a precompiled GPU program to allow
exporting GPU buffers to other graphics units that have color decompression
engine (CDE) support.

Bug 1409151

Change-Id: Id0c930923f2449b85a6555de71d7ec93eed238ae
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/360418
Reviewed-by: Lauri Peltonen <lpeltonen@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
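[Editor's note] For orientation, the sketch below shows how a kernel-side caller might drive the conversion entry point this patch adds. Only the gk20a_cde_convert() signature, the struct gk20a_cde_param layout and the NUM_RESERVED_PARAMS split come from the patch itself; the wrapper function, the chosen parameter id/value, the zero dst_kind, and the use of gk20a_fence_put() from fence_gk20a.h are illustrative assumptions, not part of this change.

/*
 * Hypothetical caller sketch (not part of this patch): convert the
 * compressed buffer behind src_fd into dst_fd using the CDE firmware.
 */
static int example_cde_swizzle(struct gk20a *g, u32 src_fd, u32 dst_fd)
{
	struct gk20a_fence *fence_out = NULL;
	struct gk20a_cde_param params[1];
	int err;

	/* ids below NUM_RESERVED_PARAMS (1024) are claimed by the driver;
	 * user parameters start right above them */
	params[0].id = NUM_RESERVED_PARAMS;	/* first user parameter */
	params[0].padding = 0;
	params[0].value = 0;			/* meaning defined by the firmware */

	err = gk20a_cde_convert(g, src_fd, dst_fd,
				0,	/* dst_kind (illustrative) */
				0,	/* dst_byte_offset */
				0,	/* dst_size; 0 = rest of the buffer */
				NULL,	/* no pre-fence */
				0,	/* flags */
				params, 1, &fence_out);
	if (err)
		return err;

	/* the conversion always returns a post-fence; drop our reference
	 * once we no longer need to wait on it */
	if (fence_out)
		gk20a_fence_put(fence_out);
	return 0;
}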
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/Kconfig            |   8 +
-rw-r--r--  drivers/gpu/nvgpu/gk20a/Makefile     |   1 +
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c  | 924 +
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.h  | 254 +
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c      |   3 +
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h      |   4 +
6 files changed, 1194 insertions, 0 deletions
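[Editor's note] Before the diff proper: the firmware blob (gpu2cde.bin) loaded by cde_gk20a.c is self-describing. A schematic of its layout, reconstructed from the gk20a_init_cde_img() parser below; the struct name cde_fw_header is made up for illustration, while the element structs and type constants are the real ones added in cde_gk20a.h.

/* Schematic of the gpu2cde.bin image as parsed by gk20a_init_cde_img() */
struct cde_fw_header {				/* hypothetical name */
	u32 version;				/* data[0] */
	u32 num_of_elems;			/* data[1] */
	/* followed immediately by num_of_elems records: */
	struct gk20a_cde_hdr_elem elem[];	/* tagged union, see cde_gk20a.h */
};
/*
 * Element types and their effect during gk20a_cde_load():
 *   TYPE_BUF            - allocate (and optionally fill) a DMA buffer
 *   TYPE_REPLACE        - patch one buffer with another buffer's GPU VA
 *   TYPE_PARAM          - remember a location to patch at conversion time
 *   TYPE_REQUIRED_CLASS - allocate an object context for a GPU class
 *   TYPE_COMMAND        - gpfifo entries stored at command.data_byte_offset
 */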
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig
index 315c4683..b863077a 100644
--- a/drivers/gpu/nvgpu/Kconfig
+++ b/drivers/gpu/nvgpu/Kconfig
@@ -12,6 +12,14 @@ config GK20A_DEFAULT_TIMEOUT
 	help
 	  Default timeout for jobs in milliseconds. Set to zero for no timeout.
 
+config GK20A_CDE
+	depends on GK20A
+	bool "Support compression bit swizzling through CDE"
+	default n
+	help
+	  Say Y to allow compression bit swizzling
+	  using pre-compiled shader.
+
 config GK20A_PMU
 	bool "Support GK20A PMU"
 	depends on GK20A
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
index 246f9447..aa9237b4 100644
--- a/drivers/gpu/nvgpu/gk20a/Makefile
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -34,6 +34,7 @@ nvgpu-y := \
 	hal.o \
 	hal_gk20a.o \
 	gk20a_allocator.o \
+	cde_gk20a.o \
 	platform_gk20a_generic.o \
 	tsg_gk20a.o
 nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
new file mode 100644
index 00000000..d01426be
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -0,0 +1,924 @@
+/*
+ * Color decompression engine support
+ *
+ * Copyright (c) 2014, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/nvhost.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/debugfs.h>
+#include <linux/dma-buf.h>
+
+#include "gk20a.h"
+#include "channel_gk20a.h"
+#include "mm_gk20a.h"
+#include "cde_gk20a.h"
+#include "fence_gk20a.h"
+#include "gr_gk20a.h"
+#include "debug_gk20a.h"
+
+#include "hw_ccsr_gk20a.h"
+#include "hw_pbdma_gk20a.h"
+
+void gk20a_cde_dump(struct gk20a_cde_ctx *cde_ctx)
+{
+	int i;
+	for (i = 0; i < cde_ctx->num_bufs; i++) {
+		struct gk20a_cde_mem_desc *target_mem = cde_ctx->mem + i;
+		u32 *target_mem_ptr = target_mem->cpuva;
+		int j = 0;
+
+		gk20a_dbg(gpu_dbg_cde, "cde: buffer=%d, size=%zu, gpuva=%llx\n",
+			  i, target_mem->num_bytes, target_mem->gpu_va);
+
+		for (j = 0; j < target_mem->num_bytes / sizeof(u32); j++)
+			gk20a_dbg(gpu_dbg_cde, "0x%08x ", target_mem_ptr[j]);
+		gk20a_dbg(gpu_dbg_cde, "\n\n");
+	}
+}
+
+static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
+{
+	struct device *dev = &cde_ctx->pdev->dev;
+	int i;
+
+	for (i = 0; i < cde_ctx->num_bufs; i++) {
+		struct gk20a_cde_mem_desc *mem = cde_ctx->mem + i;
+		gk20a_gmmu_unmap(cde_ctx->vm, mem->gpu_va, mem->num_bytes, 1);
+		gk20a_free_sgtable(&mem->sgt);
+		dma_free_coherent(dev, mem->num_bytes, mem->cpuva, mem->iova);
+	}
+
+	for (i = 0; i < cde_ctx->num_obj_ids; i++)
+		gk20a_free_obj_ctx(cde_ctx->ch,
+			&(struct nvhost_free_obj_ctx_args)
+			{ cde_ctx->obj_ids[i] });
+
+	kfree(cde_ctx->init_cmd);
+	kfree(cde_ctx->convert_cmd);
+
+	cde_ctx->convert_cmd = NULL;
+	cde_ctx->init_cmd = NULL;
+	cde_ctx->num_bufs = 0;
+	cde_ctx->num_obj_ids = 0;
+	cde_ctx->num_params = 0;
+	cde_ctx->init_cmd_num_entries = 0;
+	cde_ctx->convert_cmd_num_entries = 0;
+}
+
+static int gk20a_cde_remove(struct gk20a_cde_ctx *cde_ctx)
+{
+	struct gk20a *g = cde_ctx->g;
+	struct channel_gk20a *ch = cde_ctx->ch;
+	struct vm_gk20a *vm = ch->vm;
+
+	/* free the channel */
+	gk20a_free_channel(cde_ctx->ch, true);
+
+	/* ..then release mapped memory */
+	gk20a_deinit_cde_img(cde_ctx);
+	gk20a_gmmu_unmap(vm, cde_ctx->backing_store_vaddr,
+			 g->gr.compbit_store.size, 1);
+
+	return 0;
+}
+
+int gk20a_cde_destroy(struct gk20a *g)
+{
+	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx;
+	int ret, i;
+
+	if (!cde_app->initialised)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++)
+		ret = gk20a_cde_remove(cde_ctx);
+
+	cde_app->initialised = false;
+	return ret;
+}
+
+static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
+			      const struct firmware *img,
+			      struct gk20a_cde_hdr_buf *buf)
+{
+	struct device *dev = &cde_ctx->pdev->dev;
+	struct gk20a_cde_mem_desc *mem;
+	int err;
+
+	/* check that the file can hold the buf */
+	if (buf->data_byte_offset != 0 &&
+	    buf->data_byte_offset + buf->num_bytes > img->size) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid data section. buffer idx = %d",
+			   cde_ctx->num_bufs);
+		return -EINVAL;
+	}
+
+	/* check that we have enough buf elems available */
+	if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid data section. buffer idx = %d",
+			   cde_ctx->num_bufs);
+		return -ENOMEM;
+	}
+
+	/* allocate buf */
+	mem = cde_ctx->mem + cde_ctx->num_bufs;
+	mem->num_bytes = buf->num_bytes;
+	mem->cpuva = dma_alloc_coherent(dev, mem->num_bytes, &mem->iova,
+					GFP_KERNEL);
+	if (!mem->cpuva) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d",
+			   cde_ctx->num_bufs);
+		return -ENOMEM;
+	}
+
+	err = gk20a_get_sgtable(dev, &mem->sgt, mem->cpuva, mem->iova,
+				mem->num_bytes);
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not get sg table. buffer idx = %d",
+			   cde_ctx->num_bufs);
+		err = -ENOMEM;
+		goto err_get_sgtable;
+	}
+
+	mem->gpu_va = gk20a_gmmu_map(cde_ctx->vm, &mem->sgt, mem->num_bytes, 0,
+				     gk20a_mem_flag_none);
+	if (!mem->gpu_va) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not map buffer to gpuva. buffer idx = %d",
+			   cde_ctx->num_bufs);
+		err = -ENOMEM;
+		goto err_map_buffer;
+	}
+
+	/* copy the content */
+	if (buf->data_byte_offset != 0)
+		memcpy(mem->cpuva, img->data + buf->data_byte_offset,
+		       buf->num_bytes);
+
+	cde_ctx->num_bufs++;
+
+	return 0;
+
+err_map_buffer:
+	gk20a_free_sgtable(&mem->sgt);
+	kfree(mem->sgt);
+err_get_sgtable:
+	dma_free_coherent(dev, mem->num_bytes, mem->cpuva, mem->iova);
+	return err;
+}
+
+static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
+			      int type, s32 shift, u64 mask, u64 value)
+{
+	u32 *target_mem_ptr = target;
+	u64 *target_mem_ptr_u64 = target;
+	u64 current_value, new_value;
+
+	value = (shift >= 0) ? value << shift : value >> -shift;
+	value &= mask;
+
+	/* read current data from the location */
+	if (type == TYPE_PARAM_TYPE_U32)
+		current_value = *target_mem_ptr;
+	else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
+		current_value = *target_mem_ptr_u64;
+	else if (type == TYPE_PARAM_TYPE_U64_BIG) {
+		current_value = *target_mem_ptr_u64;
+		current_value = (u64)(current_value >> 32) |
+			(u64)(current_value << 32);
+	} else {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown type. type=%d",
+			   type);
+		return -EINVAL;
+	}
+
+	current_value &= ~mask;
+	new_value = current_value | value;
+
+	/* store the element data back */
+	if (type == TYPE_PARAM_TYPE_U32)
+		*target_mem_ptr = (u32)new_value;
+	else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
+		*target_mem_ptr_u64 = new_value;
+	else {
+		new_value = (u64)(new_value >> 32) |
+			(u64)(new_value << 32);
+		*target_mem_ptr_u64 = new_value;
+	}
+
+	return 0;
+}
+
+static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
+				  const struct firmware *img,
+				  struct gk20a_cde_hdr_replace *replace)
+{
+	struct gk20a_cde_mem_desc *source_mem;
+	struct gk20a_cde_mem_desc *target_mem;
+	u32 *target_mem_ptr;
+	u64 vaddr;
+	int err;
+
+	if (replace->target_buf >= cde_ctx->num_bufs ||
+	    replace->source_buf >= cde_ctx->num_bufs) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
+			   replace->target_buf, replace->source_buf,
+			   cde_ctx->num_bufs);
+		return -EINVAL;
+	}
+
+	source_mem = cde_ctx->mem + replace->source_buf;
+	target_mem = cde_ctx->mem + replace->target_buf;
+	target_mem_ptr = target_mem->cpuva;
+
+	if (source_mem->num_bytes < (replace->source_byte_offset + 3) ||
+	    target_mem->num_bytes < (replace->target_byte_offset + 3)) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
+			   replace->target_byte_offset,
+			   replace->source_byte_offset,
+			   source_mem->num_bytes,
+			   target_mem->num_bytes);
+		return -EINVAL;
+	}
+
+	/* calculate the target pointer */
+	target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
+
+	/* determine patch value */
+	vaddr = source_mem->gpu_va + replace->source_byte_offset;
+	err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
+				 replace->shift, replace->mask,
+				 vaddr);
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
+			   err, replace->target_buf,
+			   replace->target_byte_offset,
+			   replace->source_buf,
+			   replace->source_byte_offset);
+	}
+
+	return err;
+}
+
+static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
+{
+	struct gk20a *g = cde_ctx->g;
+	struct gk20a_cde_mem_desc *target_mem;
+	u32 *target_mem_ptr;
+	u64 new_data;
+	int user_id = 0, i, err;
+
+	for (i = 0; i < cde_ctx->num_params; i++) {
+		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
+		target_mem = cde_ctx->mem + param->target_buf;
+		target_mem_ptr = target_mem->cpuva;
+		target_mem_ptr += (param->target_byte_offset / sizeof(u32));
+
+		switch (param->id) {
+		case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
+			new_data = g->gr.comptags_per_cacheline;
+			break;
+		case TYPE_PARAM_GPU_CONFIGURATION:
+			new_data = g->ltc_count * g->gr.slices_per_ltc *
+				g->gr.cacheline_size;
+			break;
+		case TYPE_PARAM_FIRSTPAGEOFFSET:
+			new_data = cde_ctx->src_param_offset;
+			break;
+		case TYPE_PARAM_NUMPAGES:
+			new_data = cde_ctx->src_param_lines;
+			break;
+		case TYPE_PARAM_BACKINGSTORE:
+			new_data = cde_ctx->backing_store_vaddr;
+			break;
+		case TYPE_PARAM_DESTINATION:
+			new_data = cde_ctx->dest_vaddr;
+			break;
+		case TYPE_PARAM_DESTINATION_SIZE:
+			new_data = cde_ctx->dest_size;
+			break;
+		case TYPE_PARAM_BACKINGSTORE_SIZE:
+			new_data = g->gr.compbit_store.size;
+			break;
+		case TYPE_PARAM_SOURCE_SMMU_ADDR:
+			new_data = gk20a_mm_gpuva_to_iova(cde_ctx->vm,
+							  cde_ctx->src_vaddr);
+			if (new_data == 0)
+				return -EINVAL;
+			break;
+		default:
+			user_id = param->id - NUM_RESERVED_PARAMS;
+			if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
+				continue;
+			new_data = cde_ctx->user_param_values[user_id];
+		}
+
+		gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
+			  i, param->id, param->target_buf,
+			  param->target_byte_offset, new_data,
+			  param->data_offset, param->type, param->shift,
+			  param->mask);
+
+		new_data += param->data_offset;
+
+		err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
+					 param->shift, param->mask, new_data);
+
+		if (err) {
+			gk20a_warn(&cde_ctx->pdev->dev, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
+				   err, i, param->id, param->target_buf,
+				   param->target_byte_offset, new_data);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
+				const struct firmware *img,
+				struct gk20a_cde_hdr_param *param)
+{
+	struct gk20a_cde_mem_desc *target_mem;
+
+	if (param->target_buf >= cde_ctx->num_bufs) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
+			   cde_ctx->num_params, param->target_buf,
+			   cde_ctx->num_bufs);
+		return -EINVAL;
+	}
+
+	target_mem = cde_ctx->mem + param->target_buf;
+	if (target_mem->num_bytes < (param->target_byte_offset + 3)) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
+			   cde_ctx->num_params, param->target_byte_offset,
+			   target_mem->num_bytes);
+		return -EINVAL;
+	}
+
+	/* does this parameter fit into our parameter structure */
+	if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: no room for new parameters param idx = %d",
+			   cde_ctx->num_params);
+		return -ENOMEM;
+	}
+
+	/* is the given id valid? */
+	if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
+			   cde_ctx->num_params, param->id,
+			   NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
+		return -EINVAL;
+	}
+
+	cde_ctx->params[cde_ctx->num_params] = *param;
+	cde_ctx->num_params++;
+
+	return 0;
+}
+
+static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
+					 const struct firmware *img,
+					 u32 required_class)
+{
+	struct nvhost_alloc_obj_ctx_args alloc_obj_ctx;
+	int err;
+
+	if (cde_ctx->num_obj_ids >= MAX_CDE_OBJ_IDS) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: running out of class ids");
+		return -ENOMEM;
+	}
+
+	alloc_obj_ctx.class_num = required_class;
+	alloc_obj_ctx.padding = 0;
+
+	err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx);
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. err=%d",
+			   err);
+		return err;
+	}
+
+	cde_ctx->obj_ids[cde_ctx->num_obj_ids] = alloc_obj_ctx.obj_id;
+	cde_ctx->num_obj_ids++;
+
+	return 0;
+}
+
+static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
+				  const struct firmware *img,
+				  u32 op,
+				  struct gk20a_cde_cmd_elem *cmd_elem,
+				  u32 num_elems)
+{
+	struct nvhost_gpfifo **gpfifo, *gpfifo_elem;
+	u32 *num_entries;
+	int i;
+
+	/* check command type */
+	if (op == TYPE_BUF_COMMAND_INIT) {
+		gpfifo = &cde_ctx->init_cmd;
+		num_entries = &cde_ctx->init_cmd_num_entries;
+	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
+		gpfifo = &cde_ctx->convert_cmd;
+		num_entries = &cde_ctx->convert_cmd_num_entries;
+	} else {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown command. op=%u",
+			   op);
+		return -EINVAL;
+	}
+
+	/* allocate gpfifo entries to be pushed */
+	*gpfifo = kzalloc(sizeof(struct nvhost_gpfifo) * num_elems,
+			  GFP_KERNEL);
+	if (!*gpfifo) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate memory for gpfifo entries");
+		return -ENOMEM;
+	}
+
+	gpfifo_elem = *gpfifo;
+	for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
+		struct gk20a_cde_mem_desc *target_mem;
+
+		/* validate the current entry */
+		if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
+			gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer is not available (target=%u, num_bufs=%u)",
+				   cmd_elem->target_buf, cde_ctx->num_bufs);
+			return -EINVAL;
+		}
+
+		target_mem = cde_ctx->mem + cmd_elem->target_buf;
+		if (target_mem->num_bytes <
+		    cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
+			gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
+				   target_mem->num_bytes,
+				   cmd_elem->target_byte_offset,
+				   cmd_elem->num_bytes);
+			return -EINVAL;
+		}
+
+		/* store the element into gpfifo */
+		gpfifo_elem->entry0 =
+			u64_lo32(target_mem->gpu_va +
+				 cmd_elem->target_byte_offset);
+		gpfifo_elem->entry1 =
+			u64_hi32(target_mem->gpu_va +
+				 cmd_elem->target_byte_offset) |
+			pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
+						 sizeof(u32));
+	}
+
+	*num_entries = num_elems;
+	return 0;
+}
+
+static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
+			      const struct firmware *img)
+{
+	u32 *data = (u32 *)img->data;
+	u32 version, num_of_elems;
+	struct gk20a_cde_hdr_elem *elem;
+	u32 min_size = 0;
+	int err = 0;
+	int i;
+
+	min_size += 2 * sizeof(u32);
+	if (img->size < min_size) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid image header");
+		return -EINVAL;
+	}
+
+	version = data[0];
+	num_of_elems = data[1];
+
+	min_size += num_of_elems * sizeof(*elem);
+	if (img->size < min_size) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: bad image");
+		return -EINVAL;
+	}
+
+	elem = (struct gk20a_cde_hdr_elem *)&data[2];
+	for (i = 0; i < num_of_elems; i++) {
+		switch (elem->type) {
+		case TYPE_BUF:
+			err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
+			break;
+		case TYPE_REPLACE:
+			err = gk20a_init_cde_replace(cde_ctx, img,
+						     &elem->replace);
+			break;
+		case TYPE_PARAM:
+			err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
+			break;
+		case TYPE_REQUIRED_CLASS:
+			err = gk20a_init_cde_required_class(cde_ctx, img,
+							    elem->required_class);
+			break;
+		case TYPE_COMMAND:
+		{
+			struct gk20a_cde_cmd_elem *cmd = (void *)
+				&img->data[elem->command.data_byte_offset];
+			err = gk20a_init_cde_command(cde_ctx, img,
+						     elem->command.op, cmd,
+						     elem->command.num_entries);
+			break;
+		}
+		default:
+			gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown header element");
+			err = -EINVAL;
+		}
+
+		if (err)
+			goto deinit_image;
+
+		elem++;
+	}
+
+	if (!cde_ctx->init_cmd || !cde_ctx->init_cmd_num_entries) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: init command not defined");
+		err = -EINVAL;
+		goto deinit_image;
+	}
+
+	if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: convert command not defined");
+		err = -EINVAL;
+		goto deinit_image;
+	}
+
+	return 0;
+
+deinit_image:
+	gk20a_deinit_cde_img(cde_ctx);
+	return err;
+}
+
+static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
+				    u32 op, struct nvhost_fence *fence,
+				    u32 flags, struct gk20a_fence **fence_out)
+{
+	struct nvhost_gpfifo *gpfifo = NULL;
+	int num_entries = 0;
+
+	/* check command type */
+	if (op == TYPE_BUF_COMMAND_INIT) {
+		gpfifo = cde_ctx->init_cmd;
+		num_entries = cde_ctx->init_cmd_num_entries;
+	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
+		gpfifo = cde_ctx->convert_cmd;
+		num_entries = cde_ctx->convert_cmd_num_entries;
+	} else {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown buffer");
+		return -EINVAL;
+	}
+
+	if (gpfifo == NULL || num_entries == 0) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: buffer not available");
+		return -ENOSYS;
+	}
+
+	return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo,
+					   num_entries, flags, fence, fence_out);
+}
+
+int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd,
+		      s32 dst_kind, u64 dst_byte_offset,
+		      u32 dst_size, struct nvhost_fence *fence,
+		      u32 __flags, struct gk20a_cde_param *params,
+		      int num_params, struct gk20a_fence **fence_out)
+{
+	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_comptags comptags;
+	struct gk20a_cde_ctx *cde_ctx;
+	struct dma_buf *src = NULL, *dst = NULL;
+	u64 dst_vaddr = 0, src_vaddr = 0;
+	u32 flags;
+	int err, i;
+
+	if (!cde_app->initialised) {
+		gk20a_warn(&g->dev->dev, "cde: conversion request but no image has been provided");
+		return -ENOSYS;
+	}
+
+	mutex_lock(&cde_app->mutex);
+
+	/* pick next free cde context */
+	cde_ctx = cde_app->cde_ctx + cde_app->cde_ctx_ptr;
+	cde_app->cde_ctx_ptr = (cde_app->cde_ctx_ptr + 1) %
+		ARRAY_SIZE(cde_app->cde_ctx);
+
+	/* First, get buffer references and map the buffers to local va */
+
+	dst = dma_buf_get(dst_fd);
+	if (IS_ERR(dst)) {
+		dst = NULL;
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	/* ensure that the dst buffer has drvdata */
+	err = gk20a_dmabuf_alloc_drvdata(dst, &g->dev->dev);
+	if (err)
+		goto exit_unlock;
+
+	/* map the destination buffer */
+	dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0,
+				 0, dst_kind, NULL, true,
+				 gk20a_mem_flag_none,
+				 0, 0);
+	if (!dst_vaddr) {
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	src = dma_buf_get(src_fd);
+	if (IS_ERR(src)) {
+		src = NULL;
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	/* ensure that the src buffer has drvdata */
+	err = gk20a_dmabuf_alloc_drvdata(src, &g->dev->dev);
+	if (err)
+		goto exit_unlock;
+
+	/* map the source buffer to prevent premature release */
+	src_vaddr = gk20a_vm_map(g->cde_app.vm, src, 0,
+				 0, dst_kind, NULL, true,
+				 gk20a_mem_flag_none,
+				 0, 0);
+	if (!src_vaddr) {
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	if (!dst_size)
+		dst_size = dst->size - dst_byte_offset;
+
+	/* reload buffer converter if it has failed */
+	if (cde_ctx->ch->has_timedout) {
+		mutex_unlock(&cde_app->mutex);
+		err = gk20a_cde_reload(g);
+		if (err)
+			return err;
+		mutex_lock(&cde_app->mutex);
+	}
+
+	/* wait for channel idle */
+	err = gk20a_channel_finish(cde_ctx->ch, 2000);
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: old work could not be finished");
+		goto exit_unlock;
+	}
+
+	/* disable the channel */
+	gk20a_writel(g, ccsr_channel_r(cde_ctx->ch->hw_chid),
+		     gk20a_readl(g, ccsr_channel_r(cde_ctx->ch->hw_chid)) |
+		     ccsr_channel_enable_clr_true_f());
+	gk20a_fifo_preempt_channel(g, cde_ctx->ch->hw_chid);
+	channel_gk20a_unbind(&g->fifo.channel[cde_ctx->ch->hw_chid]);
+
+	/* reinitialise the graphics context of the channel */
+	gr_gk20a_load_golden_ctx_image(g, cde_ctx->ch);
+
+	/* re-enable the channel */
+	g->ops.fifo.bind_channel(&g->fifo.channel[cde_ctx->ch->hw_chid]);
+	gk20a_writel(g, ccsr_channel_r(cde_ctx->ch->hw_chid),
+		     gk20a_readl(g, ccsr_channel_r(cde_ctx->ch->hw_chid)) |
+		     ccsr_channel_enable_set_true_f());
+
+	/* store source buffer compression tags */
+	gk20a_get_comptags(&g->dev->dev, src, &comptags);
+	cde_ctx->src_vaddr = src_vaddr;
+	cde_ctx->src_param_offset = comptags.offset;
+	cde_ctx->src_param_lines = comptags.lines;
+
+	/* store information about destination */
+	cde_ctx->dest_vaddr = dst_vaddr + dst_byte_offset;
+	cde_ctx->dest_size = dst_size;
+
+	/* remove existing argument data */
+	memset(cde_ctx->user_param_values, 0,
+	       sizeof(cde_ctx->user_param_values));
+
+	/* read user space arguments for the conversion */
+	for (i = 0; i < num_params; i++) {
+		struct gk20a_cde_param *param = params + i;
+		int id = param->id - NUM_RESERVED_PARAMS;
+
+		if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
+			gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown user parameter");
+			err = -EINVAL;
+			goto exit_unlock;
+		}
+		cde_ctx->user_param_values[id] = param->value;
+	}
+
+	/* patch data */
+	err = gk20a_cde_patch_params(cde_ctx);
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to patch parameters");
+		goto exit_unlock;
+	}
+
+	gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
+		  g->gr.compbit_store.size, cde_ctx->backing_store_vaddr);
+	gk20a_dbg(gpu_dbg_cde, "cde: buffer=dst, size=%llu, gpuva=%llx\n",
+		  cde_ctx->dest_size, cde_ctx->dest_vaddr);
+	gk20a_cde_dump(cde_ctx);
+
+	/* execute the init push buffer */
+	err = gk20a_cde_execute_buffer(cde_ctx, TYPE_BUF_COMMAND_INIT,
+				       NULL, 0, NULL);
+	if (err)
+		goto exit_unlock;
+
+	/* always take the postfence as it is needed for protecting the
+	 * cde context */
+	flags = __flags | NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
+
+	/* execute the conversion buffer */
+	err = gk20a_cde_execute_buffer(cde_ctx, TYPE_BUF_COMMAND_CONVERT,
+				       fence, flags, fence_out);
+
+exit_unlock:
+
+	/* unmap the buffers - channel holds references to them now */
+	if (dst_vaddr)
+		gk20a_vm_unmap(g->cde_app.vm, dst_vaddr);
+	if (src_vaddr)
+		gk20a_vm_unmap(g->cde_app.vm, src_vaddr);
+
+	/* drop dmabuf refs if work was aborted */
+	if (err && src)
+		dma_buf_put(src);
+	if (err && dst)
+		dma_buf_put(dst);
+
+	mutex_unlock(&cde_app->mutex);
+
+	return err;
+}
+
+int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
+{
+	struct gk20a *g = cde_ctx->g;
+	const struct firmware *img;
+	struct channel_gk20a *ch;
+	struct gr_gk20a *gr = &g->gr;
+	int err = 0;
+	u64 vaddr;
+
+	img = gk20a_request_firmware(g, "gpu2cde.bin");
+	if (!img) {
+		dev_err(&cde_ctx->pdev->dev, "cde: could not fetch the firmware");
+		return -ENOSYS;
+	}
+
+	ch = gk20a_open_new_channel(g);
+	if (!ch) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: gk20a channel not available");
+		err = -ENOMEM;
+		goto err_get_gk20a_channel;
+	}
+
+	/* bind the channel to the vm */
+	gk20a_vm_get(&g->mm.pmu.vm);
+	ch->vm = &g->mm.pmu.vm;
+	err = channel_gk20a_commit_va(ch);
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: could not bind vm");
+		goto err_commit_va;
+	}
+
+	/* allocate gpfifo (1024 should be more than enough) */
+	err = gk20a_alloc_channel_gpfifo(ch,
+		&(struct nvhost_alloc_gpfifo_args){1024, 0});
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: unable to allocate gpfifo");
+		goto err_alloc_gpfifo;
+	}
+
+	/* map backing store to gpu virtual space */
+	vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.sgt,
+			       g->gr.compbit_store.size, 0,
+			       gk20a_mem_flag_none);
+
+	if (!vaddr) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: cannot map compression bit backing store");
+		err = -ENOMEM;
+		goto err_map_backingstore;
+	}
+
+	/* store initialisation data */
+	cde_ctx->ch = ch;
+	cde_ctx->vm = ch->vm;
+	cde_ctx->backing_store_vaddr = vaddr;
+
+	/* initialise the firmware */
+	err = gk20a_init_cde_img(cde_ctx, img);
+	if (err) {
+		gk20a_warn(&cde_ctx->pdev->dev, "cde: image initialisation failed");
+		goto err_init_cde_img;
+	}
+
+	/* initialisation done */
+	release_firmware(img);
+
+	return 0;
+
+err_init_cde_img:
+	gk20a_gmmu_unmap(ch->vm, vaddr, g->gr.compbit_store.size, 1);
+err_map_backingstore:
+err_alloc_gpfifo:
+	gk20a_vm_put(ch->vm);
+err_commit_va:
+err_get_gk20a_channel:
+	release_firmware(img);
+	dev_err(&cde_ctx->pdev->dev, "cde: couldn't initialise buffer converter: %d",
+		err);
+	return err;
+}
+
+int gk20a_cde_reload(struct gk20a *g)
+{
+	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx;
+	int err, i;
+
+	if (!cde_app->initialised) {
+		gk20a_busy(g->dev);
+		gk20a_init_cde_support(g);
+		gk20a_idle(g->dev);
+		if (!cde_app->initialised)
+			return -ENOSYS;
+		return 0;
+	}
+
+	gk20a_busy(g->dev);
+	mutex_lock(&cde_app->mutex);
+	for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) {
+		gk20a_cde_remove(cde_ctx);
+		err = gk20a_cde_load(cde_ctx);
+	}
+
+	cde_app->cde_ctx_ptr = 0;
+	mutex_unlock(&cde_app->mutex);
+
+	gk20a_idle(g->dev);
+	return err;
+}
+
+int gk20a_init_cde_support(struct gk20a *g)
+{
+	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx;
+	int ret, i;
+
+	if (cde_app->initialised)
+		return 0;
+
+	mutex_init(&cde_app->mutex);
+	mutex_lock(&cde_app->mutex);
+
+	for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) {
+		cde_ctx->g = g;
+		cde_ctx->pdev = g->dev;
+		ret = gk20a_cde_load(cde_ctx);
+		if (ret)
+			goto err_init_instance;
+	}
+
+	/* take shadow to the vm for general usage */
+	cde_app->vm = cde_app->cde_ctx->vm;
+
+	cde_app->cde_ctx_ptr = 0;
+	cde_app->initialised = true;
+	mutex_unlock(&cde_app->mutex);
+
+	return 0;
+
+err_init_instance:
+
+	/* deinitialise initialised channels */
+	while (i--) {
+		cde_ctx--;
+		gk20a_cde_remove(cde_ctx);
+	}
+	mutex_unlock(&cde_app->mutex);
+	return ret;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
new file mode 100644
index 00000000..784ae8b4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -0,0 +1,254 @@
+/*
+ * GK20A color decompression engine support
+ *
+ * Copyright (c) 2014, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CDE_GK20A_H_
+#define _CDE_GK20A_H_
+
+#include "mm_gk20a.h"
+
+#define MAX_CDE_BUFS		10
+#define MAX_CDE_PARAMS		64
+#define MAX_CDE_USER_PARAMS	32
+#define MAX_CDE_OBJ_IDS		4
+
+struct dma_buf;
+struct gk20a;
+
+/*
+ * This element defines a buffer that is allocated and mapped into GPU address
+ * space. data_byte_offset defines the beginning of the buffer inside the
+ * firmware. num_bytes defines how many bytes the firmware contains.
+ *
+ * If data_byte_offset is zero, we allocate an empty buffer.
+ */
+
+struct gk20a_cde_hdr_buf {
+	u64 data_byte_offset;
+	u64 num_bytes;
+};
+
+/*
+ * This element defines constant patching in buffers. It computes the GPU
+ * virtual address of <source_buf> + source_byte_offset. The address is then
+ * folded into the patch value as per:
+ *    value = (current_value & ~mask) | ((address << shift) & mask)
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ */
+
+struct gk20a_cde_hdr_replace {
+	u32 target_buf;
+	u32 source_buf;
+	s32 shift;
+	u32 type;
+	s64 target_byte_offset;
+	s64 source_byte_offset;
+	u64 mask;
+};
+
+enum {
+	TYPE_PARAM_TYPE_U32 = 0,
+	TYPE_PARAM_TYPE_U64_LITTLE,
+	TYPE_PARAM_TYPE_U64_BIG
+};
+
+/*
+ * This element defines runtime patching in buffers. Parameters with ids below
+ * 1024 are reserved for special usage; see the TYPE_PARAM_* enum below:
+ *  0 = comptags per cacheline
+ *  1 = gpu configuration (ltc count * slices per ltc * cacheline size)
+ *  2 = source buffer first page offset
+ *  3 = source buffer number of pages
+ *  4 = backing store memory address
+ *  5 = destination memory address
+ *  6 = destination size (bytes)
+ *  7 = backing store size (bytes)
+ *  8 = source buffer SMMU address
+ *
+ * Parameters with ids 1024 and above are user-specified, i.e. they determine
+ * where parameters from user space should be placed in buffers, what their
+ * type is, etc.
+ *
+ * Once the value is available, we add data_offset to the value.
+ *
+ * The value is then folded into the patch value as per:
+ *    value = (current_value & ~mask) | ((value << shift) & mask)
+ *
+ * The type field defines the register size as:
+ *  0=u32,
+ *  1=u64 (little endian),
+ *  2=u64 (big endian)
+ */
+
+struct gk20a_cde_hdr_param {
+	u32 id;
+	u32 target_buf;
+	s32 shift;
+	u32 type;
+	s64 data_offset;
+	s64 target_byte_offset;
+	u64 mask;
+};
+
+enum {
+	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
+	TYPE_PARAM_GPU_CONFIGURATION,
+	TYPE_PARAM_FIRSTPAGEOFFSET,
+	TYPE_PARAM_NUMPAGES,
+	TYPE_PARAM_BACKINGSTORE,
+	TYPE_PARAM_DESTINATION,
+	TYPE_PARAM_DESTINATION_SIZE,
+	TYPE_PARAM_BACKINGSTORE_SIZE,
+	TYPE_PARAM_SOURCE_SMMU_ADDR,
+	NUM_RESERVED_PARAMS = 1024,
+};
+
+/*
+ * This header element defines a command. The op field determines whether the
+ * element defines an init (0) or a convert (1) command. data_byte_offset
+ * denotes the beginning address of the command elements in the file.
+ */
+
+struct gk20a_cde_hdr_command {
+	u32 op;
+	u32 num_entries;
+	u64 data_byte_offset;
+};
+
+enum {
+	TYPE_BUF_COMMAND_INIT = 0,
+	TYPE_BUF_COMMAND_CONVERT
+};
+
+/*
+ * A command element defines one entry inside a push buffer. target_buf
+ * defines the buffer that holds the pushbuffer entries, target_byte_offset
+ * the offset inside that buffer, and num_bytes the size of the entry in
+ * bytes.
+ */
+
+struct gk20a_cde_cmd_elem {
+	u32 target_buf;
+	u32 padding;
+	u64 target_byte_offset;
+	u64 num_bytes;
+};
+
+/*
+ * The following defines a single header element. Each element has a type and
+ * one of the data structures below.
+ */
+
+struct gk20a_cde_hdr_elem {
+	u32 type;
+	u32 padding;
+	union {
+		struct gk20a_cde_hdr_buf buf;
+		struct gk20a_cde_hdr_replace replace;
+		struct gk20a_cde_hdr_param param;
+		u32 required_class;
+		struct gk20a_cde_hdr_command command;
+	};
+};
+
+enum {
+	TYPE_BUF = 0,
+	TYPE_REPLACE,
+	TYPE_PARAM,
+	TYPE_REQUIRED_CLASS,
+	TYPE_COMMAND
+};
+
+struct gk20a_cde_mem_desc {
+	struct sg_table *sgt;
+	dma_addr_t iova;
+	void *cpuva;
+	size_t num_bytes;
+	u64 gpu_va;
+};
+
+struct gk20a_cde_param {
+	u32 id;
+	u32 padding;
+	u64 value;
+};
+
+struct gk20a_cde_ctx {
+	struct gk20a *g;
+	struct platform_device *pdev;
+
+	/* channel related data */
+	struct channel_gk20a *ch;
+	struct vm_gk20a *vm;
+
+	/* buf converter configuration */
+	struct gk20a_cde_mem_desc mem[MAX_CDE_BUFS];
+	int num_bufs;
+
+	/* buffer patching params (where should patching be done) */
+	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
+	int num_params;
+
+	/* storage for user space parameter values */
+	u32 user_param_values[MAX_CDE_USER_PARAMS];
+
+	u64 src_smmu_addr;
+	u32 src_param_offset;
+	u32 src_param_lines;
+
+	u64 src_vaddr;
+
+	u64 dest_vaddr;
+	u64 dest_size;
+
+	u32 obj_ids[MAX_CDE_OBJ_IDS];
+	int num_obj_ids;
+
+	u64 backing_store_vaddr;
+
+	struct nvhost_gpfifo *init_cmd;
+	int init_cmd_num_entries;
+
+	struct nvhost_gpfifo *convert_cmd;
+	int convert_cmd_num_entries;
+
+	struct kobj_attribute attr;
+};
+
+struct gk20a_cde_app {
+	bool initialised;
+	struct mutex mutex;
+	struct vm_gk20a *vm;
+
+	struct gk20a_cde_ctx cde_ctx[1];
+	int cde_ctx_ptr;
+};
+
+int gk20a_cde_destroy(struct gk20a *g);
+int gk20a_init_cde_support(struct gk20a *g);
+int gk20a_cde_reload(struct gk20a *g);
+int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd,
+		      s32 dst_kind, u64 dst_byte_offset,
+		      u32 dst_size, struct nvhost_fence *fence,
+		      u32 __flags, struct gk20a_cde_param *params,
+		      int num_params, struct gk20a_fence **fence_out);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index fa6e0cce..2975798f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -976,6 +976,7 @@ static int gk20a_pm_finalize_poweron(struct device *dev)
 		goto done;
 	}
 
+
 	gk20a_channel_resume(g);
 	set_user_nice(current, nice_value);
 
@@ -983,6 +984,8 @@ static int gk20a_pm_finalize_poweron(struct device *dev)
 
 	trace_gk20a_finalize_poweron_done(dev_name(dev));
 
+	if (IS_ENABLED(CONFIG_GK20A_CDE))
+		gk20a_init_cde_support(g);
 done:
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 05ed9270..b2ecade5 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -48,6 +48,7 @@ struct acr_gm20b;
 #include "therm_gk20a.h"
 #include "platform_gk20a.h"
 #include "gm20b/acr_gm20b.h"
+#include "cde_gk20a.h"
 
 extern struct platform_device tegra_gk20a_device;
 
@@ -356,6 +357,8 @@ struct gk20a {
 	struct gk20a_scale_profile *scale_profile;
 
 	struct device_dma_parameters dma_parms;
+
+	struct gk20a_cde_app cde_app;
 };
 
 static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
@@ -422,6 +425,7 @@ enum gk20a_dbg_categories {
 	gpu_dbg_clk = BIT(7), /* gk20a clk */
 	gpu_dbg_map = BIT(8), /* mem mappings */
 	gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */
+	gpu_dbg_cde = BIT(10), /* cde info messages */
 	gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
 };
 