author    Terje Bergstrom <tbergstrom@nvidia.com>  2017-09-07 13:43:47 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-11 18:10:52 -0400
commit    c37c9baae65bcf0ef08a319488c09f57131026cc (patch)
tree      e3b4252bfff7436574a909dd625de49229d538da /drivers/gpu/nvgpu/gk20a
parent    17451138cf60f5d64eed88cc5defd44981926d9d (diff)
gpu: nvgpu: Move CDE code to Linux module
CDE is used only on Linux platforms, and the code is highly dependent on
Linux APIs. Move the common CDE code to the Linux module and leave only the
chip-specific parts in the HAL.

Change-Id: I507fe7eceaf7607303dfdddcf438449a5f582ea7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1554755
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c  1669
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.h   311
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c         3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h         3
4 files changed, 1 insertion, 1985 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
deleted file mode 100644
index 506207f2..00000000
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ /dev/null
@@ -1,1669 +0,0 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22
23#include <trace/events/gk20a.h>
24
25#include <nvgpu/dma.h>
26#include <nvgpu/gmmu.h>
27#include <nvgpu/timers.h>
28#include <nvgpu/nvgpu_common.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/log.h>
31#include <nvgpu/bug.h>
32#include <nvgpu/firmware.h>
33
34#include "gk20a.h"
35#include "channel_gk20a.h"
36#include "mm_gk20a.h"
37#include "cde_gk20a.h"
38#include "fence_gk20a.h"
39#include "gr_gk20a.h"
40#include "common/linux/os_linux.h"
41
42#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
44
45/*
46 * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the
47 * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem.
48 */
49#include "common/linux/vm_priv.h"
50
51static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
52static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
53
54#define CTX_DELETE_TIME 1000
55
56#define MAX_CTX_USE_COUNT 42
57#define MAX_CTX_RETRY_TIME 2000
58
59static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
60{
61 unsigned int i;
62
63 for (i = 0; i < cde_ctx->num_bufs; i++) {
64 struct nvgpu_mem *mem = cde_ctx->mem + i;
65 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
66 }
67
68 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
69
70 cde_ctx->convert_cmd = NULL;
71 cde_ctx->init_convert_cmd = NULL;
72 cde_ctx->num_bufs = 0;
73 cde_ctx->num_params = 0;
74 cde_ctx->init_cmd_num_entries = 0;
75 cde_ctx->convert_cmd_num_entries = 0;
76 cde_ctx->init_cmd_executed = false;
77}
78
79static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
80__must_hold(&cde_app->mutex)
81{
82 struct gk20a *g = cde_ctx->g;
83 struct channel_gk20a *ch = cde_ctx->ch;
84 struct vm_gk20a *vm = ch->vm;
85
86 trace_gk20a_cde_remove_ctx(cde_ctx);
87
88 /* release mapped memory */
89 gk20a_deinit_cde_img(cde_ctx);
90 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
91 cde_ctx->backing_store_vaddr);
92
93 /* free the channel */
94 gk20a_channel_close(ch);
95
96 /* housekeeping on app */
97 nvgpu_list_del(&cde_ctx->list);
98 cde_ctx->g->cde_app.ctx_count--;
99 nvgpu_kfree(g, cde_ctx);
100}
101
102static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
103 bool wait_finish)
104__releases(&cde_app->mutex)
105__acquires(&cde_app->mutex)
106{
107 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
108
 109	/* permanent contexts do not have a deleter work */
110 if (!cde_ctx->is_temporary)
111 return;
112
113 if (wait_finish) {
114 nvgpu_mutex_release(&cde_app->mutex);
115 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
116 nvgpu_mutex_acquire(&cde_app->mutex);
117 } else {
118 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
119 }
120}
121
122static void gk20a_cde_remove_contexts(struct gk20a *g)
123__must_hold(&cde_app->mutex)
124{
125 struct gk20a_cde_app *cde_app = &g->cde_app;
126 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
127
 128	/* it is safe to drop the mutex in cancel_deleter since the app is
 129	 * deinitialised and no new jobs are started. deleter works can only be
 130	 * waiting for the mutex, or about to bail out before taking it */
131
132 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
133 &cde_app->free_contexts, gk20a_cde_ctx, list) {
134 gk20a_cde_cancel_deleter(cde_ctx, true);
135 gk20a_cde_remove_ctx(cde_ctx);
136 }
137
138 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
139 &cde_app->used_contexts, gk20a_cde_ctx, list) {
140 gk20a_cde_cancel_deleter(cde_ctx, true);
141 gk20a_cde_remove_ctx(cde_ctx);
142 }
143}
144
145static void gk20a_cde_stop(struct gk20a *g)
146__must_hold(&cde_app->mutex)
147{
148 struct gk20a_cde_app *cde_app = &g->cde_app;
149
150 /* prevent further conversions and delayed works from working */
151 cde_app->initialised = false;
152 /* free all data, empty the list */
153 gk20a_cde_remove_contexts(g);
154}
155
156void gk20a_cde_destroy(struct gk20a *g)
157__acquires(&cde_app->mutex)
158__releases(&cde_app->mutex)
159{
160 struct gk20a_cde_app *cde_app = &g->cde_app;
161
162 if (!cde_app->initialised)
163 return;
164
165 nvgpu_mutex_acquire(&cde_app->mutex);
166 gk20a_cde_stop(g);
167 nvgpu_mutex_release(&cde_app->mutex);
168
169 nvgpu_mutex_destroy(&cde_app->mutex);
170}
171
172void gk20a_cde_suspend(struct gk20a *g)
173__acquires(&cde_app->mutex)
174__releases(&cde_app->mutex)
175{
176 struct gk20a_cde_app *cde_app = &g->cde_app;
177 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
178
179 if (!cde_app->initialised)
180 return;
181
182 nvgpu_mutex_acquire(&cde_app->mutex);
183
184 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
185 &cde_app->free_contexts, gk20a_cde_ctx, list) {
186 gk20a_cde_cancel_deleter(cde_ctx, false);
187 }
188
189 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
190 &cde_app->used_contexts, gk20a_cde_ctx, list) {
191 gk20a_cde_cancel_deleter(cde_ctx, false);
192 }
193
194 nvgpu_mutex_release(&cde_app->mutex);
195
196}
197
198static int gk20a_cde_create_context(struct gk20a *g)
199__must_hold(&cde_app->mutex)
200{
201 struct gk20a_cde_app *cde_app = &g->cde_app;
202 struct gk20a_cde_ctx *cde_ctx;
203
204 cde_ctx = gk20a_cde_allocate_context(g);
205 if (IS_ERR(cde_ctx))
206 return PTR_ERR(cde_ctx);
207
208 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
209 cde_app->ctx_count++;
210 if (cde_app->ctx_count > cde_app->ctx_count_top)
211 cde_app->ctx_count_top = cde_app->ctx_count;
212
213 return 0;
214}
215
216static int gk20a_cde_create_contexts(struct gk20a *g)
217__must_hold(&g->cde_app->mutex)
218{
219 int err;
220 int i;
221
222 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
223 err = gk20a_cde_create_context(g);
224 if (err)
225 goto out;
226 }
227
228 return 0;
229out:
230 gk20a_cde_remove_contexts(g);
231 return err;
232}
233
234static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
235 struct nvgpu_firmware *img,
236 struct gk20a_cde_hdr_buf *buf)
237{
238 struct nvgpu_mem *mem;
239 struct gk20a *g = cde_ctx->g;
240 int err;
241
242 /* check that the file can hold the buf */
243 if (buf->data_byte_offset != 0 &&
244 buf->data_byte_offset + buf->num_bytes > img->size) {
245 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
246 cde_ctx->num_bufs);
247 return -EINVAL;
248 }
249
250 /* check that we have enough buf elems available */
251 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
252 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
253 cde_ctx->num_bufs);
254 return -ENOMEM;
255 }
256
257 /* allocate buf */
258 mem = cde_ctx->mem + cde_ctx->num_bufs;
259 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
260 if (err) {
261 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
262 cde_ctx->num_bufs);
263 return -ENOMEM;
264 }
265
266 /* copy the content */
267 if (buf->data_byte_offset != 0)
268 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
269 buf->num_bytes);
270
271 cde_ctx->num_bufs++;
272
273 return 0;
274}
275
276static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
277 int type, s32 shift, u64 mask, u64 value)
278{
279 struct gk20a *g = cde_ctx->g;
280 u32 *target_mem_ptr = target;
281 u64 *target_mem_ptr_u64 = target;
282 u64 current_value, new_value;
283
284 value = (shift >= 0) ? value << shift : value >> -shift;
285 value &= mask;
286
287 /* read current data from the location */
288 current_value = 0;
289 if (type == TYPE_PARAM_TYPE_U32) {
290 if (mask != 0xfffffffful)
291 current_value = *target_mem_ptr;
292 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
293 if (mask != ~0ul)
294 current_value = *target_mem_ptr_u64;
295 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
296 current_value = *target_mem_ptr_u64;
297 current_value = (u64)(current_value >> 32) |
298 (u64)(current_value << 32);
299 } else {
300 nvgpu_warn(g, "cde: unknown type. type=%d",
301 type);
302 return -EINVAL;
303 }
304
305 current_value &= ~mask;
306 new_value = current_value | value;
307
308 /* store the element data back */
309 if (type == TYPE_PARAM_TYPE_U32)
310 *target_mem_ptr = (u32)new_value;
311 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
312 *target_mem_ptr_u64 = new_value;
313 else {
314 new_value = (u64)(new_value >> 32) |
315 (u64)(new_value << 32);
316 *target_mem_ptr_u64 = new_value;
317 }
318
319 return 0;
320}
321
322static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
323 struct nvgpu_firmware *img,
324 struct gk20a_cde_hdr_replace *replace)
325{
326 struct nvgpu_mem *source_mem;
327 struct nvgpu_mem *target_mem;
328 struct gk20a *g = cde_ctx->g;
329 u32 *target_mem_ptr;
330 u64 vaddr;
331 int err;
332
333 if (replace->target_buf >= cde_ctx->num_bufs ||
334 replace->source_buf >= cde_ctx->num_bufs) {
335 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
336 replace->target_buf, replace->source_buf,
337 cde_ctx->num_bufs);
338 return -EINVAL;
339 }
340
341 source_mem = cde_ctx->mem + replace->source_buf;
342 target_mem = cde_ctx->mem + replace->target_buf;
343 target_mem_ptr = target_mem->cpu_va;
344
345 if (source_mem->size < (replace->source_byte_offset + 3) ||
346 target_mem->size < (replace->target_byte_offset + 3)) {
347 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
348 replace->target_byte_offset,
349 replace->source_byte_offset,
350 source_mem->size,
351 target_mem->size);
352 return -EINVAL;
353 }
354
355 /* calculate the target pointer */
356 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
357
358 /* determine patch value */
359 vaddr = source_mem->gpu_va + replace->source_byte_offset;
360 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
361 replace->shift, replace->mask,
362 vaddr);
363 if (err) {
364 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
365 err, replace->target_buf,
366 replace->target_byte_offset,
367 replace->source_buf,
368 replace->source_byte_offset);
369 }
370
371 return err;
372}
373
374static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
375{
376 struct gk20a *g = cde_ctx->g;
377 struct nvgpu_mem *target_mem;
378 u32 *target_mem_ptr;
379 u64 new_data;
380 int user_id = 0, err;
381 unsigned int i;
382
383 for (i = 0; i < cde_ctx->num_params; i++) {
384 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
385 target_mem = cde_ctx->mem + param->target_buf;
386 target_mem_ptr = target_mem->cpu_va;
387 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
388
389 switch (param->id) {
390 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
391 new_data = g->gr.comptags_per_cacheline;
392 break;
393 case TYPE_PARAM_GPU_CONFIGURATION:
394 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
395 g->gr.cacheline_size;
396 break;
397 case TYPE_PARAM_FIRSTPAGEOFFSET:
398 new_data = cde_ctx->surf_param_offset;
399 break;
400 case TYPE_PARAM_NUMPAGES:
401 new_data = cde_ctx->surf_param_lines;
402 break;
403 case TYPE_PARAM_BACKINGSTORE:
404 new_data = cde_ctx->backing_store_vaddr;
405 break;
406 case TYPE_PARAM_DESTINATION:
407 new_data = cde_ctx->compbit_vaddr;
408 break;
409 case TYPE_PARAM_DESTINATION_SIZE:
410 new_data = cde_ctx->compbit_size;
411 break;
412 case TYPE_PARAM_BACKINGSTORE_SIZE:
413 new_data = g->gr.compbit_store.mem.size;
414 break;
415 case TYPE_PARAM_SOURCE_SMMU_ADDR:
416 new_data = gk20a_mm_gpuva_to_iova_base(cde_ctx->vm,
417 cde_ctx->surf_vaddr);
418 if (new_data == 0)
419 return -EINVAL;
420 break;
421 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
422 new_data = g->gr.compbit_store.base_hw;
423 break;
424 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
425 new_data = g->gr.gobs_per_comptagline_per_slice;
426 break;
427 case TYPE_PARAM_SCATTERBUFFER:
428 new_data = cde_ctx->scatterbuffer_vaddr;
429 break;
430 case TYPE_PARAM_SCATTERBUFFER_SIZE:
431 new_data = cde_ctx->scatterbuffer_size;
432 break;
433 default:
434 user_id = param->id - NUM_RESERVED_PARAMS;
435 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
436 continue;
437 new_data = cde_ctx->user_param_values[user_id];
438 }
439
440 gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
441 i, param->id, param->target_buf,
442 param->target_byte_offset, new_data,
443 param->data_offset, param->type, param->shift,
444 param->mask);
445
446 new_data += param->data_offset;
447
448 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
449 param->shift, param->mask, new_data);
450
451 if (err) {
452 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
453 err, i, param->id, param->target_buf,
454 param->target_byte_offset, new_data);
455 return err;
456 }
457 }
458
459 return 0;
460}
461
462static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
463 struct nvgpu_firmware *img,
464 struct gk20a_cde_hdr_param *param)
465{
466 struct nvgpu_mem *target_mem;
467 struct gk20a *g = cde_ctx->g;
468
469 if (param->target_buf >= cde_ctx->num_bufs) {
470 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
471 cde_ctx->num_params, param->target_buf,
472 cde_ctx->num_bufs);
473 return -EINVAL;
474 }
475
476 target_mem = cde_ctx->mem + param->target_buf;
477 if (target_mem->size < (param->target_byte_offset + 3)) {
478 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
479 cde_ctx->num_params, param->target_byte_offset,
480 target_mem->size);
481 return -EINVAL;
482 }
483
484 /* does this parameter fit into our parameter structure */
485 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
486 nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
487 cde_ctx->num_params);
488 return -ENOMEM;
489 }
490
491 /* is the given id valid? */
492 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
493 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
 494			   cde_ctx->num_params, param->id,
495 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
496 return -EINVAL;
497 }
498
499 cde_ctx->params[cde_ctx->num_params] = *param;
500 cde_ctx->num_params++;
501
502 return 0;
503}
504
505static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
506 struct nvgpu_firmware *img,
507 u32 required_class)
508{
509 struct gk20a *g = cde_ctx->g;
510 struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx;
511 int err;
512
513 alloc_obj_ctx.class_num = required_class;
514 alloc_obj_ctx.flags = 0;
515
516 /* CDE enabled */
517 cde_ctx->ch->cde = true;
518
519 err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx);
520 if (err) {
521 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
522 err);
523 return err;
524 }
525
526 return 0;
527}
528
529static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
530 struct nvgpu_firmware *img,
531 u32 op,
532 struct gk20a_cde_cmd_elem *cmd_elem,
533 u32 num_elems)
534{
535 struct gk20a *g = cde_ctx->g;
536 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
537 u32 *num_entries;
538 unsigned int i;
539
540 /* check command type */
541 if (op == TYPE_BUF_COMMAND_INIT) {
542 gpfifo = &cde_ctx->init_convert_cmd;
543 num_entries = &cde_ctx->init_cmd_num_entries;
544 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
545 gpfifo = &cde_ctx->convert_cmd;
546 num_entries = &cde_ctx->convert_cmd_num_entries;
547 } else {
548 nvgpu_warn(g, "cde: unknown command. op=%u",
549 op);
550 return -EINVAL;
551 }
552
553 /* allocate gpfifo entries to be pushed */
554 *gpfifo = nvgpu_kzalloc(cde_ctx->g,
555 sizeof(struct nvgpu_gpfifo) * num_elems);
556 if (!*gpfifo) {
557 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
558 return -ENOMEM;
559 }
560
561 gpfifo_elem = *gpfifo;
562 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
563 struct nvgpu_mem *target_mem;
564
565 /* validate the current entry */
566 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
567 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
568 cmd_elem->target_buf, cde_ctx->num_bufs);
569 return -EINVAL;
570 }
571
572 target_mem = cde_ctx->mem + cmd_elem->target_buf;
 573		if (target_mem->size <
574 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
575 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
576 target_mem->size,
577 cmd_elem->target_byte_offset,
578 cmd_elem->num_bytes);
579 return -EINVAL;
580 }
581
582 /* store the element into gpfifo */
583 gpfifo_elem->entry0 =
584 u64_lo32(target_mem->gpu_va +
585 cmd_elem->target_byte_offset);
586 gpfifo_elem->entry1 =
587 u64_hi32(target_mem->gpu_va +
588 cmd_elem->target_byte_offset) |
589 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
590 sizeof(u32));
591 }
592
593 *num_entries = num_elems;
594 return 0;
595}
596
597static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
598{
599 struct gk20a *g = cde_ctx->g;
600 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
601 sizeof(struct nvgpu_gpfifo);
602 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
603 sizeof(struct nvgpu_gpfifo);
604 unsigned long total_bytes = init_bytes + conv_bytes;
605 struct nvgpu_gpfifo *combined_cmd;
606
607 /* allocate buffer that has space for both */
608 combined_cmd = nvgpu_kzalloc(cde_ctx->g, total_bytes);
609 if (!combined_cmd) {
610 nvgpu_warn(g,
611 "cde: could not allocate memory for gpfifo entries");
612 return -ENOMEM;
613 }
614
615 /* move the original init here and append convert */
616 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
617 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
618 cde_ctx->convert_cmd, conv_bytes);
619
620 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
621 nvgpu_kfree(cde_ctx->g, cde_ctx->convert_cmd);
622
623 cde_ctx->init_convert_cmd = combined_cmd;
624 cde_ctx->convert_cmd = combined_cmd
625 + cde_ctx->init_cmd_num_entries;
626
627 return 0;
628}
629
630static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
631 struct nvgpu_firmware *img)
632{
633 struct gk20a *g = cde_ctx->g;
634 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
635 u32 *data = (u32 *)img->data;
636 u32 num_of_elems;
637 struct gk20a_cde_hdr_elem *elem;
638 u32 min_size = 0;
639 int err = 0;
640 unsigned int i;
641
642 min_size += 2 * sizeof(u32);
643 if (img->size < min_size) {
644 nvgpu_warn(g, "cde: invalid image header");
645 return -EINVAL;
646 }
647
648 cde_app->firmware_version = data[0];
649 num_of_elems = data[1];
650
651 min_size += num_of_elems * sizeof(*elem);
652 if (img->size < min_size) {
653 nvgpu_warn(g, "cde: bad image");
654 return -EINVAL;
655 }
656
657 elem = (struct gk20a_cde_hdr_elem *)&data[2];
658 for (i = 0; i < num_of_elems; i++) {
 659		err = 0;
660 switch (elem->type) {
661 case TYPE_BUF:
662 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
663 break;
664 case TYPE_REPLACE:
665 err = gk20a_init_cde_replace(cde_ctx, img,
666 &elem->replace);
667 break;
668 case TYPE_PARAM:
669 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
670 break;
671 case TYPE_REQUIRED_CLASS:
672 err = gk20a_init_cde_required_class(cde_ctx, img,
673 elem->required_class);
674 break;
675 case TYPE_COMMAND:
676 {
677 struct gk20a_cde_cmd_elem *cmd = (void *)
678 &img->data[elem->command.data_byte_offset];
679 err = gk20a_init_cde_command(cde_ctx, img,
680 elem->command.op, cmd,
681 elem->command.num_entries);
682 break;
683 }
684 case TYPE_ARRAY:
685 memcpy(&cde_app->arrays[elem->array.id][0],
686 elem->array.data,
687 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
688 break;
689 default:
690 nvgpu_warn(g, "cde: unknown header element");
691 err = -EINVAL;
692 }
693
694 if (err)
695 goto deinit_image;
696
697 elem++;
698 }
699
700 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
701 nvgpu_warn(g, "cde: convert command not defined");
702 err = -EINVAL;
703 goto deinit_image;
704 }
705
706 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
707 nvgpu_warn(g, "cde: convert command not defined");
708 err = -EINVAL;
709 goto deinit_image;
710 }
711
712 err = gk20a_cde_pack_cmdbufs(cde_ctx);
713 if (err)
714 goto deinit_image;
715
716 return 0;
717
718deinit_image:
719 gk20a_deinit_cde_img(cde_ctx);
720 return err;
721}
722
723static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
724 u32 op, struct nvgpu_fence *fence,
725 u32 flags, struct gk20a_fence **fence_out)
726{
727 struct gk20a *g = cde_ctx->g;
728 struct nvgpu_gpfifo *gpfifo = NULL;
729 int num_entries = 0;
730
731 /* check command type */
732 if (op == TYPE_BUF_COMMAND_INIT) {
733 /* both init and convert combined */
734 gpfifo = cde_ctx->init_convert_cmd;
735 num_entries = cde_ctx->init_cmd_num_entries
736 + cde_ctx->convert_cmd_num_entries;
737 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
738 gpfifo = cde_ctx->convert_cmd;
739 num_entries = cde_ctx->convert_cmd_num_entries;
740 } else {
741 nvgpu_warn(g, "cde: unknown buffer");
742 return -EINVAL;
743 }
744
745 if (gpfifo == NULL || num_entries == 0) {
746 nvgpu_warn(g, "cde: buffer not available");
747 return -ENOSYS;
748 }
749
750 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
751 num_entries, flags, fence, fence_out, true,
752 NULL);
753}
754
755static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
756__acquires(&cde_app->mutex)
757__releases(&cde_app->mutex)
758{
759 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
760
761 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
762 trace_gk20a_cde_release(cde_ctx);
763
764 nvgpu_mutex_acquire(&cde_app->mutex);
765
766 if (cde_ctx->in_use) {
767 cde_ctx->in_use = false;
768 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
769 cde_app->ctx_usecount--;
770 } else {
771 gk20a_dbg_info("double release cde context %p", cde_ctx);
772 }
773
774 nvgpu_mutex_release(&cde_app->mutex);
775}
776
777static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
778__acquires(&cde_app->mutex)
779__releases(&cde_app->mutex)
780{
781 struct delayed_work *delay_work = to_delayed_work(work);
782 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
783 struct gk20a_cde_ctx, ctx_deleter_work);
784 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
785 struct gk20a *g = cde_ctx->g;
786 int err;
787
788 /* someone has just taken it? engine deletion started? */
789 if (cde_ctx->in_use || !cde_app->initialised)
790 return;
791
792 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
793 "cde: attempting to delete temporary %p", cde_ctx);
794
795 err = gk20a_busy(g);
796 if (err) {
797 /* this context would find new use anyway later, so not freeing
798 * here does not leak anything */
799 nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
800 " temp ctx deletion");
801 return;
802 }
803
804 nvgpu_mutex_acquire(&cde_app->mutex);
805 if (cde_ctx->in_use || !cde_app->initialised) {
806 gk20a_dbg(gpu_dbg_cde_ctx,
807 "cde: context use raced, not deleting %p",
808 cde_ctx);
809 goto out;
810 }
811
812 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
813 "double pending %p", cde_ctx);
814
815 gk20a_cde_remove_ctx(cde_ctx);
816 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
817 "cde: destroyed %p count=%d use=%d max=%d",
818 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
819 cde_app->ctx_count_top);
820
821out:
822 nvgpu_mutex_release(&cde_app->mutex);
823 gk20a_idle(g);
824}
825
826static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct gk20a *g)
827__must_hold(&cde_app->mutex)
828{
829 struct gk20a_cde_app *cde_app = &g->cde_app;
830 struct gk20a_cde_ctx *cde_ctx;
831
832 /* exhausted? */
833
834 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
835 return ERR_PTR(-EAGAIN);
836
837 /* idle context available? */
838
839 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
840 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
841 gk20a_cde_ctx, list);
842 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
843 "cde: got free %p count=%d use=%d max=%d",
844 cde_ctx, cde_app->ctx_count,
845 cde_app->ctx_usecount,
846 cde_app->ctx_count_top);
847 trace_gk20a_cde_get_context(cde_ctx);
848
849 /* deleter work may be scheduled, but in_use prevents it */
850 cde_ctx->in_use = true;
851 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
852 cde_app->ctx_usecount++;
853
854 /* cancel any deletions now that ctx is in use */
855 gk20a_cde_cancel_deleter(cde_ctx, true);
856 return cde_ctx;
857 }
858
859 /* no free contexts, get a temporary one */
860
861 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
862 "cde: no free contexts, count=%d",
863 cde_app->ctx_count);
864
865 cde_ctx = gk20a_cde_allocate_context(g);
866 if (IS_ERR(cde_ctx)) {
867 nvgpu_warn(g, "cde: cannot allocate context: %ld",
868 PTR_ERR(cde_ctx));
869 return cde_ctx;
870 }
871
872 trace_gk20a_cde_get_context(cde_ctx);
873 cde_ctx->in_use = true;
874 cde_ctx->is_temporary = true;
875 cde_app->ctx_usecount++;
876 cde_app->ctx_count++;
877 if (cde_app->ctx_count > cde_app->ctx_count_top)
878 cde_app->ctx_count_top = cde_app->ctx_count;
879 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
880
881 return cde_ctx;
882}
883
884static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g)
885__releases(&cde_app->mutex)
886__acquires(&cde_app->mutex)
887{
888 struct gk20a_cde_app *cde_app = &g->cde_app;
889 struct gk20a_cde_ctx *cde_ctx = NULL;
890 struct nvgpu_timeout timeout;
891
892 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
893 NVGPU_TIMER_CPU_TIMER);
894
895 do {
896 cde_ctx = gk20a_cde_do_get_context(g);
897 if (PTR_ERR(cde_ctx) != -EAGAIN)
898 break;
899
900 /* exhausted, retry */
901 nvgpu_mutex_release(&cde_app->mutex);
902 cond_resched();
903 nvgpu_mutex_acquire(&cde_app->mutex);
904 } while (!nvgpu_timeout_expired(&timeout));
905
906 return cde_ctx;
907}
908
909static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
910{
911 struct gk20a_cde_ctx *cde_ctx;
912 int ret;
913
914 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
915 if (!cde_ctx)
916 return ERR_PTR(-ENOMEM);
917
918 cde_ctx->g = g;
919 cde_ctx->dev = dev_from_gk20a(g);
920
921 ret = gk20a_cde_load(cde_ctx);
922 if (ret) {
923 nvgpu_kfree(g, cde_ctx);
924 return ERR_PTR(ret);
925 }
926
927 nvgpu_init_list_node(&cde_ctx->list);
928 cde_ctx->is_temporary = false;
929 cde_ctx->in_use = false;
930 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
931 gk20a_cde_ctx_deleter_fn);
932
933 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
934 trace_gk20a_cde_allocate_context(cde_ctx);
935 return cde_ctx;
936}
937
938int gk20a_cde_convert(struct gk20a *g,
939 struct dma_buf *compbits_scatter_buf,
940 u64 compbits_byte_offset,
941 u64 scatterbuffer_byte_offset,
942 struct nvgpu_fence *fence,
943 u32 __flags, struct gk20a_cde_param *params,
944 int num_params, struct gk20a_fence **fence_out)
945__acquires(&cde_app->mutex)
946__releases(&cde_app->mutex)
947{
948 struct gk20a_cde_ctx *cde_ctx = NULL;
949 struct gk20a_comptags comptags;
950 u64 mapped_compbits_offset = 0;
951 u64 compbits_size = 0;
952 u64 mapped_scatterbuffer_offset = 0;
953 u64 scatterbuffer_size = 0;
954 u64 map_vaddr = 0;
955 u64 map_offset = 0;
956 u64 map_size = 0;
957 u8 *surface = NULL;
958 u64 big_page_mask = 0;
959 u32 flags;
960 int err, i;
961 const s32 compbits_kind = 0;
962
963 gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
964 compbits_byte_offset, scatterbuffer_byte_offset);
965
966 /* scatter buffer must be after compbits buffer */
967 if (scatterbuffer_byte_offset &&
968 scatterbuffer_byte_offset < compbits_byte_offset)
969 return -EINVAL;
970
971 err = gk20a_busy(g);
972 if (err)
973 return err;
974
975 nvgpu_mutex_acquire(&g->cde_app.mutex);
976 cde_ctx = gk20a_cde_get_context(g);
977 nvgpu_mutex_release(&g->cde_app.mutex);
978 if (IS_ERR(cde_ctx)) {
979 err = PTR_ERR(cde_ctx);
980 goto exit_idle;
981 }
982
983 /* First, map the buffer to local va */
984
985 /* ensure that the compbits buffer has drvdata */
986 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
987 dev_from_gk20a(g));
988 if (err)
989 goto exit_idle;
990
991 /* compbits don't start at page aligned offset, so we need to align
992 the region to be mapped */
993 big_page_mask = cde_ctx->vm->big_page_size - 1;
994 map_offset = compbits_byte_offset & ~big_page_mask;
995 map_size = compbits_scatter_buf->size - map_offset;
996
997
998 /* compute compbit start offset from the beginning of the mapped
999 area */
1000 mapped_compbits_offset = compbits_byte_offset - map_offset;
1001 if (scatterbuffer_byte_offset) {
1002 compbits_size = scatterbuffer_byte_offset -
1003 compbits_byte_offset;
1004 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1005 map_offset;
1006 scatterbuffer_size = compbits_scatter_buf->size -
1007 scatterbuffer_byte_offset;
1008 } else {
1009 compbits_size = compbits_scatter_buf->size -
1010 compbits_byte_offset;
1011 }
1012
1013 gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1014 map_offset, map_size);
1015 gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1016 mapped_compbits_offset, compbits_size);
1017 gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1018 mapped_scatterbuffer_offset, scatterbuffer_size);
1019
1020
1021 /* map the destination buffer */
1022 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
1023 map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
1024 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1025 compbits_kind, true,
1026 gk20a_mem_flag_none,
1027 map_offset, map_size,
1028 NULL);
1029 if (!map_vaddr) {
1030 dma_buf_put(compbits_scatter_buf);
1031 err = -EINVAL;
1032 goto exit_idle;
1033 }
1034
1035 if (scatterbuffer_byte_offset &&
1036 g->ops.cde.need_scatter_buffer &&
1037 g->ops.cde.need_scatter_buffer(g)) {
1038 struct sg_table *sgt;
1039 void *scatter_buffer;
1040
1041 surface = dma_buf_vmap(compbits_scatter_buf);
 1042		if (IS_ERR_OR_NULL(surface)) {
1043 nvgpu_warn(g,
1044 "dma_buf_vmap failed");
1045 err = -EINVAL;
1046 goto exit_unmap_vaddr;
1047 }
1048
1049 scatter_buffer = surface + scatterbuffer_byte_offset;
1050
1051 gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1052 surface, scatter_buffer);
1053 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf);
1054 if (IS_ERR(sgt)) {
1055 nvgpu_warn(g,
1056 "mm_pin failed");
1057 err = -EINVAL;
1058 goto exit_unmap_surface;
1059 } else {
1060 err = g->ops.cde.populate_scatter_buffer(g, sgt,
1061 compbits_byte_offset, scatter_buffer,
1062 scatterbuffer_size);
1063 WARN_ON(err);
1064
1065 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1066 sgt);
1067 if (err)
1068 goto exit_unmap_surface;
1069 }
1070
1071 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1072 dma_buf_vunmap(compbits_scatter_buf, surface);
1073 surface = NULL;
1074 }
1075
1076 /* store source buffer compression tags */
1077 gk20a_get_comptags(dev_from_gk20a(g), compbits_scatter_buf, &comptags);
1078 cde_ctx->surf_param_offset = comptags.offset;
1079 cde_ctx->surf_param_lines = comptags.lines;
1080
1081 /* store surface vaddr. This is actually compbit vaddr, but since
1082 compbits live in the same surface, and we can get the alloc base
1083 address by using gk20a_mm_gpuva_to_iova_base, this will do */
1084 cde_ctx->surf_vaddr = map_vaddr;
1085
1086 /* store information about destination */
1087 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1088 cde_ctx->compbit_size = compbits_size;
1089
1090 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1091 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1092
1093 /* remove existing argument data */
1094 memset(cde_ctx->user_param_values, 0,
1095 sizeof(cde_ctx->user_param_values));
1096
1097 /* read user space arguments for the conversion */
1098 for (i = 0; i < num_params; i++) {
1099 struct gk20a_cde_param *param = params + i;
1100 int id = param->id - NUM_RESERVED_PARAMS;
1101
1102 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1103 nvgpu_warn(g, "cde: unknown user parameter");
1104 err = -EINVAL;
1105 goto exit_unmap_surface;
1106 }
1107 cde_ctx->user_param_values[id] = param->value;
1108 }
1109
1110 /* patch data */
1111 err = gk20a_cde_patch_params(cde_ctx);
1112 if (err) {
1113 nvgpu_warn(g, "cde: failed to patch parameters");
1114 goto exit_unmap_surface;
1115 }
1116
1117 gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1118 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1119 gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1120 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1121 gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1122 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1123
 1124	/* always take the postfence as it is needed for protecting the
 1125	 * cde context */
1126 flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
1127
 1128	/* gk20a_cde_execute_buffer() will grab a power reference of its own */
1129 gk20a_idle(g);
1130
1131 /* execute the conversion buffer, combined with init first if it's the
1132 * first time */
1133 err = gk20a_cde_execute_buffer(cde_ctx,
1134 cde_ctx->init_cmd_executed
1135 ? TYPE_BUF_COMMAND_CONVERT
1136 : TYPE_BUF_COMMAND_INIT,
1137 fence, flags, fence_out);
1138
1139 cde_ctx->init_cmd_executed = true;
1140
1141 /* unmap the buffers - channel holds references to them now */
1142 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
1143
1144 return err;
1145
1146exit_unmap_surface:
1147 if (surface)
1148 dma_buf_vunmap(compbits_scatter_buf, surface);
1149exit_unmap_vaddr:
1150 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
1151exit_idle:
1152 gk20a_idle(g);
1153 return err;
1154}
1155
1156static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1157__acquires(&cde_app->mutex)
1158__releases(&cde_app->mutex)
1159{
1160 struct gk20a_cde_ctx *cde_ctx = data;
1161 struct gk20a *g = cde_ctx->g;
1162 struct gk20a_cde_app *cde_app = &g->cde_app;
1163 bool channel_idle;
1164
1165 channel_gk20a_joblist_lock(ch);
1166 channel_idle = channel_gk20a_joblist_is_empty(ch);
1167 channel_gk20a_joblist_unlock(ch);
1168
1169 if (!channel_idle)
1170 return;
1171
1172 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1173 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1174 if (!cde_ctx->in_use)
1175 gk20a_dbg_info("double finish cde context %p on channel %p",
1176 cde_ctx, ch);
1177
1178 if (ch->has_timedout) {
1179 if (cde_ctx->is_temporary) {
1180 nvgpu_warn(g,
1181 "cde: channel had timed out"
1182 " (temporary channel)");
1183 /* going to be deleted anyway */
1184 } else {
1185 nvgpu_warn(g,
1186 "cde: channel had timed out"
1187 ", reloading");
1188 /* mark it to be deleted, replace with a new one */
1189 nvgpu_mutex_acquire(&cde_app->mutex);
1190 cde_ctx->is_temporary = true;
1191 if (gk20a_cde_create_context(g)) {
1192 nvgpu_err(g, "cde: can't replace context");
1193 }
1194 nvgpu_mutex_release(&cde_app->mutex);
1195 }
1196 }
1197
1198 /* delete temporary contexts later (watch for doubles) */
1199 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1200 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1201 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1202 msecs_to_jiffies(CTX_DELETE_TIME));
1203 }
1204
1205 if (!ch->has_timedout)
1206 gk20a_cde_ctx_release(cde_ctx);
1207}
1208
1209static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1210{
1211 struct gk20a *g = cde_ctx->g;
1212 struct nvgpu_firmware *img;
1213 struct channel_gk20a *ch;
1214 struct gr_gk20a *gr = &g->gr;
1215 int err = 0;
1216 u64 vaddr;
1217
1218 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1219 if (!img) {
1220 nvgpu_err(g, "cde: could not fetch the firmware");
1221 return -ENOSYS;
1222 }
1223
1224 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1225 cde_ctx,
1226 -1,
1227 false);
1228 if (!ch) {
1229 nvgpu_warn(g, "cde: gk20a channel not available");
1230 err = -ENOMEM;
1231 goto err_get_gk20a_channel;
1232 }
1233
1234 /* bind the channel to the vm */
1235 err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
1236 if (err) {
1237 nvgpu_warn(g, "cde: could not bind vm");
1238 goto err_commit_va;
1239 }
1240
1241 /* allocate gpfifo (1024 should be more than enough) */
1242 err = gk20a_channel_alloc_gpfifo(ch, 1024, 0, 0);
1243 if (err) {
1244 nvgpu_warn(g, "cde: unable to allocate gpfifo");
1245 goto err_alloc_gpfifo;
1246 }
1247
1248 /* map backing store to gpu virtual space */
1249 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1250 g->gr.compbit_store.mem.size,
1251 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1252 gk20a_mem_flag_read_only,
1253 false,
1254 gr->compbit_store.mem.aperture);
1255
1256 if (!vaddr) {
1257 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1258 err = -ENOMEM;
1259 goto err_map_backingstore;
1260 }
1261
1262 /* store initialisation data */
1263 cde_ctx->ch = ch;
1264 cde_ctx->vm = ch->vm;
1265 cde_ctx->backing_store_vaddr = vaddr;
1266
1267 /* initialise the firmware */
1268 err = gk20a_init_cde_img(cde_ctx, img);
1269 if (err) {
1270 nvgpu_warn(g, "cde: image initialisation failed");
1271 goto err_init_cde_img;
1272 }
1273
1274 /* initialisation done */
1275 nvgpu_release_firmware(g, img);
1276
1277 return 0;
1278
1279err_init_cde_img:
1280 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1281err_map_backingstore:
1282err_alloc_gpfifo:
1283 nvgpu_vm_put(ch->vm);
1284err_commit_va:
1285err_get_gk20a_channel:
1286 nvgpu_release_firmware(g, img);
1287 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1288 return err;
1289}
1290
1291int gk20a_cde_reload(struct gk20a *g)
1292__acquires(&cde_app->mutex)
1293__releases(&cde_app->mutex)
1294{
1295 struct gk20a_cde_app *cde_app = &g->cde_app;
1296 int err;
1297
1298 if (!cde_app->initialised)
1299 return -ENOSYS;
1300
1301 err = gk20a_busy(g);
1302 if (err)
1303 return err;
1304
1305 nvgpu_mutex_acquire(&cde_app->mutex);
1306
1307 gk20a_cde_stop(g);
1308
1309 err = gk20a_cde_create_contexts(g);
1310 if (!err)
1311 cde_app->initialised = true;
1312
1313 nvgpu_mutex_release(&cde_app->mutex);
1314
1315 gk20a_idle(g);
1316 return err;
1317}
1318
1319int gk20a_init_cde_support(struct gk20a *g)
1320__acquires(&cde_app->mutex)
1321__releases(&cde_app->mutex)
1322{
1323 struct gk20a_cde_app *cde_app = &g->cde_app;
1324 int err;
1325
1326 if (cde_app->initialised)
1327 return 0;
1328
1329 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1330
1331 err = nvgpu_mutex_init(&cde_app->mutex);
1332 if (err)
1333 return err;
1334
1335 nvgpu_mutex_acquire(&cde_app->mutex);
1336
1337 nvgpu_init_list_node(&cde_app->free_contexts);
1338 nvgpu_init_list_node(&cde_app->used_contexts);
1339 cde_app->ctx_count = 0;
1340 cde_app->ctx_count_top = 0;
1341 cde_app->ctx_usecount = 0;
1342
1343 err = gk20a_cde_create_contexts(g);
1344 if (!err)
1345 cde_app->initialised = true;
1346
1347 nvgpu_mutex_release(&cde_app->mutex);
1348 gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1349
1350 if (err)
1351 nvgpu_mutex_destroy(&cde_app->mutex);
1352
1353 return err;
1354}
1355
1356enum cde_launch_patch_id {
1357 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1358 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1359 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1360 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1361 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1362 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1363 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1364 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1365 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1366 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1367 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1368 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1369 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1370 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1371 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1372 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1373 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1374 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1375 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1376 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1377 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1378 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1379 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1380 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1381 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1382 PATCH_H_LAUNCH_WORD1_ID = 1049,
1383 PATCH_H_LAUNCH_WORD2_ID = 1050,
1384 PATCH_V_LAUNCH_WORD1_ID = 1051,
1385 PATCH_V_LAUNCH_WORD2_ID = 1052,
1386 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1387 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1388 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1389 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1390};
1391
1392/* maximum number of WRITE_PATCHes in the below function */
1393#define MAX_CDE_LAUNCH_PATCHES 32
1394
1395static int gk20a_buffer_convert_gpu_to_cde_v1(
1396 struct gk20a *g,
1397 struct dma_buf *dmabuf, u32 consumer,
1398 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1399 u64 scatterbuffer_offset,
1400 u32 width, u32 height, u32 block_height_log2,
1401 u32 submit_flags, struct nvgpu_fence *fence_in,
1402 struct gk20a_buffer_state *state)
1403{
1404 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1405 int param = 0;
1406 int err = 0;
1407 struct gk20a_fence *new_fence = NULL;
1408 const int wgx = 8;
1409 const int wgy = 8;
1410 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1411 const int xalign = compbits_per_byte * wgx;
1412 const int yalign = wgy;
1413
1414 /* Compute per launch parameters */
1415 const int xtiles = (width + 7) >> 3;
1416 const int ytiles = (height + 7) >> 3;
1417 const int gridw_h = roundup(xtiles, xalign) / xalign;
1418 const int gridh_h = roundup(ytiles, yalign) / yalign;
1419 const int gridw_v = roundup(ytiles, xalign) / xalign;
1420 const int gridh_v = roundup(xtiles, yalign) / yalign;
1421 const int xblocks = (xtiles + 1) >> 1;
1422 const int voffset = compbits_voffset - compbits_hoffset;
1423
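	/*
	 * Worked example (illustrative numbers, not from the original source):
	 * a 1920x1080 surface gives xtiles = 240 and ytiles = 135; with
	 * xalign = 32 and yalign = 8 this yields gridw_h = 8, gridh_h = 17,
	 * gridw_v = 5, gridh_v = 30 and xblocks = 120.
	 */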
1424 int hprog = -1;
1425 int vprog = -1;
1426
1427 if (g->ops.cde.get_program_numbers)
1428 g->ops.cde.get_program_numbers(g, block_height_log2,
1429 &hprog, &vprog);
1430 else {
1431 nvgpu_warn(g, "cde: chip not supported");
1432 return -ENOSYS;
1433 }
1434
1435 if (hprog < 0 || vprog < 0) {
1436 nvgpu_warn(g, "cde: could not determine programs");
1437 return -ENOSYS;
1438 }
1439
1440 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1441 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1442 xtiles, ytiles);
1443
1444 gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1445 width, height, block_height_log2,
1446 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1447 gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1448 width, height, xtiles, ytiles);
1449 gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1450 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1451 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1452 hprog,
1453 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1454 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1455 vprog,
1456 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1457 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1458
1459 /* Write parameters */
1460#define WRITE_PATCH(NAME, VALUE) \
1461 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1462 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1463 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1464 block_height_log2);
1465 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1466 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1467 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1468 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1469 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1470
1471 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1472 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1473 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1474 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1475 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1476 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1477
1478 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1479 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1480 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1481 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1482 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1483 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1484
1485 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1486 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1487 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1488 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1489 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1490 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1491 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1492 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1493
1494 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1495 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1496 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1497 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1498 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1499 } else {
1500 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1501 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1502 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1503 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1504 }
1505
1506 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1507 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1508 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1509 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1510 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1511 } else {
1512 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1513 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1514 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1515 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1516 }
1517#undef WRITE_PATCH
1518
1519 err = gk20a_cde_convert(g, dmabuf,
1520 compbits_hoffset,
1521 scatterbuffer_offset,
1522 fence_in, submit_flags,
1523 params, param, &new_fence);
1524 if (err)
1525 goto out;
1526
1527 /* compbits generated, update state & fence */
1528 gk20a_fence_put(state->fence);
1529 state->fence = new_fence;
1530 state->valid_compbits |= consumer &
1531 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1532out:
1533 return err;
1534}
1535
1536static int gk20a_buffer_convert_gpu_to_cde(
1537 struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
1538 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1539 u64 scatterbuffer_offset,
1540 u32 width, u32 height, u32 block_height_log2,
1541 u32 submit_flags, struct nvgpu_fence *fence_in,
1542 struct gk20a_buffer_state *state)
1543{
1544 int err = 0;
1545
1546 if (!g->cde_app.initialised)
1547 return -ENOSYS;
1548
1549 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1550 g->cde_app.firmware_version);
1551
1552 if (g->cde_app.firmware_version == 1) {
1553 err = gk20a_buffer_convert_gpu_to_cde_v1(
1554 g, dmabuf, consumer, offset, compbits_hoffset,
1555 compbits_voffset, scatterbuffer_offset,
1556 width, height, block_height_log2,
1557 submit_flags, fence_in, state);
1558 } else {
1559 nvgpu_err(g, "unsupported CDE firmware version %d",
1560 g->cde_app.firmware_version);
1561 err = -EINVAL;
1562 }
1563
1564 return err;
1565}
1566
1567int gk20a_prepare_compressible_read(
1568 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
1569 u64 compbits_hoffset, u64 compbits_voffset,
1570 u64 scatterbuffer_offset,
1571 u32 width, u32 height, u32 block_height_log2,
1572 u32 submit_flags, struct nvgpu_fence *fence,
1573 u32 *valid_compbits, u32 *zbc_color,
1574 struct gk20a_fence **fence_out)
1575{
1576 int err = 0;
1577 struct gk20a_buffer_state *state;
1578 struct dma_buf *dmabuf;
1579 u32 missing_bits;
1580
1581 dmabuf = dma_buf_get(buffer_fd);
1582 if (IS_ERR(dmabuf))
1583 return -EINVAL;
1584
1585 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1586 if (err) {
1587 dma_buf_put(dmabuf);
1588 return err;
1589 }
1590
1591 missing_bits = (state->valid_compbits ^ request) & request;
1592
1593 nvgpu_mutex_acquire(&state->lock);
1594
1595 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1596
1597 gk20a_fence_put(state->fence);
1598 state->fence = NULL;
1599 /* state->fence = decompress();
1600 state->valid_compbits = 0; */
1601 err = -EINVAL;
1602 goto out;
1603 } else if (missing_bits) {
1604 u32 missing_cde_bits = missing_bits &
1605 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1606 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1607 missing_cde_bits) {
1608 err = gk20a_buffer_convert_gpu_to_cde(
1609 g, dmabuf,
1610 missing_cde_bits,
1611 offset, compbits_hoffset,
1612 compbits_voffset, scatterbuffer_offset,
1613 width, height, block_height_log2,
1614 submit_flags, fence,
1615 state);
1616 if (err)
1617 goto out;
1618 }
1619 }
1620
1621 if (state->fence && fence_out)
1622 *fence_out = gk20a_fence_get(state->fence);
1623
1624 if (valid_compbits)
1625 *valid_compbits = state->valid_compbits;
1626
1627 if (zbc_color)
1628 *zbc_color = state->zbc_color;
1629
1630out:
1631 nvgpu_mutex_release(&state->lock);
1632 dma_buf_put(dmabuf);
1633 return err;
1634}
1635
1636int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1637 u32 valid_compbits, u64 offset, u32 zbc_color)
1638{
1639 int err;
1640 struct gk20a_buffer_state *state;
1641 struct dma_buf *dmabuf;
1642
1643 dmabuf = dma_buf_get(buffer_fd);
1644 if (IS_ERR(dmabuf)) {
1645 nvgpu_err(g, "invalid dmabuf");
1646 return -EINVAL;
1647 }
1648
1649 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1650 if (err) {
1651 nvgpu_err(g, "could not get state from dmabuf");
1652 dma_buf_put(dmabuf);
1653 return err;
1654 }
1655
1656 nvgpu_mutex_acquire(&state->lock);
1657
1658 /* Update the compbits state. */
1659 state->valid_compbits = valid_compbits;
1660 state->zbc_color = zbc_color;
1661
1662 /* Discard previous compbit job fence. */
1663 gk20a_fence_put(state->fence);
1664 state->fence = NULL;
1665
1666 nvgpu_mutex_release(&state->lock);
1667 dma_buf_put(dmabuf);
1668 return 0;
1669}
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
deleted file mode 100644
index 4f400bf3..00000000
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ /dev/null
@@ -1,311 +0,0 @@
1/*
2 * GK20A color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_
21
22#include "mm_gk20a.h"
23
24#define MAX_CDE_BUFS 10
25#define MAX_CDE_PARAMS 64
26#define MAX_CDE_USER_PARAMS 40
27#define MAX_CDE_ARRAY_ENTRIES 9
28
29/*
30 * The size of the context ring buffer that is dedicated for handling cde
 31 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
 32 * wait on the previous job submitted to that channel, so increasing this value
33 * reduces the likelihood of stalls.
34 */
35#define NUM_CDE_CONTEXTS 4
36
37struct dma_buf;
38struct gk20a;
39
40/*
41 * this element defines a buffer that is allocated and mapped into gpu address
 42 * space. data_byte_offset defines where the buffer's initial data begins
 43 * inside the firmware image. num_bytes defines the size of the buffer in bytes.
44 *
45 * If data_byte_offset is zero, we allocate an empty buffer.
46 */
47
48struct gk20a_cde_hdr_buf {
49 u64 data_byte_offset;
50 u64 num_bytes;
51};
52
53/*
 54 * this element defines a constant patch in a buffer. It computes the GPU
 55 * virtual address of <source_buf>+source_byte_offset. The address is then
 56 * folded into the target location as:
 57 * value = (current_value & ~mask) | ((address << shift) & mask)
58 *
59 * The type field defines the register size as:
60 * 0=u32,
61 * 1=u64 (little endian),
62 * 2=u64 (big endian)
63 */
64
65struct gk20a_cde_hdr_replace {
66 u32 target_buf;
67 u32 source_buf;
68 s32 shift;
69 u32 type;
70 u64 target_byte_offset;
71 u64 source_byte_offset;
72 u64 mask;
73};
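To illustrate the patch rule described above struct gk20a_cde_hdr_replace, here is a
small standalone sketch; it is not part of the original file and all values are made up.

/*
 * Standalone illustration of the replace rule; numbers are invented.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t current_value = 0xdeadbeef;  /* word already in the target buffer */
	uint64_t address = 0x12;              /* <source_buf> + source_byte_offset */
	int32_t shift = 8;
	uint64_t mask = 0x0000ff00;

	/* value = (current_value & ~mask) | ((address << shift) & mask) */
	uint32_t patched = (uint32_t)((current_value & ~mask) |
				      ((address << shift) & mask));

	printf("0x%08x\n", patched);          /* prints 0xdead12ef */
	return 0;
}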
74
75enum {
76 TYPE_PARAM_TYPE_U32 = 0,
77 TYPE_PARAM_TYPE_U64_LITTLE,
78 TYPE_PARAM_TYPE_U64_BIG
79};
80
81/*
 82 * this element defines a runtime patch in a buffer. Parameters with ids from
 83 * 0 to 1023 are reserved for special usage as follows:
84 * 0 = comptags_per_cacheline,
85 * 1 = slices_per_fbp,
86 * 2 = num_fbps
87 * 3 = source buffer first page offset
88 * 4 = source buffer block height log2
89 * 5 = backing store memory address
90 * 6 = destination memory address
91 * 7 = destination size (bytes)
92 * 8 = backing store size (bytes)
93 * 9 = cache line size
94 *
 95 * Parameters with id 1024 or above are user-specified, i.e. they determine
 96 * where parameters from user space should be placed in buffers, what their
 97 * type is, etc.
98 *
99 * Once the value is available, we add data_offset to the value.
100 *
 101 * The target location is then patched as:
 102 * value = (current_value & ~mask) | ((value << shift) & mask)
103 *
104 * The type field defines the register size as:
105 * 0=u32,
106 * 1=u64 (little endian),
107 * 2=u64 (big endian)
108 */
109
110struct gk20a_cde_hdr_param {
111 u32 id;
112 u32 target_buf;
113 s32 shift;
114 u32 type;
115 s64 data_offset;
116 u64 target_byte_offset;
117 u64 mask;
118};
119
120enum {
121 TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
122 TYPE_PARAM_GPU_CONFIGURATION,
123 TYPE_PARAM_FIRSTPAGEOFFSET,
124 TYPE_PARAM_NUMPAGES,
125 TYPE_PARAM_BACKINGSTORE,
126 TYPE_PARAM_DESTINATION,
127 TYPE_PARAM_DESTINATION_SIZE,
128 TYPE_PARAM_BACKINGSTORE_SIZE,
129 TYPE_PARAM_SOURCE_SMMU_ADDR,
130 TYPE_PARAM_BACKINGSTORE_BASE_HW,
131 TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
132 TYPE_PARAM_SCATTERBUFFER,
133 TYPE_PARAM_SCATTERBUFFER_SIZE,
134 NUM_RESERVED_PARAMS = 1024,
135};
136
137/*
138 * This header element defines a command. The op field determines whether the
139 * element is defining an init (0) or convert command (1). data_byte_offset
140 * denotes the beginning address of command elements in the file.
141 */
142
143struct gk20a_cde_hdr_command {
144 u32 op;
145 u32 num_entries;
146 u64 data_byte_offset;
147};
148
149enum {
150 TYPE_BUF_COMMAND_INIT = 0,
151 TYPE_BUF_COMMAND_CONVERT
152};
153
154/*
 155 * This command element defines one entry inside the push buffer. target_buf
 156 * identifies the buffer that holds the pushbuffer entries, target_byte_offset
 157 * the offset inside that buffer and num_bytes the size of the entry in bytes.
158 */
159
160struct gk20a_cde_cmd_elem {
161 u32 target_buf;
162 u32 padding;
163 u64 target_byte_offset;
164 u64 num_bytes;
165};
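A hedged fragment (not compiled in the original) showing how one such element is turned
into a gpfifo entry, mirroring gk20a_init_cde_command(); target_mem, cmd_elem and
gpfifo_elem are assumed to be set up as in that function.

	/* GPU VA of the pushbuffer entry described by this element */
	u64 va = target_mem->gpu_va + cmd_elem->target_byte_offset;

	gpfifo_elem->entry0 = u64_lo32(va);
	gpfifo_elem->entry1 = u64_hi32(va) |
			      pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
						       sizeof(u32));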
166
167/*
168 * This element is used for storing a small array of data.
169 */
170
171enum {
172 ARRAY_PROGRAM_OFFSET = 0,
173 ARRAY_REGISTER_COUNT,
174 ARRAY_LAUNCH_COMMAND,
175 NUM_CDE_ARRAYS
176};
177
178struct gk20a_cde_hdr_array {
179 u32 id;
180 u32 data[MAX_CDE_ARRAY_ENTRIES];
181};
182
183/*
 184 * The following defines a single header element. Each element has a type and
 185 * one of the data structures above, selected by the type.
186 */
187
188struct gk20a_cde_hdr_elem {
189 u32 type;
190 u32 padding;
191 union {
192 struct gk20a_cde_hdr_buf buf;
193 struct gk20a_cde_hdr_replace replace;
194 struct gk20a_cde_hdr_param param;
195 u32 required_class;
196 struct gk20a_cde_hdr_command command;
197 struct gk20a_cde_hdr_array array;
198 };
199};
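For orientation, the overall image layout that gk20a_init_cde_img() expects can be
sketched as follows; this is a reading aid only, real gpu2cde firmware images are
produced by NVIDIA's tools.

/*
 *   u32 firmware_version;                         (data[0])
 *   u32 num_of_elems;                             (data[1])
 *   struct gk20a_cde_hdr_elem elem[num_of_elems];
 *   ... raw buffer and command payloads referenced by the
 *       elements via their data_byte_offset fields ...
 */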
200
201enum {
202 TYPE_BUF = 0,
203 TYPE_REPLACE,
204 TYPE_PARAM,
205 TYPE_REQUIRED_CLASS,
206 TYPE_COMMAND,
207 TYPE_ARRAY
208};
209
210struct gk20a_cde_param {
211 u32 id;
212 u32 padding;
213 u64 value;
214};
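A hedged usage sketch: the parameter ids and values below are hypothetical, and g,
compbits_buf, the byte offsets, fence_in, submit_flags and fence_out are assumed to be
set up by the caller. gk20a_cde_convert() copies each value into
cde_ctx->user_param_values[] and gk20a_cde_patch_params() later writes them into the
firmware buffers using the shift/mask rules above.

	/* Hypothetical user parameters; ids at or above NUM_RESERVED_PARAMS
	 * index into cde_ctx->user_param_values[]. */
	struct gk20a_cde_param user_params[] = {
		{ .id = NUM_RESERVED_PARAMS + 0, .value = 1920 },
		{ .id = NUM_RESERVED_PARAMS + 1, .value = 1080 },
	};

	err = gk20a_cde_convert(g, compbits_buf,
				compbits_byte_offset, scatterbuffer_byte_offset,
				fence_in, submit_flags,
				user_params, ARRAY_SIZE(user_params), &fence_out);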
215
216struct gk20a_cde_ctx {
217 struct gk20a *g;
218 struct device *dev;
219
220 /* channel related data */
221 struct channel_gk20a *ch;
222 struct vm_gk20a *vm;
223
224 /* buf converter configuration */
225 struct nvgpu_mem mem[MAX_CDE_BUFS];
226 unsigned int num_bufs;
227
 228	/* buffer patching params (where patching should be done) */
229 struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
230 unsigned int num_params;
231
232 /* storage for user space parameter values */
233 u32 user_param_values[MAX_CDE_USER_PARAMS];
234
235 u32 surf_param_offset;
236 u32 surf_param_lines;
237 u64 surf_vaddr;
238
239 u64 compbit_vaddr;
240 u64 compbit_size;
241
242 u64 scatterbuffer_vaddr;
243 u64 scatterbuffer_size;
244
245 u64 backing_store_vaddr;
246
247 struct nvgpu_gpfifo *init_convert_cmd;
248 int init_cmd_num_entries;
249
250 struct nvgpu_gpfifo *convert_cmd;
251 int convert_cmd_num_entries;
252
253 struct kobj_attribute attr;
254
255 bool init_cmd_executed;
256
257 struct nvgpu_list_node list;
258 bool is_temporary;
259 bool in_use;
260 struct delayed_work ctx_deleter_work;
261};
262
263static inline struct gk20a_cde_ctx *
264gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
265{
266 return (struct gk20a_cde_ctx *)
267 ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
268};
269
270struct gk20a_cde_app {
271 bool initialised;
272 struct nvgpu_mutex mutex;
273
274 struct nvgpu_list_node free_contexts;
275 struct nvgpu_list_node used_contexts;
276 unsigned int ctx_count;
277 unsigned int ctx_usecount;
278 unsigned int ctx_count_top;
279
280 u32 firmware_version;
281
282 u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
283
284 u32 shader_parameter;
285};
286
287void gk20a_cde_destroy(struct gk20a *g);
288void gk20a_cde_suspend(struct gk20a *g);
289int gk20a_init_cde_support(struct gk20a *g);
290int gk20a_cde_reload(struct gk20a *g);
291int gk20a_cde_convert(struct gk20a *g,
292 struct dma_buf *compbits_buf,
293 u64 compbits_byte_offset,
294 u64 scatterbuffer_byte_offset,
295 struct nvgpu_fence *fence,
296 u32 __flags, struct gk20a_cde_param *params,
297 int num_params, struct gk20a_fence **fence_out);
298
299int gk20a_prepare_compressible_read(
300 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
301 u64 compbits_hoffset, u64 compbits_voffset,
302 u64 scatterbuffer_offset,
303 u32 width, u32 height, u32 block_height_log2,
304 u32 submit_flags, struct nvgpu_fence *fence,
305 u32 *valid_compbits, u32 *zbc_color,
306 struct gk20a_fence **fence_out);
307int gk20a_mark_compressible_write(
308 struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
309 u32 zbc_color);
310
311#endif
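A hedged sketch of how a caller, such as the nvgpu ioctl layer, might drive this
interface; buffer_fd, the offsets, the surface geometry, submit_flags and fence_in are
placeholder values assumed to come from the caller.

	struct gk20a_fence *fence_out = NULL;
	u32 valid_compbits = 0, zbc_color = 0;
	int err;

	err = gk20a_prepare_compressible_read(g, buffer_fd,
			NVGPU_GPU_COMPBITS_CDEH,	/* request horizontal CDE bits */
			0 /* offset */,
			compbits_hoffset, compbits_voffset,
			scatterbuffer_offset,
			width, height, block_height_log2,
			submit_flags, fence_in,
			&valid_compbits, &zbc_color, &fence_out);
	if (!err && fence_out)
		gk20a_fence_put(fence_out);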
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0cd314d6..63ea5bc4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -117,9 +117,6 @@ int gk20a_prepare_poweroff(struct gk20a *g)
 	if (gk20a_fifo_is_engine_busy(g))
 		return -EBUSY;
 
-	/* cancel any pending cde work */
-	gk20a_cde_suspend(g);
-
 	gk20a_ce_suspend(g);
 
 	ret = gk20a_channel_suspend(g);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ab715bdc..69cb2253 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -60,7 +60,6 @@ struct nvgpu_cpu_time_correlation_sample;
60#include "pmu_gk20a.h" 60#include "pmu_gk20a.h"
61#include "priv_ring_gk20a.h" 61#include "priv_ring_gk20a.h"
62#include "therm_gk20a.h" 62#include "therm_gk20a.h"
63#include "cde_gk20a.h"
64#include "sched_gk20a.h" 63#include "sched_gk20a.h"
65#ifdef CONFIG_ARCH_TEGRA_18x_SOC 64#ifdef CONFIG_ARCH_TEGRA_18x_SOC
66#include "clk/clk.h" 65#include "clk/clk.h"
@@ -928,6 +927,7 @@ struct gpu_ops {
 	struct {
 		void (*get_program_numbers)(struct gk20a *g,
 					    u32 block_height_log2,
+					    u32 shader_parameter,
 					    int *hprog, int *vprog);
 		bool (*need_scatter_buffer)(struct gk20a *g);
 		int (*populate_scatter_buffer)(struct gk20a *g,
@@ -1217,7 +1217,6 @@ struct gk20a {
 
 	struct gk20a_sched_ctrl sched_ctrl;
 
-	struct gk20a_cde_app cde_app;
 	bool mmu_debug_ctrl;
 
 	u32 tpc_fs_mask_user;