Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/cde_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/cde_gk20a.c  1669
1 file changed, 0 insertions, 1669 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
deleted file mode 100644
index 506207f2..00000000
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ /dev/null
@@ -1,1669 +0,0 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22
23#include <trace/events/gk20a.h>
24
25#include <nvgpu/dma.h>
26#include <nvgpu/gmmu.h>
27#include <nvgpu/timers.h>
28#include <nvgpu/nvgpu_common.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/log.h>
31#include <nvgpu/bug.h>
32#include <nvgpu/firmware.h>
33
34#include "gk20a.h"
35#include "channel_gk20a.h"
36#include "mm_gk20a.h"
37#include "cde_gk20a.h"
38#include "fence_gk20a.h"
39#include "gr_gk20a.h"
40#include "common/linux/os_linux.h"
41
42#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
44
45/*
46 * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the
47 * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem.
48 */
49#include "common/linux/vm_priv.h"
50
51static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
52static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
53
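/*
 * CTX_DELETE_TIME is in milliseconds: the delay before the deleter work tears
 * down an idle temporary context. MAX_CTX_USE_COUNT caps how many contexts
 * may be in use at once before gk20a_cde_do_get_context() returns -EAGAIN,
 * and MAX_CTX_RETRY_TIME bounds the retry loop in gk20a_cde_get_context().
 */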
54#define CTX_DELETE_TIME 1000
55
56#define MAX_CTX_USE_COUNT 42
57#define MAX_CTX_RETRY_TIME 2000
58
59static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
60{
61 unsigned int i;
62
63 for (i = 0; i < cde_ctx->num_bufs; i++) {
64 struct nvgpu_mem *mem = cde_ctx->mem + i;
65 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
66 }
67
68 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
69
70 cde_ctx->convert_cmd = NULL;
71 cde_ctx->init_convert_cmd = NULL;
72 cde_ctx->num_bufs = 0;
73 cde_ctx->num_params = 0;
74 cde_ctx->init_cmd_num_entries = 0;
75 cde_ctx->convert_cmd_num_entries = 0;
76 cde_ctx->init_cmd_executed = false;
77}
78
79static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
80__must_hold(&cde_app->mutex)
81{
82 struct gk20a *g = cde_ctx->g;
83 struct channel_gk20a *ch = cde_ctx->ch;
84 struct vm_gk20a *vm = ch->vm;
85
86 trace_gk20a_cde_remove_ctx(cde_ctx);
87
88 /* release mapped memory */
89 gk20a_deinit_cde_img(cde_ctx);
90 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
91 cde_ctx->backing_store_vaddr);
92
93 /* free the channel */
94 gk20a_channel_close(ch);
95
96 /* housekeeping on app */
97 nvgpu_list_del(&cde_ctx->list);
98 cde_ctx->g->cde_app.ctx_count--;
99 nvgpu_kfree(g, cde_ctx);
100}
101
102static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
103 bool wait_finish)
104__releases(&cde_app->mutex)
105__acquires(&cde_app->mutex)
106{
107 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
108
 109 /* permanent contexts do not have a deleter work */
110 if (!cde_ctx->is_temporary)
111 return;
112
113 if (wait_finish) {
114 nvgpu_mutex_release(&cde_app->mutex);
115 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
116 nvgpu_mutex_acquire(&cde_app->mutex);
117 } else {
118 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
119 }
120}
121
122static void gk20a_cde_remove_contexts(struct gk20a *g)
123__must_hold(&cde_app->mutex)
124{
125 struct gk20a_cde_app *cde_app = &g->cde_app;
126 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
127
 128 /* safe to drop the mutex in cancel_deleter since the app is
 129 * deinitialised and no new jobs are started; any remaining deleter
 130 * work is either waiting for the mutex or will abort before it runs */
131
132 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
133 &cde_app->free_contexts, gk20a_cde_ctx, list) {
134 gk20a_cde_cancel_deleter(cde_ctx, true);
135 gk20a_cde_remove_ctx(cde_ctx);
136 }
137
138 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
139 &cde_app->used_contexts, gk20a_cde_ctx, list) {
140 gk20a_cde_cancel_deleter(cde_ctx, true);
141 gk20a_cde_remove_ctx(cde_ctx);
142 }
143}
144
145static void gk20a_cde_stop(struct gk20a *g)
146__must_hold(&cde_app->mutex)
147{
148 struct gk20a_cde_app *cde_app = &g->cde_app;
149
150 /* prevent further conversions and delayed works from working */
151 cde_app->initialised = false;
152 /* free all data, empty the list */
153 gk20a_cde_remove_contexts(g);
154}
155
156void gk20a_cde_destroy(struct gk20a *g)
157__acquires(&cde_app->mutex)
158__releases(&cde_app->mutex)
159{
160 struct gk20a_cde_app *cde_app = &g->cde_app;
161
162 if (!cde_app->initialised)
163 return;
164
165 nvgpu_mutex_acquire(&cde_app->mutex);
166 gk20a_cde_stop(g);
167 nvgpu_mutex_release(&cde_app->mutex);
168
169 nvgpu_mutex_destroy(&cde_app->mutex);
170}
171
172void gk20a_cde_suspend(struct gk20a *g)
173__acquires(&cde_app->mutex)
174__releases(&cde_app->mutex)
175{
176 struct gk20a_cde_app *cde_app = &g->cde_app;
177 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
178
179 if (!cde_app->initialised)
180 return;
181
182 nvgpu_mutex_acquire(&cde_app->mutex);
183
184 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
185 &cde_app->free_contexts, gk20a_cde_ctx, list) {
186 gk20a_cde_cancel_deleter(cde_ctx, false);
187 }
188
189 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
190 &cde_app->used_contexts, gk20a_cde_ctx, list) {
191 gk20a_cde_cancel_deleter(cde_ctx, false);
192 }
193
194 nvgpu_mutex_release(&cde_app->mutex);
195
196}
197
198static int gk20a_cde_create_context(struct gk20a *g)
199__must_hold(&cde_app->mutex)
200{
201 struct gk20a_cde_app *cde_app = &g->cde_app;
202 struct gk20a_cde_ctx *cde_ctx;
203
204 cde_ctx = gk20a_cde_allocate_context(g);
205 if (IS_ERR(cde_ctx))
206 return PTR_ERR(cde_ctx);
207
208 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
209 cde_app->ctx_count++;
210 if (cde_app->ctx_count > cde_app->ctx_count_top)
211 cde_app->ctx_count_top = cde_app->ctx_count;
212
213 return 0;
214}
215
216static int gk20a_cde_create_contexts(struct gk20a *g)
217__must_hold(&g->cde_app->mutex)
218{
219 int err;
220 int i;
221
222 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
223 err = gk20a_cde_create_context(g);
224 if (err)
225 goto out;
226 }
227
228 return 0;
229out:
230 gk20a_cde_remove_contexts(g);
231 return err;
232}
233
234static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
235 struct nvgpu_firmware *img,
236 struct gk20a_cde_hdr_buf *buf)
237{
238 struct nvgpu_mem *mem;
239 struct gk20a *g = cde_ctx->g;
240 int err;
241
242 /* check that the file can hold the buf */
243 if (buf->data_byte_offset != 0 &&
244 buf->data_byte_offset + buf->num_bytes > img->size) {
245 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
246 cde_ctx->num_bufs);
247 return -EINVAL;
248 }
249
250 /* check that we have enough buf elems available */
251 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
 252 nvgpu_warn(g, "cde: too many buffers. buffer idx = %d",
253 cde_ctx->num_bufs);
254 return -ENOMEM;
255 }
256
257 /* allocate buf */
258 mem = cde_ctx->mem + cde_ctx->num_bufs;
259 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
260 if (err) {
261 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
262 cde_ctx->num_bufs);
263 return -ENOMEM;
264 }
265
266 /* copy the content */
267 if (buf->data_byte_offset != 0)
268 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
269 buf->num_bytes);
270
271 cde_ctx->num_bufs++;
272
273 return 0;
274}
275
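/*
 * Patch a single value into *target: shift and mask 'value', then merge it
 * with the bits already stored at the location. TYPE_PARAM_TYPE_U64_BIG
 * entries are stored with their 32-bit halves swapped, so the data is swapped
 * before and after the merge.
 */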
276static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
277 int type, s32 shift, u64 mask, u64 value)
278{
279 struct gk20a *g = cde_ctx->g;
280 u32 *target_mem_ptr = target;
281 u64 *target_mem_ptr_u64 = target;
282 u64 current_value, new_value;
283
284 value = (shift >= 0) ? value << shift : value >> -shift;
285 value &= mask;
286
287 /* read current data from the location */
288 current_value = 0;
289 if (type == TYPE_PARAM_TYPE_U32) {
290 if (mask != 0xfffffffful)
291 current_value = *target_mem_ptr;
292 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
293 if (mask != ~0ul)
294 current_value = *target_mem_ptr_u64;
295 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
296 current_value = *target_mem_ptr_u64;
297 current_value = (u64)(current_value >> 32) |
298 (u64)(current_value << 32);
299 } else {
300 nvgpu_warn(g, "cde: unknown type. type=%d",
301 type);
302 return -EINVAL;
303 }
304
305 current_value &= ~mask;
306 new_value = current_value | value;
307
308 /* store the element data back */
309 if (type == TYPE_PARAM_TYPE_U32)
310 *target_mem_ptr = (u32)new_value;
311 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
312 *target_mem_ptr_u64 = new_value;
313 else {
314 new_value = (u64)(new_value >> 32) |
315 (u64)(new_value << 32);
316 *target_mem_ptr_u64 = new_value;
317 }
318
319 return 0;
320}
321
322static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
323 struct nvgpu_firmware *img,
324 struct gk20a_cde_hdr_replace *replace)
325{
326 struct nvgpu_mem *source_mem;
327 struct nvgpu_mem *target_mem;
328 struct gk20a *g = cde_ctx->g;
329 u32 *target_mem_ptr;
330 u64 vaddr;
331 int err;
332
333 if (replace->target_buf >= cde_ctx->num_bufs ||
334 replace->source_buf >= cde_ctx->num_bufs) {
335 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
336 replace->target_buf, replace->source_buf,
337 cde_ctx->num_bufs);
338 return -EINVAL;
339 }
340
341 source_mem = cde_ctx->mem + replace->source_buf;
342 target_mem = cde_ctx->mem + replace->target_buf;
343 target_mem_ptr = target_mem->cpu_va;
344
345 if (source_mem->size < (replace->source_byte_offset + 3) ||
346 target_mem->size < (replace->target_byte_offset + 3)) {
347 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
348 replace->target_byte_offset,
349 replace->source_byte_offset,
350 source_mem->size,
351 target_mem->size);
352 return -EINVAL;
353 }
354
355 /* calculate the target pointer */
356 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
357
358 /* determine patch value */
359 vaddr = source_mem->gpu_va + replace->source_byte_offset;
360 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
361 replace->shift, replace->mask,
362 vaddr);
363 if (err) {
364 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
365 err, replace->target_buf,
366 replace->target_byte_offset,
367 replace->source_buf,
368 replace->source_byte_offset);
369 }
370
371 return err;
372}
373
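/*
 * Resolve every parameter declared by the firmware image to its runtime value
 * (GPU configuration, backing store and destination addresses, or
 * user-supplied values) and patch it into its target buffer.
 */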
374static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
375{
376 struct gk20a *g = cde_ctx->g;
377 struct nvgpu_mem *target_mem;
378 u32 *target_mem_ptr;
379 u64 new_data;
380 int user_id = 0, err;
381 unsigned int i;
382
383 for (i = 0; i < cde_ctx->num_params; i++) {
384 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
385 target_mem = cde_ctx->mem + param->target_buf;
386 target_mem_ptr = target_mem->cpu_va;
387 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
388
389 switch (param->id) {
390 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
391 new_data = g->gr.comptags_per_cacheline;
392 break;
393 case TYPE_PARAM_GPU_CONFIGURATION:
394 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
395 g->gr.cacheline_size;
396 break;
397 case TYPE_PARAM_FIRSTPAGEOFFSET:
398 new_data = cde_ctx->surf_param_offset;
399 break;
400 case TYPE_PARAM_NUMPAGES:
401 new_data = cde_ctx->surf_param_lines;
402 break;
403 case TYPE_PARAM_BACKINGSTORE:
404 new_data = cde_ctx->backing_store_vaddr;
405 break;
406 case TYPE_PARAM_DESTINATION:
407 new_data = cde_ctx->compbit_vaddr;
408 break;
409 case TYPE_PARAM_DESTINATION_SIZE:
410 new_data = cde_ctx->compbit_size;
411 break;
412 case TYPE_PARAM_BACKINGSTORE_SIZE:
413 new_data = g->gr.compbit_store.mem.size;
414 break;
415 case TYPE_PARAM_SOURCE_SMMU_ADDR:
416 new_data = gk20a_mm_gpuva_to_iova_base(cde_ctx->vm,
417 cde_ctx->surf_vaddr);
418 if (new_data == 0)
419 return -EINVAL;
420 break;
421 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
422 new_data = g->gr.compbit_store.base_hw;
423 break;
424 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
425 new_data = g->gr.gobs_per_comptagline_per_slice;
426 break;
427 case TYPE_PARAM_SCATTERBUFFER:
428 new_data = cde_ctx->scatterbuffer_vaddr;
429 break;
430 case TYPE_PARAM_SCATTERBUFFER_SIZE:
431 new_data = cde_ctx->scatterbuffer_size;
432 break;
433 default:
434 user_id = param->id - NUM_RESERVED_PARAMS;
435 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
436 continue;
437 new_data = cde_ctx->user_param_values[user_id];
438 }
439
440 gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
441 i, param->id, param->target_buf,
442 param->target_byte_offset, new_data,
443 param->data_offset, param->type, param->shift,
444 param->mask);
445
446 new_data += param->data_offset;
447
448 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
449 param->shift, param->mask, new_data);
450
451 if (err) {
452 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
453 err, i, param->id, param->target_buf,
454 param->target_byte_offset, new_data);
455 return err;
456 }
457 }
458
459 return 0;
460}
461
462static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
463 struct nvgpu_firmware *img,
464 struct gk20a_cde_hdr_param *param)
465{
466 struct nvgpu_mem *target_mem;
467 struct gk20a *g = cde_ctx->g;
468
469 if (param->target_buf >= cde_ctx->num_bufs) {
470 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
471 cde_ctx->num_params, param->target_buf,
472 cde_ctx->num_bufs);
473 return -EINVAL;
474 }
475
476 target_mem = cde_ctx->mem + param->target_buf;
477 if (target_mem->size < (param->target_byte_offset + 3)) {
478 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
479 cde_ctx->num_params, param->target_byte_offset,
480 target_mem->size);
481 return -EINVAL;
482 }
483
484 /* does this parameter fit into our parameter structure */
485 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
486 nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
487 cde_ctx->num_params);
488 return -ENOMEM;
489 }
490
491 /* is the given id valid? */
492 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
493 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
 494 cde_ctx->num_params, param->id,
495 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
496 return -EINVAL;
497 }
498
499 cde_ctx->params[cde_ctx->num_params] = *param;
500 cde_ctx->num_params++;
501
502 return 0;
503}
504
505static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
506 struct nvgpu_firmware *img,
507 u32 required_class)
508{
509 struct gk20a *g = cde_ctx->g;
510 struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx;
511 int err;
512
513 alloc_obj_ctx.class_num = required_class;
514 alloc_obj_ctx.flags = 0;
515
516 /* CDE enabled */
517 cde_ctx->ch->cde = true;
518
519 err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx);
520 if (err) {
521 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
522 err);
523 return err;
524 }
525
526 return 0;
527}
528
529static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
530 struct nvgpu_firmware *img,
531 u32 op,
532 struct gk20a_cde_cmd_elem *cmd_elem,
533 u32 num_elems)
534{
535 struct gk20a *g = cde_ctx->g;
536 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
537 u32 *num_entries;
538 unsigned int i;
539
540 /* check command type */
541 if (op == TYPE_BUF_COMMAND_INIT) {
542 gpfifo = &cde_ctx->init_convert_cmd;
543 num_entries = &cde_ctx->init_cmd_num_entries;
544 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
545 gpfifo = &cde_ctx->convert_cmd;
546 num_entries = &cde_ctx->convert_cmd_num_entries;
547 } else {
548 nvgpu_warn(g, "cde: unknown command. op=%u",
549 op);
550 return -EINVAL;
551 }
552
553 /* allocate gpfifo entries to be pushed */
554 *gpfifo = nvgpu_kzalloc(cde_ctx->g,
555 sizeof(struct nvgpu_gpfifo) * num_elems);
556 if (!*gpfifo) {
557 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
558 return -ENOMEM;
559 }
560
561 gpfifo_elem = *gpfifo;
562 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
563 struct nvgpu_mem *target_mem;
564
565 /* validate the current entry */
566 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
567 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
568 cmd_elem->target_buf, cde_ctx->num_bufs);
569 return -EINVAL;
570 }
571
572 target_mem = cde_ctx->mem + cmd_elem->target_buf;
 573 if (target_mem->size <
574 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
575 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
576 target_mem->size,
577 cmd_elem->target_byte_offset,
578 cmd_elem->num_bytes);
579 return -EINVAL;
580 }
581
582 /* store the element into gpfifo */
583 gpfifo_elem->entry0 =
584 u64_lo32(target_mem->gpu_va +
585 cmd_elem->target_byte_offset);
586 gpfifo_elem->entry1 =
587 u64_hi32(target_mem->gpu_va +
588 cmd_elem->target_byte_offset) |
589 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
590 sizeof(u32));
591 }
592
593 *num_entries = num_elems;
594 return 0;
595}
596
597static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
598{
599 struct gk20a *g = cde_ctx->g;
600 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
601 sizeof(struct nvgpu_gpfifo);
602 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
603 sizeof(struct nvgpu_gpfifo);
604 unsigned long total_bytes = init_bytes + conv_bytes;
605 struct nvgpu_gpfifo *combined_cmd;
606
607 /* allocate buffer that has space for both */
608 combined_cmd = nvgpu_kzalloc(cde_ctx->g, total_bytes);
609 if (!combined_cmd) {
610 nvgpu_warn(g,
611 "cde: could not allocate memory for gpfifo entries");
612 return -ENOMEM;
613 }
614
615 /* move the original init here and append convert */
616 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
617 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
618 cde_ctx->convert_cmd, conv_bytes);
619
620 nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
621 nvgpu_kfree(cde_ctx->g, cde_ctx->convert_cmd);
622
623 cde_ctx->init_convert_cmd = combined_cmd;
624 cde_ctx->convert_cmd = combined_cmd
625 + cde_ctx->init_cmd_num_entries;
626
627 return 0;
628}
629
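/*
 * Parse the firmware image: data[0] is the firmware version, data[1] the
 * number of header elements that follow (buffers, replaces, parameters,
 * required classes, commands and launch-parameter arrays).
 */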
630static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
631 struct nvgpu_firmware *img)
632{
633 struct gk20a *g = cde_ctx->g;
634 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
635 u32 *data = (u32 *)img->data;
636 u32 num_of_elems;
637 struct gk20a_cde_hdr_elem *elem;
638 u32 min_size = 0;
639 int err = 0;
640 unsigned int i;
641
642 min_size += 2 * sizeof(u32);
643 if (img->size < min_size) {
644 nvgpu_warn(g, "cde: invalid image header");
645 return -EINVAL;
646 }
647
648 cde_app->firmware_version = data[0];
649 num_of_elems = data[1];
650
651 min_size += num_of_elems * sizeof(*elem);
652 if (img->size < min_size) {
653 nvgpu_warn(g, "cde: bad image");
654 return -EINVAL;
655 }
656
657 elem = (struct gk20a_cde_hdr_elem *)&data[2];
658 for (i = 0; i < num_of_elems; i++) {
659 int err = 0;
660 switch (elem->type) {
661 case TYPE_BUF:
662 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
663 break;
664 case TYPE_REPLACE:
665 err = gk20a_init_cde_replace(cde_ctx, img,
666 &elem->replace);
667 break;
668 case TYPE_PARAM:
669 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
670 break;
671 case TYPE_REQUIRED_CLASS:
672 err = gk20a_init_cde_required_class(cde_ctx, img,
673 elem->required_class);
674 break;
675 case TYPE_COMMAND:
676 {
677 struct gk20a_cde_cmd_elem *cmd = (void *)
678 &img->data[elem->command.data_byte_offset];
679 err = gk20a_init_cde_command(cde_ctx, img,
680 elem->command.op, cmd,
681 elem->command.num_entries);
682 break;
683 }
684 case TYPE_ARRAY:
685 memcpy(&cde_app->arrays[elem->array.id][0],
686 elem->array.data,
687 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
688 break;
689 default:
690 nvgpu_warn(g, "cde: unknown header element");
691 err = -EINVAL;
692 }
693
694 if (err)
695 goto deinit_image;
696
697 elem++;
698 }
699
700 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
 701 nvgpu_warn(g, "cde: init command not defined");
702 err = -EINVAL;
703 goto deinit_image;
704 }
705
706 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
707 nvgpu_warn(g, "cde: convert command not defined");
708 err = -EINVAL;
709 goto deinit_image;
710 }
711
712 err = gk20a_cde_pack_cmdbufs(cde_ctx);
713 if (err)
714 goto deinit_image;
715
716 return 0;
717
718deinit_image:
719 gk20a_deinit_cde_img(cde_ctx);
720 return err;
721}
722
723static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
724 u32 op, struct nvgpu_fence *fence,
725 u32 flags, struct gk20a_fence **fence_out)
726{
727 struct gk20a *g = cde_ctx->g;
728 struct nvgpu_gpfifo *gpfifo = NULL;
729 int num_entries = 0;
730
731 /* check command type */
732 if (op == TYPE_BUF_COMMAND_INIT) {
733 /* both init and convert combined */
734 gpfifo = cde_ctx->init_convert_cmd;
735 num_entries = cde_ctx->init_cmd_num_entries
736 + cde_ctx->convert_cmd_num_entries;
737 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
738 gpfifo = cde_ctx->convert_cmd;
739 num_entries = cde_ctx->convert_cmd_num_entries;
740 } else {
741 nvgpu_warn(g, "cde: unknown buffer");
742 return -EINVAL;
743 }
744
745 if (gpfifo == NULL || num_entries == 0) {
746 nvgpu_warn(g, "cde: buffer not available");
747 return -ENOSYS;
748 }
749
750 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
751 num_entries, flags, fence, fence_out, true,
752 NULL);
753}
754
755static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
756__acquires(&cde_app->mutex)
757__releases(&cde_app->mutex)
758{
759 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
760
761 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
762 trace_gk20a_cde_release(cde_ctx);
763
764 nvgpu_mutex_acquire(&cde_app->mutex);
765
766 if (cde_ctx->in_use) {
767 cde_ctx->in_use = false;
768 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
769 cde_app->ctx_usecount--;
770 } else {
771 gk20a_dbg_info("double release cde context %p", cde_ctx);
772 }
773
774 nvgpu_mutex_release(&cde_app->mutex);
775}
776
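/*
 * Delayed work that tears down an idle temporary context; it bails out if the
 * context has been taken into use again or the CDE app is shutting down.
 */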
777static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
778__acquires(&cde_app->mutex)
779__releases(&cde_app->mutex)
780{
781 struct delayed_work *delay_work = to_delayed_work(work);
782 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
783 struct gk20a_cde_ctx, ctx_deleter_work);
784 struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
785 struct gk20a *g = cde_ctx->g;
786 int err;
787
788 /* someone has just taken it? engine deletion started? */
789 if (cde_ctx->in_use || !cde_app->initialised)
790 return;
791
792 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
793 "cde: attempting to delete temporary %p", cde_ctx);
794
795 err = gk20a_busy(g);
796 if (err) {
797 /* this context would find new use anyway later, so not freeing
798 * here does not leak anything */
799 nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
800 " temp ctx deletion");
801 return;
802 }
803
804 nvgpu_mutex_acquire(&cde_app->mutex);
805 if (cde_ctx->in_use || !cde_app->initialised) {
806 gk20a_dbg(gpu_dbg_cde_ctx,
807 "cde: context use raced, not deleting %p",
808 cde_ctx);
809 goto out;
810 }
811
812 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
813 "double pending %p", cde_ctx);
814
815 gk20a_cde_remove_ctx(cde_ctx);
816 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
817 "cde: destroyed %p count=%d use=%d max=%d",
818 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
819 cde_app->ctx_count_top);
820
821out:
822 nvgpu_mutex_release(&cde_app->mutex);
823 gk20a_idle(g);
824}
825
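/*
 * Grab an idle context from the free list, or allocate a temporary one when
 * none is free. Returns ERR_PTR(-EAGAIN) while the use-count cap is reached.
 */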
826static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct gk20a *g)
827__must_hold(&cde_app->mutex)
828{
829 struct gk20a_cde_app *cde_app = &g->cde_app;
830 struct gk20a_cde_ctx *cde_ctx;
831
832 /* exhausted? */
833
834 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
835 return ERR_PTR(-EAGAIN);
836
837 /* idle context available? */
838
839 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
840 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
841 gk20a_cde_ctx, list);
842 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
843 "cde: got free %p count=%d use=%d max=%d",
844 cde_ctx, cde_app->ctx_count,
845 cde_app->ctx_usecount,
846 cde_app->ctx_count_top);
847 trace_gk20a_cde_get_context(cde_ctx);
848
849 /* deleter work may be scheduled, but in_use prevents it */
850 cde_ctx->in_use = true;
851 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
852 cde_app->ctx_usecount++;
853
854 /* cancel any deletions now that ctx is in use */
855 gk20a_cde_cancel_deleter(cde_ctx, true);
856 return cde_ctx;
857 }
858
859 /* no free contexts, get a temporary one */
860
861 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
862 "cde: no free contexts, count=%d",
863 cde_app->ctx_count);
864
865 cde_ctx = gk20a_cde_allocate_context(g);
866 if (IS_ERR(cde_ctx)) {
867 nvgpu_warn(g, "cde: cannot allocate context: %ld",
868 PTR_ERR(cde_ctx));
869 return cde_ctx;
870 }
871
872 trace_gk20a_cde_get_context(cde_ctx);
873 cde_ctx->in_use = true;
874 cde_ctx->is_temporary = true;
875 cde_app->ctx_usecount++;
876 cde_app->ctx_count++;
877 if (cde_app->ctx_count > cde_app->ctx_count_top)
878 cde_app->ctx_count_top = cde_app->ctx_count;
879 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
880
881 return cde_ctx;
882}
883
884static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g)
885__releases(&cde_app->mutex)
886__acquires(&cde_app->mutex)
887{
888 struct gk20a_cde_app *cde_app = &g->cde_app;
889 struct gk20a_cde_ctx *cde_ctx = NULL;
890 struct nvgpu_timeout timeout;
891
892 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
893 NVGPU_TIMER_CPU_TIMER);
894
895 do {
896 cde_ctx = gk20a_cde_do_get_context(g);
897 if (PTR_ERR(cde_ctx) != -EAGAIN)
898 break;
899
900 /* exhausted, retry */
901 nvgpu_mutex_release(&cde_app->mutex);
902 cond_resched();
903 nvgpu_mutex_acquire(&cde_app->mutex);
904 } while (!nvgpu_timeout_expired(&timeout));
905
906 return cde_ctx;
907}
908
909static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
910{
911 struct gk20a_cde_ctx *cde_ctx;
912 int ret;
913
914 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
915 if (!cde_ctx)
916 return ERR_PTR(-ENOMEM);
917
918 cde_ctx->g = g;
919 cde_ctx->dev = dev_from_gk20a(g);
920
921 ret = gk20a_cde_load(cde_ctx);
922 if (ret) {
923 nvgpu_kfree(g, cde_ctx);
924 return ERR_PTR(ret);
925 }
926
927 nvgpu_init_list_node(&cde_ctx->list);
928 cde_ctx->is_temporary = false;
929 cde_ctx->in_use = false;
930 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
931 gk20a_cde_ctx_deleter_fn);
932
933 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
934 trace_gk20a_cde_allocate_context(cde_ctx);
935 return cde_ctx;
936}
937
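/*
 * Run one conversion: map the destination dmabuf into the CDE VM, optionally
 * populate the scatter buffer, patch the firmware parameters and submit the
 * convert command buffer (prefixed with the init commands on first use).
 */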
938int gk20a_cde_convert(struct gk20a *g,
939 struct dma_buf *compbits_scatter_buf,
940 u64 compbits_byte_offset,
941 u64 scatterbuffer_byte_offset,
942 struct nvgpu_fence *fence,
943 u32 __flags, struct gk20a_cde_param *params,
944 int num_params, struct gk20a_fence **fence_out)
945__acquires(&cde_app->mutex)
946__releases(&cde_app->mutex)
947{
948 struct gk20a_cde_ctx *cde_ctx = NULL;
949 struct gk20a_comptags comptags;
950 u64 mapped_compbits_offset = 0;
951 u64 compbits_size = 0;
952 u64 mapped_scatterbuffer_offset = 0;
953 u64 scatterbuffer_size = 0;
954 u64 map_vaddr = 0;
955 u64 map_offset = 0;
956 u64 map_size = 0;
957 u8 *surface = NULL;
958 u64 big_page_mask = 0;
959 u32 flags;
960 int err, i;
961 const s32 compbits_kind = 0;
962
963 gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
964 compbits_byte_offset, scatterbuffer_byte_offset);
965
966 /* scatter buffer must be after compbits buffer */
967 if (scatterbuffer_byte_offset &&
968 scatterbuffer_byte_offset < compbits_byte_offset)
969 return -EINVAL;
970
971 err = gk20a_busy(g);
972 if (err)
973 return err;
974
975 nvgpu_mutex_acquire(&g->cde_app.mutex);
976 cde_ctx = gk20a_cde_get_context(g);
977 nvgpu_mutex_release(&g->cde_app.mutex);
978 if (IS_ERR(cde_ctx)) {
979 err = PTR_ERR(cde_ctx);
980 goto exit_idle;
981 }
982
983 /* First, map the buffer to local va */
984
985 /* ensure that the compbits buffer has drvdata */
986 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
987 dev_from_gk20a(g));
988 if (err)
989 goto exit_idle;
990
 991 /* compbits don't start at a page-aligned offset, so we need to align
 992 the region to be mapped */
993 big_page_mask = cde_ctx->vm->big_page_size - 1;
994 map_offset = compbits_byte_offset & ~big_page_mask;
995 map_size = compbits_scatter_buf->size - map_offset;
996
997
998 /* compute compbit start offset from the beginning of the mapped
999 area */
1000 mapped_compbits_offset = compbits_byte_offset - map_offset;
1001 if (scatterbuffer_byte_offset) {
1002 compbits_size = scatterbuffer_byte_offset -
1003 compbits_byte_offset;
1004 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1005 map_offset;
1006 scatterbuffer_size = compbits_scatter_buf->size -
1007 scatterbuffer_byte_offset;
1008 } else {
1009 compbits_size = compbits_scatter_buf->size -
1010 compbits_byte_offset;
1011 }
1012
1013 gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1014 map_offset, map_size);
1015 gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1016 mapped_compbits_offset, compbits_size);
1017 gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1018 mapped_scatterbuffer_offset, scatterbuffer_size);
1019
1020
1021 /* map the destination buffer */
1022 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
1023 map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
1024 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1025 compbits_kind, true,
1026 gk20a_mem_flag_none,
1027 map_offset, map_size,
1028 NULL);
1029 if (!map_vaddr) {
1030 dma_buf_put(compbits_scatter_buf);
1031 err = -EINVAL;
1032 goto exit_idle;
1033 }
1034
1035 if (scatterbuffer_byte_offset &&
1036 g->ops.cde.need_scatter_buffer &&
1037 g->ops.cde.need_scatter_buffer(g)) {
1038 struct sg_table *sgt;
1039 void *scatter_buffer;
1040
1041 surface = dma_buf_vmap(compbits_scatter_buf);
 1042 if (!surface) {
1043 nvgpu_warn(g,
1044 "dma_buf_vmap failed");
1045 err = -EINVAL;
1046 goto exit_unmap_vaddr;
1047 }
1048
1049 scatter_buffer = surface + scatterbuffer_byte_offset;
1050
1051 gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1052 surface, scatter_buffer);
1053 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf);
1054 if (IS_ERR(sgt)) {
1055 nvgpu_warn(g,
1056 "mm_pin failed");
1057 err = -EINVAL;
1058 goto exit_unmap_surface;
1059 } else {
1060 err = g->ops.cde.populate_scatter_buffer(g, sgt,
1061 compbits_byte_offset, scatter_buffer,
1062 scatterbuffer_size);
1063 WARN_ON(err);
1064
1065 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1066 sgt);
1067 if (err)
1068 goto exit_unmap_surface;
1069 }
1070
1071 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1072 dma_buf_vunmap(compbits_scatter_buf, surface);
1073 surface = NULL;
1074 }
1075
1076 /* store source buffer compression tags */
1077 gk20a_get_comptags(dev_from_gk20a(g), compbits_scatter_buf, &comptags);
1078 cde_ctx->surf_param_offset = comptags.offset;
1079 cde_ctx->surf_param_lines = comptags.lines;
1080
 1081 /* store the surface vaddr. This is actually the compbit vaddr, but since
 1082 the compbits live in the same surface and we can get the allocation base
 1083 address with gk20a_mm_gpuva_to_iova_base, this will do */
1084 cde_ctx->surf_vaddr = map_vaddr;
1085
1086 /* store information about destination */
1087 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1088 cde_ctx->compbit_size = compbits_size;
1089
1090 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1091 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1092
1093 /* remove existing argument data */
1094 memset(cde_ctx->user_param_values, 0,
1095 sizeof(cde_ctx->user_param_values));
1096
1097 /* read user space arguments for the conversion */
1098 for (i = 0; i < num_params; i++) {
1099 struct gk20a_cde_param *param = params + i;
1100 int id = param->id - NUM_RESERVED_PARAMS;
1101
1102 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1103 nvgpu_warn(g, "cde: unknown user parameter");
1104 err = -EINVAL;
1105 goto exit_unmap_surface;
1106 }
1107 cde_ctx->user_param_values[id] = param->value;
1108 }
1109
1110 /* patch data */
1111 err = gk20a_cde_patch_params(cde_ctx);
1112 if (err) {
1113 nvgpu_warn(g, "cde: failed to patch parameters");
1114 goto exit_unmap_surface;
1115 }
1116
1117 gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1118 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1119 gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1120 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1121 gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1122 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1123
 1124 /* always take the postfence as it is needed for protecting the
 1125 * cde context */
1126 flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
1127
 1128 /* gk20a_cde_execute_buffer() will grab a power reference of its own */
1129 gk20a_idle(g);
1130
1131 /* execute the conversion buffer, combined with init first if it's the
1132 * first time */
1133 err = gk20a_cde_execute_buffer(cde_ctx,
1134 cde_ctx->init_cmd_executed
1135 ? TYPE_BUF_COMMAND_CONVERT
1136 : TYPE_BUF_COMMAND_INIT,
1137 fence, flags, fence_out);
1138
1139 cde_ctx->init_cmd_executed = true;
1140
1141 /* unmap the buffers - channel holds references to them now */
1142 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
1143
1144 return err;
1145
1146exit_unmap_surface:
1147 if (surface)
1148 dma_buf_vunmap(compbits_scatter_buf, surface);
1149exit_unmap_vaddr:
1150 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
1151exit_idle:
1152 gk20a_idle(g);
1153 return err;
1154}
1155
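/*
 * Channel job-completion callback: once the channel is idle, release the
 * context and schedule deletion of temporary ones; a timed-out permanent
 * context is marked temporary and replaced with a freshly created one.
 */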
1156static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1157__acquires(&cde_app->mutex)
1158__releases(&cde_app->mutex)
1159{
1160 struct gk20a_cde_ctx *cde_ctx = data;
1161 struct gk20a *g = cde_ctx->g;
1162 struct gk20a_cde_app *cde_app = &g->cde_app;
1163 bool channel_idle;
1164
1165 channel_gk20a_joblist_lock(ch);
1166 channel_idle = channel_gk20a_joblist_is_empty(ch);
1167 channel_gk20a_joblist_unlock(ch);
1168
1169 if (!channel_idle)
1170 return;
1171
1172 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1173 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1174 if (!cde_ctx->in_use)
1175 gk20a_dbg_info("double finish cde context %p on channel %p",
1176 cde_ctx, ch);
1177
1178 if (ch->has_timedout) {
1179 if (cde_ctx->is_temporary) {
1180 nvgpu_warn(g,
1181 "cde: channel had timed out"
1182 " (temporary channel)");
1183 /* going to be deleted anyway */
1184 } else {
1185 nvgpu_warn(g,
1186 "cde: channel had timed out"
1187 ", reloading");
1188 /* mark it to be deleted, replace with a new one */
1189 nvgpu_mutex_acquire(&cde_app->mutex);
1190 cde_ctx->is_temporary = true;
1191 if (gk20a_cde_create_context(g)) {
1192 nvgpu_err(g, "cde: can't replace context");
1193 }
1194 nvgpu_mutex_release(&cde_app->mutex);
1195 }
1196 }
1197
1198 /* delete temporary contexts later (watch for doubles) */
1199 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1200 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1201 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1202 msecs_to_jiffies(CTX_DELETE_TIME));
1203 }
1204
1205 if (!ch->has_timedout)
1206 gk20a_cde_ctx_release(cde_ctx);
1207}
1208
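/*
 * One-time context setup: fetch the gpu2cde firmware, open a channel bound to
 * the CDE VM, map the compbit backing store and initialise the firmware image.
 */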
1209static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1210{
1211 struct gk20a *g = cde_ctx->g;
1212 struct nvgpu_firmware *img;
1213 struct channel_gk20a *ch;
1214 struct gr_gk20a *gr = &g->gr;
1215 int err = 0;
1216 u64 vaddr;
1217
1218 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1219 if (!img) {
1220 nvgpu_err(g, "cde: could not fetch the firmware");
1221 return -ENOSYS;
1222 }
1223
1224 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1225 cde_ctx,
1226 -1,
1227 false);
1228 if (!ch) {
1229 nvgpu_warn(g, "cde: gk20a channel not available");
1230 err = -ENOMEM;
1231 goto err_get_gk20a_channel;
1232 }
1233
1234 /* bind the channel to the vm */
1235 err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
1236 if (err) {
1237 nvgpu_warn(g, "cde: could not bind vm");
1238 goto err_commit_va;
1239 }
1240
1241 /* allocate gpfifo (1024 should be more than enough) */
1242 err = gk20a_channel_alloc_gpfifo(ch, 1024, 0, 0);
1243 if (err) {
1244 nvgpu_warn(g, "cde: unable to allocate gpfifo");
1245 goto err_alloc_gpfifo;
1246 }
1247
1248 /* map backing store to gpu virtual space */
1249 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1250 g->gr.compbit_store.mem.size,
1251 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1252 gk20a_mem_flag_read_only,
1253 false,
1254 gr->compbit_store.mem.aperture);
1255
1256 if (!vaddr) {
1257 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1258 err = -ENOMEM;
1259 goto err_map_backingstore;
1260 }
1261
1262 /* store initialisation data */
1263 cde_ctx->ch = ch;
1264 cde_ctx->vm = ch->vm;
1265 cde_ctx->backing_store_vaddr = vaddr;
1266
1267 /* initialise the firmware */
1268 err = gk20a_init_cde_img(cde_ctx, img);
1269 if (err) {
1270 nvgpu_warn(g, "cde: image initialisation failed");
1271 goto err_init_cde_img;
1272 }
1273
1274 /* initialisation done */
1275 nvgpu_release_firmware(g, img);
1276
1277 return 0;
1278
1279err_init_cde_img:
1280 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1281err_map_backingstore:
1282err_alloc_gpfifo:
1283 nvgpu_vm_put(ch->vm);
1284err_commit_va:
1285err_get_gk20a_channel:
1286 nvgpu_release_firmware(g, img);
1287 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1288 return err;
1289}
1290
1291int gk20a_cde_reload(struct gk20a *g)
1292__acquires(&cde_app->mutex)
1293__releases(&cde_app->mutex)
1294{
1295 struct gk20a_cde_app *cde_app = &g->cde_app;
1296 int err;
1297
1298 if (!cde_app->initialised)
1299 return -ENOSYS;
1300
1301 err = gk20a_busy(g);
1302 if (err)
1303 return err;
1304
1305 nvgpu_mutex_acquire(&cde_app->mutex);
1306
1307 gk20a_cde_stop(g);
1308
1309 err = gk20a_cde_create_contexts(g);
1310 if (!err)
1311 cde_app->initialised = true;
1312
1313 nvgpu_mutex_release(&cde_app->mutex);
1314
1315 gk20a_idle(g);
1316 return err;
1317}
1318
1319int gk20a_init_cde_support(struct gk20a *g)
1320__acquires(&cde_app->mutex)
1321__releases(&cde_app->mutex)
1322{
1323 struct gk20a_cde_app *cde_app = &g->cde_app;
1324 int err;
1325
1326 if (cde_app->initialised)
1327 return 0;
1328
1329 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1330
1331 err = nvgpu_mutex_init(&cde_app->mutex);
1332 if (err)
1333 return err;
1334
1335 nvgpu_mutex_acquire(&cde_app->mutex);
1336
1337 nvgpu_init_list_node(&cde_app->free_contexts);
1338 nvgpu_init_list_node(&cde_app->used_contexts);
1339 cde_app->ctx_count = 0;
1340 cde_app->ctx_count_top = 0;
1341 cde_app->ctx_usecount = 0;
1342
1343 err = gk20a_cde_create_contexts(g);
1344 if (!err)
1345 cde_app->initialised = true;
1346
1347 nvgpu_mutex_release(&cde_app->mutex);
1348 gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1349
1350 if (err)
1351 nvgpu_mutex_destroy(&cde_app->mutex);
1352
1353 return err;
1354}
1355
1356enum cde_launch_patch_id {
1357 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1358 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1359 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1360 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1361 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1362 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1363 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1364 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1365 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1366 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1367 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1368 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1369 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1370 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1371 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1372 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1373 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1374 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1375 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1376 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1377 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1378 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1379 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1380 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1381 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1382 PATCH_H_LAUNCH_WORD1_ID = 1049,
1383 PATCH_H_LAUNCH_WORD2_ID = 1050,
1384 PATCH_V_LAUNCH_WORD1_ID = 1051,
1385 PATCH_V_LAUNCH_WORD2_ID = 1052,
1386 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1387 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1388 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1389 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1390};
1391
1392/* maximum number of WRITE_PATCHes in the below function */
1393#define MAX_CDE_LAUNCH_PATCHES 32
1394
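/*
 * Firmware v1 launch setup: the surface is split into 8x8 tiles and handled
 * by a horizontal and a vertical pass; grid sizes are rounded up to the 8x8
 * workgroup (the width additionally to the 4 compbit pairs packed per byte),
 * and the WRITE_PATCHes below fill in program offsets, register counts and
 * launch words for both passes.
 */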
1395static int gk20a_buffer_convert_gpu_to_cde_v1(
1396 struct gk20a *g,
1397 struct dma_buf *dmabuf, u32 consumer,
1398 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1399 u64 scatterbuffer_offset,
1400 u32 width, u32 height, u32 block_height_log2,
1401 u32 submit_flags, struct nvgpu_fence *fence_in,
1402 struct gk20a_buffer_state *state)
1403{
1404 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1405 int param = 0;
1406 int err = 0;
1407 struct gk20a_fence *new_fence = NULL;
1408 const int wgx = 8;
1409 const int wgy = 8;
1410 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1411 const int xalign = compbits_per_byte * wgx;
1412 const int yalign = wgy;
1413
1414 /* Compute per launch parameters */
1415 const int xtiles = (width + 7) >> 3;
1416 const int ytiles = (height + 7) >> 3;
1417 const int gridw_h = roundup(xtiles, xalign) / xalign;
1418 const int gridh_h = roundup(ytiles, yalign) / yalign;
1419 const int gridw_v = roundup(ytiles, xalign) / xalign;
1420 const int gridh_v = roundup(xtiles, yalign) / yalign;
1421 const int xblocks = (xtiles + 1) >> 1;
1422 const int voffset = compbits_voffset - compbits_hoffset;
1423
1424 int hprog = -1;
1425 int vprog = -1;
1426
1427 if (g->ops.cde.get_program_numbers)
1428 g->ops.cde.get_program_numbers(g, block_height_log2,
1429 &hprog, &vprog);
1430 else {
1431 nvgpu_warn(g, "cde: chip not supported");
1432 return -ENOSYS;
1433 }
1434
1435 if (hprog < 0 || vprog < 0) {
1436 nvgpu_warn(g, "cde: could not determine programs");
1437 return -ENOSYS;
1438 }
1439
1440 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1441 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1442 xtiles, ytiles);
1443
1444 gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1445 width, height, block_height_log2,
1446 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1447 gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1448 width, height, xtiles, ytiles);
1449 gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1450 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1451 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1452 hprog,
1453 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1454 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1455 vprog,
1456 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1457 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1458
1459 /* Write parameters */
1460#define WRITE_PATCH(NAME, VALUE) \
1461 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1462 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1463 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1464 block_height_log2);
1465 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1466 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1467 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1468 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1469 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1470
1471 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1472 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1473 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1474 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1475 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1476 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1477
1478 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1479 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1480 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1481 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1482 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1483 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1484
1485 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1486 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1487 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1488 g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1489 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1490 g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1491 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1492 g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1493
1494 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1495 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1496 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1497 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1498 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1499 } else {
1500 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1501 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1502 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1503 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1504 }
1505
1506 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1507 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1508 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1509 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1510 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1511 } else {
1512 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1513 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1514 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1515 g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1516 }
1517#undef WRITE_PATCH
1518
1519 err = gk20a_cde_convert(g, dmabuf,
1520 compbits_hoffset,
1521 scatterbuffer_offset,
1522 fence_in, submit_flags,
1523 params, param, &new_fence);
1524 if (err)
1525 goto out;
1526
1527 /* compbits generated, update state & fence */
1528 gk20a_fence_put(state->fence);
1529 state->fence = new_fence;
1530 state->valid_compbits |= consumer &
1531 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1532out:
1533 return err;
1534}
1535
1536static int gk20a_buffer_convert_gpu_to_cde(
1537 struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
1538 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1539 u64 scatterbuffer_offset,
1540 u32 width, u32 height, u32 block_height_log2,
1541 u32 submit_flags, struct nvgpu_fence *fence_in,
1542 struct gk20a_buffer_state *state)
1543{
1544 int err = 0;
1545
1546 if (!g->cde_app.initialised)
1547 return -ENOSYS;
1548
1549 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1550 g->cde_app.firmware_version);
1551
1552 if (g->cde_app.firmware_version == 1) {
1553 err = gk20a_buffer_convert_gpu_to_cde_v1(
1554 g, dmabuf, consumer, offset, compbits_hoffset,
1555 compbits_voffset, scatterbuffer_offset,
1556 width, height, block_height_log2,
1557 submit_flags, fence_in, state);
1558 } else {
1559 nvgpu_err(g, "unsupported CDE firmware version %d",
1560 g->cde_app.firmware_version);
1561 err = -EINVAL;
1562 }
1563
1564 return err;
1565}
1566
1567int gk20a_prepare_compressible_read(
1568 struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
1569 u64 compbits_hoffset, u64 compbits_voffset,
1570 u64 scatterbuffer_offset,
1571 u32 width, u32 height, u32 block_height_log2,
1572 u32 submit_flags, struct nvgpu_fence *fence,
1573 u32 *valid_compbits, u32 *zbc_color,
1574 struct gk20a_fence **fence_out)
1575{
1576 int err = 0;
1577 struct gk20a_buffer_state *state;
1578 struct dma_buf *dmabuf;
1579 u32 missing_bits;
1580
1581 dmabuf = dma_buf_get(buffer_fd);
1582 if (IS_ERR(dmabuf))
1583 return -EINVAL;
1584
1585 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1586 if (err) {
1587 dma_buf_put(dmabuf);
1588 return err;
1589 }
1590
1591 missing_bits = (state->valid_compbits ^ request) & request;
1592
1593 nvgpu_mutex_acquire(&state->lock);
1594
1595 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1596
1597 gk20a_fence_put(state->fence);
1598 state->fence = NULL;
1599 /* state->fence = decompress();
1600 state->valid_compbits = 0; */
1601 err = -EINVAL;
1602 goto out;
1603 } else if (missing_bits) {
1604 u32 missing_cde_bits = missing_bits &
1605 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1606 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1607 missing_cde_bits) {
1608 err = gk20a_buffer_convert_gpu_to_cde(
1609 g, dmabuf,
1610 missing_cde_bits,
1611 offset, compbits_hoffset,
1612 compbits_voffset, scatterbuffer_offset,
1613 width, height, block_height_log2,
1614 submit_flags, fence,
1615 state);
1616 if (err)
1617 goto out;
1618 }
1619 }
1620
1621 if (state->fence && fence_out)
1622 *fence_out = gk20a_fence_get(state->fence);
1623
1624 if (valid_compbits)
1625 *valid_compbits = state->valid_compbits;
1626
1627 if (zbc_color)
1628 *zbc_color = state->zbc_color;
1629
1630out:
1631 nvgpu_mutex_release(&state->lock);
1632 dma_buf_put(dmabuf);
1633 return err;
1634}
1635
1636int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1637 u32 valid_compbits, u64 offset, u32 zbc_color)
1638{
1639 int err;
1640 struct gk20a_buffer_state *state;
1641 struct dma_buf *dmabuf;
1642
1643 dmabuf = dma_buf_get(buffer_fd);
1644 if (IS_ERR(dmabuf)) {
1645 nvgpu_err(g, "invalid dmabuf");
1646 return -EINVAL;
1647 }
1648
1649 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1650 if (err) {
1651 nvgpu_err(g, "could not get state from dmabuf");
1652 dma_buf_put(dmabuf);
1653 return err;
1654 }
1655
1656 nvgpu_mutex_acquire(&state->lock);
1657
1658 /* Update the compbits state. */
1659 state->valid_compbits = valid_compbits;
1660 state->zbc_color = zbc_color;
1661
1662 /* Discard previous compbit job fence. */
1663 gk20a_fence_put(state->fence);
1664 state->fence = NULL;
1665
1666 nvgpu_mutex_release(&state->lock);
1667 dma_buf_put(dmabuf);
1668 return 0;
1669}