Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/cde.c')
-rw-r--r-- drivers/gpu/nvgpu/common/linux/cde.c | 1710
1 file changed, 1710 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
new file mode 100644
index 00000000..143e5b75
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -0,0 +1,1710 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22#include <uapi/linux/nvgpu.h>
23
24#include <trace/events/gk20a.h>
25
26#include <nvgpu/dma.h>
27#include <nvgpu/gmmu.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/kmem.h>
31#include <nvgpu/log.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/firmware.h>
34
35#include <nvgpu/linux/vm.h>
36
37#include "gk20a/gk20a.h"
38#include "gk20a/channel_gk20a.h"
39#include "gk20a/mm_gk20a.h"
40#include "gk20a/fence_gk20a.h"
41#include "gk20a/gr_gk20a.h"
42
43#include "cde.h"
44#include "os_linux.h"
45#include "dmabuf.h"
46#include "channel.h"
47
48#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
49#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
50
51static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
52static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
53
54#define CTX_DELETE_TIME 1000
55
56#define MAX_CTX_USE_COUNT 42
57#define MAX_CTX_RETRY_TIME 2000
58
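/*
 * Look up the buffer mapped at @gpu_vaddr in @vm and return the IO address
 * of its first scatterlist entry, or 0 if no mapping covers the address.
 */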
59static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
60{
61 struct nvgpu_mapped_buf *buffer;
62 dma_addr_t addr = 0;
63 struct gk20a *g = gk20a_from_vm(vm);
64
65 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
66 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
67 if (buffer)
68 addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
69 nvgpu_mutex_release(&vm->update_gmmu_lock);
70
71 return addr;
72}
73
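/*
 * Free the buffers allocated from the firmware image and the combined
 * command list, and reset the image-related state of the context.
 */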
74static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
75{
76 unsigned int i;
77
78 for (i = 0; i < cde_ctx->num_bufs; i++) {
79 struct nvgpu_mem *mem = cde_ctx->mem + i;
80 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
81 }
82
83 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
84
85 cde_ctx->convert_cmd = NULL;
86 cde_ctx->init_convert_cmd = NULL;
87 cde_ctx->num_bufs = 0;
88 cde_ctx->num_params = 0;
89 cde_ctx->init_cmd_num_entries = 0;
90 cde_ctx->convert_cmd_num_entries = 0;
91 cde_ctx->init_cmd_executed = false;
92}
93
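/*
 * Tear down one context: free its image buffers, unmap the compbit backing
 * store, close the channel and drop the context from the app's bookkeeping.
 */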
94static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
95__must_hold(&cde_app->mutex)
96{
97 struct nvgpu_os_linux *l = cde_ctx->l;
98 struct gk20a *g = &l->g;
99 struct channel_gk20a *ch = cde_ctx->ch;
100 struct vm_gk20a *vm = ch->vm;
101
102 trace_gk20a_cde_remove_ctx(cde_ctx);
103
104 /* release mapped memory */
105 gk20a_deinit_cde_img(cde_ctx);
106 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
107 cde_ctx->backing_store_vaddr);
108
109 /* free the channel */
110 gk20a_channel_close(ch);
111
112 /* housekeeping on app */
113 nvgpu_list_del(&cde_ctx->list);
114 l->cde_app.ctx_count--;
115 nvgpu_kfree(g, cde_ctx);
116}
117
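/*
 * Cancel the delayed deleter work of a temporary context. When waiting for
 * the work to finish, the app mutex is dropped to avoid deadlocking with it.
 */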
118static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
119 bool wait_finish)
120__releases(&cde_app->mutex)
121__acquires(&cde_app->mutex)
122{
123 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
124
125	/* permanent contexts do not have a deleter work item */
126 if (!cde_ctx->is_temporary)
127 return;
128
129 if (wait_finish) {
130 nvgpu_mutex_release(&cde_app->mutex);
131 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
132 nvgpu_mutex_acquire(&cde_app->mutex);
133 } else {
134 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
135 }
136}
137
138static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
139__must_hold(&l->cde_app->mutex)
140{
141 struct gk20a_cde_app *cde_app = &l->cde_app;
142 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
143
144	/* It is safe to release the mutex in cancel_deleter since the app is
145	 * being deinitialised; no new jobs are started. A deleter work can only
146	 * be waiting for the mutex, or at an earlier point, and will then abort */
147
148 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
149 &cde_app->free_contexts, gk20a_cde_ctx, list) {
150 gk20a_cde_cancel_deleter(cde_ctx, true);
151 gk20a_cde_remove_ctx(cde_ctx);
152 }
153
154 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
155 &cde_app->used_contexts, gk20a_cde_ctx, list) {
156 gk20a_cde_cancel_deleter(cde_ctx, true);
157 gk20a_cde_remove_ctx(cde_ctx);
158 }
159}
160
161static void gk20a_cde_stop(struct nvgpu_os_linux *l)
162__must_hold(&l->cde_app->mutex)
163{
164 struct gk20a_cde_app *cde_app = &l->cde_app;
165
166 /* prevent further conversions and delayed works from working */
167 cde_app->initialised = false;
168 /* free all data, empty the list */
169 gk20a_cde_remove_contexts(l);
170}
171
172void gk20a_cde_destroy(struct nvgpu_os_linux *l)
173__acquires(&l->cde_app->mutex)
174__releases(&l->cde_app->mutex)
175{
176 struct gk20a_cde_app *cde_app = &l->cde_app;
177
178 if (!cde_app->initialised)
179 return;
180
181 nvgpu_mutex_acquire(&cde_app->mutex);
182 gk20a_cde_stop(l);
183 nvgpu_mutex_release(&cde_app->mutex);
184
185 nvgpu_mutex_destroy(&cde_app->mutex);
186}
187
188void gk20a_cde_suspend(struct nvgpu_os_linux *l)
189__acquires(&l->cde_app->mutex)
190__releases(&l->cde_app->mutex)
191{
192 struct gk20a_cde_app *cde_app = &l->cde_app;
193 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
194
195 if (!cde_app->initialised)
196 return;
197
198 nvgpu_mutex_acquire(&cde_app->mutex);
199
200 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
201 &cde_app->free_contexts, gk20a_cde_ctx, list) {
202 gk20a_cde_cancel_deleter(cde_ctx, false);
203 }
204
205 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
206 &cde_app->used_contexts, gk20a_cde_ctx, list) {
207 gk20a_cde_cancel_deleter(cde_ctx, false);
208 }
209
210 nvgpu_mutex_release(&cde_app->mutex);
211
212}
213
214static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
215__must_hold(&l->cde_app->mutex)
216{
217 struct gk20a_cde_app *cde_app = &l->cde_app;
218 struct gk20a_cde_ctx *cde_ctx;
219
220 cde_ctx = gk20a_cde_allocate_context(l);
221 if (IS_ERR(cde_ctx))
222 return PTR_ERR(cde_ctx);
223
224 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
225 cde_app->ctx_count++;
226 if (cde_app->ctx_count > cde_app->ctx_count_top)
227 cde_app->ctx_count_top = cde_app->ctx_count;
228
229 return 0;
230}
231
232static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
233__must_hold(&l->cde_app->mutex)
234{
235 int err;
236 int i;
237
238 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
239 err = gk20a_cde_create_context(l);
240 if (err)
241 goto out;
242 }
243
244 return 0;
245out:
246 gk20a_cde_remove_contexts(l);
247 return err;
248}
249
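/*
 * Handle a TYPE_BUF element: validate it against the firmware image,
 * allocate and map a sysmem buffer and copy in any initial contents.
 */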
250static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
251 struct nvgpu_firmware *img,
252 struct gk20a_cde_hdr_buf *buf)
253{
254 struct nvgpu_mem *mem;
255 struct nvgpu_os_linux *l = cde_ctx->l;
256 struct gk20a *g = &l->g;
257 int err;
258
259 /* check that the file can hold the buf */
260 if (buf->data_byte_offset != 0 &&
261 buf->data_byte_offset + buf->num_bytes > img->size) {
262 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
263 cde_ctx->num_bufs);
264 return -EINVAL;
265 }
266
267 /* check that we have enough buf elems available */
268 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
269 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
270 cde_ctx->num_bufs);
271 return -ENOMEM;
272 }
273
274 /* allocate buf */
275 mem = cde_ctx->mem + cde_ctx->num_bufs;
276 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
277 if (err) {
278 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
279 cde_ctx->num_bufs);
280 return -ENOMEM;
281 }
282
283 /* copy the content */
284 if (buf->data_byte_offset != 0)
285 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
286 buf->num_bytes);
287
288 cde_ctx->num_bufs++;
289
290 return 0;
291}
292
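/*
 * Patch a 32-bit or 64-bit word at @target: shift and mask @value, merge it
 * with the existing contents and write the result back, swapping the 32-bit
 * halves for TYPE_PARAM_TYPE_U64_BIG targets.
 */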
293static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
294 int type, s32 shift, u64 mask, u64 value)
295{
296 struct nvgpu_os_linux *l = cde_ctx->l;
297 struct gk20a *g = &l->g;
298 u32 *target_mem_ptr = target;
299 u64 *target_mem_ptr_u64 = target;
300 u64 current_value, new_value;
301
302 value = (shift >= 0) ? value << shift : value >> -shift;
303 value &= mask;
304
305 /* read current data from the location */
306 current_value = 0;
307 if (type == TYPE_PARAM_TYPE_U32) {
308 if (mask != 0xfffffffful)
309 current_value = *target_mem_ptr;
310 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
311 if (mask != ~0ul)
312 current_value = *target_mem_ptr_u64;
313 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
314 current_value = *target_mem_ptr_u64;
315 current_value = (u64)(current_value >> 32) |
316 (u64)(current_value << 32);
317 } else {
318 nvgpu_warn(g, "cde: unknown type. type=%d",
319 type);
320 return -EINVAL;
321 }
322
323 current_value &= ~mask;
324 new_value = current_value | value;
325
326 /* store the element data back */
327 if (type == TYPE_PARAM_TYPE_U32)
328 *target_mem_ptr = (u32)new_value;
329 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
330 *target_mem_ptr_u64 = new_value;
331 else {
332 new_value = (u64)(new_value >> 32) |
333 (u64)(new_value << 32);
334 *target_mem_ptr_u64 = new_value;
335 }
336
337 return 0;
338}
339
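/*
 * Handle a TYPE_REPLACE element: patch the GPU virtual address of a source
 * buffer (plus offset) into the given location in a target buffer.
 */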
340static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
341 struct nvgpu_firmware *img,
342 struct gk20a_cde_hdr_replace *replace)
343{
344 struct nvgpu_mem *source_mem;
345 struct nvgpu_mem *target_mem;
346 struct nvgpu_os_linux *l = cde_ctx->l;
347 struct gk20a *g = &l->g;
348 u32 *target_mem_ptr;
349 u64 vaddr;
350 int err;
351
352 if (replace->target_buf >= cde_ctx->num_bufs ||
353 replace->source_buf >= cde_ctx->num_bufs) {
354 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
355 replace->target_buf, replace->source_buf,
356 cde_ctx->num_bufs);
357 return -EINVAL;
358 }
359
360 source_mem = cde_ctx->mem + replace->source_buf;
361 target_mem = cde_ctx->mem + replace->target_buf;
362 target_mem_ptr = target_mem->cpu_va;
363
364 if (source_mem->size < (replace->source_byte_offset + 3) ||
365 target_mem->size < (replace->target_byte_offset + 3)) {
366 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
367 replace->target_byte_offset,
368 replace->source_byte_offset,
369 source_mem->size,
370 target_mem->size);
371 return -EINVAL;
372 }
373
374 /* calculate the target pointer */
375 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
376
377 /* determine patch value */
378 vaddr = source_mem->gpu_va + replace->source_byte_offset;
379 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
380 replace->shift, replace->mask,
381 vaddr);
382 if (err) {
383 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
384 err, replace->target_buf,
385 replace->target_byte_offset,
386 replace->source_buf,
387 replace->source_byte_offset);
388 }
389
390 return err;
391}
392
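/*
 * Write the run-time parameter values (hardware configuration, buffer
 * addresses and sizes, and user-supplied values) into the firmware buffers
 * before a conversion is submitted.
 */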
393static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
394{
395 struct nvgpu_os_linux *l = cde_ctx->l;
396 struct gk20a *g = &l->g;
397 struct nvgpu_mem *target_mem;
398 u32 *target_mem_ptr;
399 u64 new_data;
400 int user_id = 0, err;
401 unsigned int i;
402
403 for (i = 0; i < cde_ctx->num_params; i++) {
404 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
405 target_mem = cde_ctx->mem + param->target_buf;
406 target_mem_ptr = target_mem->cpu_va;
407 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
408
409 switch (param->id) {
410 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
411 new_data = g->gr.comptags_per_cacheline;
412 break;
413 case TYPE_PARAM_GPU_CONFIGURATION:
414 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
415 g->gr.cacheline_size;
416 break;
417 case TYPE_PARAM_FIRSTPAGEOFFSET:
418 new_data = cde_ctx->surf_param_offset;
419 break;
420 case TYPE_PARAM_NUMPAGES:
421 new_data = cde_ctx->surf_param_lines;
422 break;
423 case TYPE_PARAM_BACKINGSTORE:
424 new_data = cde_ctx->backing_store_vaddr;
425 break;
426 case TYPE_PARAM_DESTINATION:
427 new_data = cde_ctx->compbit_vaddr;
428 break;
429 case TYPE_PARAM_DESTINATION_SIZE:
430 new_data = cde_ctx->compbit_size;
431 break;
432 case TYPE_PARAM_BACKINGSTORE_SIZE:
433 new_data = g->gr.compbit_store.mem.size;
434 break;
435 case TYPE_PARAM_SOURCE_SMMU_ADDR:
436 new_data = gpuva_to_iova_base(cde_ctx->vm,
437 cde_ctx->surf_vaddr);
438 if (new_data == 0)
439 return -EINVAL;
440 break;
441 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
442 new_data = g->gr.compbit_store.base_hw;
443 break;
444 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
445 new_data = g->gr.gobs_per_comptagline_per_slice;
446 break;
447 case TYPE_PARAM_SCATTERBUFFER:
448 new_data = cde_ctx->scatterbuffer_vaddr;
449 break;
450 case TYPE_PARAM_SCATTERBUFFER_SIZE:
451 new_data = cde_ctx->scatterbuffer_size;
452 break;
453 default:
454 user_id = param->id - NUM_RESERVED_PARAMS;
455 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
456 continue;
457 new_data = cde_ctx->user_param_values[user_id];
458 }
459
460 gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
461 i, param->id, param->target_buf,
462 param->target_byte_offset, new_data,
463 param->data_offset, param->type, param->shift,
464 param->mask);
465
466 new_data += param->data_offset;
467
468 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
469 param->shift, param->mask, new_data);
470
471 if (err) {
472 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
473 err, i, param->id, param->target_buf,
474 param->target_byte_offset, new_data);
475 return err;
476 }
477 }
478
479 return 0;
480}
481
482static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
483 struct nvgpu_firmware *img,
484 struct gk20a_cde_hdr_param *param)
485{
486 struct nvgpu_mem *target_mem;
487 struct nvgpu_os_linux *l = cde_ctx->l;
488 struct gk20a *g = &l->g;
489
490 if (param->target_buf >= cde_ctx->num_bufs) {
491 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
492 cde_ctx->num_params, param->target_buf,
493 cde_ctx->num_bufs);
494 return -EINVAL;
495 }
496
497 target_mem = cde_ctx->mem + param->target_buf;
498 if (target_mem->size < (param->target_byte_offset + 3)) {
499 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
500 cde_ctx->num_params, param->target_byte_offset,
501 target_mem->size);
502 return -EINVAL;
503 }
504
505 /* does this parameter fit into our parameter structure */
506 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
507		nvgpu_warn(g, "cde: no room for new parameters. param idx = %d",
508 cde_ctx->num_params);
509 return -ENOMEM;
510 }
511
512 /* is the given id valid? */
513 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
514 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
515			   cde_ctx->num_params, param->id,
516 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
517 return -EINVAL;
518 }
519
520 cde_ctx->params[cde_ctx->num_params] = *param;
521 cde_ctx->num_params++;
522
523 return 0;
524}
525
526static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
527 struct nvgpu_firmware *img,
528 u32 required_class)
529{
530 struct nvgpu_os_linux *l = cde_ctx->l;
531 struct gk20a *g = &l->g;
532 int err;
533
534 /* CDE enabled */
535 cde_ctx->ch->cde = true;
536
537 err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
538 if (err) {
539 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
540 err);
541 return err;
542 }
543
544 return 0;
545}
546
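/*
 * Handle a TYPE_COMMAND element: build the gpfifo entry list for either the
 * init or the convert command from the command elements in the image.
 */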
547static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
548 struct nvgpu_firmware *img,
549 u32 op,
550 struct gk20a_cde_cmd_elem *cmd_elem,
551 u32 num_elems)
552{
553 struct nvgpu_os_linux *l = cde_ctx->l;
554 struct gk20a *g = &l->g;
555 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
556 u32 *num_entries;
557 unsigned int i;
558
559 /* check command type */
560 if (op == TYPE_BUF_COMMAND_INIT) {
561 gpfifo = &cde_ctx->init_convert_cmd;
562 num_entries = &cde_ctx->init_cmd_num_entries;
563 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
564 gpfifo = &cde_ctx->convert_cmd;
565 num_entries = &cde_ctx->convert_cmd_num_entries;
566 } else {
567 nvgpu_warn(g, "cde: unknown command. op=%u",
568 op);
569 return -EINVAL;
570 }
571
572 /* allocate gpfifo entries to be pushed */
573 *gpfifo = nvgpu_kzalloc(g,
574 sizeof(struct nvgpu_gpfifo) * num_elems);
575 if (!*gpfifo) {
576 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
577 return -ENOMEM;
578 }
579
580 gpfifo_elem = *gpfifo;
581 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
582 struct nvgpu_mem *target_mem;
583
584 /* validate the current entry */
585 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
586 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
587 cmd_elem->target_buf, cde_ctx->num_bufs);
588 return -EINVAL;
589 }
590
591 target_mem = cde_ctx->mem + cmd_elem->target_buf;
592		if (target_mem->size <
593 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
594 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
595 target_mem->size,
596 cmd_elem->target_byte_offset,
597 cmd_elem->num_bytes);
598 return -EINVAL;
599 }
600
601 /* store the element into gpfifo */
602 gpfifo_elem->entry0 =
603 u64_lo32(target_mem->gpu_va +
604 cmd_elem->target_byte_offset);
605 gpfifo_elem->entry1 =
606 u64_hi32(target_mem->gpu_va +
607 cmd_elem->target_byte_offset) |
608 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
609 sizeof(u32));
610 }
611
612 *num_entries = num_elems;
613 return 0;
614}
615
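/*
 * Merge the init and convert gpfifo lists into a single allocation so that
 * the first submit can push both with one gpfifo.
 */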
616static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
617{
618 struct nvgpu_os_linux *l = cde_ctx->l;
619 struct gk20a *g = &l->g;
620 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
621 sizeof(struct nvgpu_gpfifo);
622 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
623 sizeof(struct nvgpu_gpfifo);
624 unsigned long total_bytes = init_bytes + conv_bytes;
625 struct nvgpu_gpfifo *combined_cmd;
626
627 /* allocate buffer that has space for both */
628 combined_cmd = nvgpu_kzalloc(g, total_bytes);
629 if (!combined_cmd) {
630 nvgpu_warn(g,
631 "cde: could not allocate memory for gpfifo entries");
632 return -ENOMEM;
633 }
634
635 /* move the original init here and append convert */
636 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
637 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
638 cde_ctx->convert_cmd, conv_bytes);
639
640 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
641 nvgpu_kfree(g, cde_ctx->convert_cmd);
642
643 cde_ctx->init_convert_cmd = combined_cmd;
644 cde_ctx->convert_cmd = combined_cmd
645 + cde_ctx->init_cmd_num_entries;
646
647 return 0;
648}
649
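/*
 * Parse the firmware image: walk its header elements and set up buffers,
 * replacements, parameters, commands and the required engine class.
 */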
650static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
651 struct nvgpu_firmware *img)
652{
653 struct nvgpu_os_linux *l = cde_ctx->l;
654 struct gk20a *g = &l->g;
655 struct gk20a_cde_app *cde_app = &l->cde_app;
656 u32 *data = (u32 *)img->data;
657 u32 num_of_elems;
658 struct gk20a_cde_hdr_elem *elem;
659 u32 min_size = 0;
660 int err = 0;
661 unsigned int i;
662
663 min_size += 2 * sizeof(u32);
664 if (img->size < min_size) {
665 nvgpu_warn(g, "cde: invalid image header");
666 return -EINVAL;
667 }
668
669 cde_app->firmware_version = data[0];
670 num_of_elems = data[1];
671
672 min_size += num_of_elems * sizeof(*elem);
673 if (img->size < min_size) {
674 nvgpu_warn(g, "cde: bad image");
675 return -EINVAL;
676 }
677
678 elem = (struct gk20a_cde_hdr_elem *)&data[2];
679 for (i = 0; i < num_of_elems; i++) {
680 int err = 0;
681 switch (elem->type) {
682 case TYPE_BUF:
683 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
684 break;
685 case TYPE_REPLACE:
686 err = gk20a_init_cde_replace(cde_ctx, img,
687 &elem->replace);
688 break;
689 case TYPE_PARAM:
690 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
691 break;
692 case TYPE_REQUIRED_CLASS:
693 err = gk20a_init_cde_required_class(cde_ctx, img,
694 elem->required_class);
695 break;
696 case TYPE_COMMAND:
697 {
698 struct gk20a_cde_cmd_elem *cmd = (void *)
699 &img->data[elem->command.data_byte_offset];
700 err = gk20a_init_cde_command(cde_ctx, img,
701 elem->command.op, cmd,
702 elem->command.num_entries);
703 break;
704 }
705 case TYPE_ARRAY:
706 memcpy(&cde_app->arrays[elem->array.id][0],
707 elem->array.data,
708 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
709 break;
710 default:
711 nvgpu_warn(g, "cde: unknown header element");
712 err = -EINVAL;
713 }
714
715 if (err)
716 goto deinit_image;
717
718 elem++;
719 }
720
721 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
722		nvgpu_warn(g, "cde: init command not defined");
723 err = -EINVAL;
724 goto deinit_image;
725 }
726
727 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
728 nvgpu_warn(g, "cde: convert command not defined");
729 err = -EINVAL;
730 goto deinit_image;
731 }
732
733 err = gk20a_cde_pack_cmdbufs(cde_ctx);
734 if (err)
735 goto deinit_image;
736
737 return 0;
738
739deinit_image:
740 gk20a_deinit_cde_img(cde_ctx);
741 return err;
742}
743
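/*
 * Submit either the combined init+convert gpfifo (on first use) or only the
 * convert gpfifo to the context's channel.
 */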
744static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
745 u32 op, struct nvgpu_fence *fence,
746 u32 flags, struct gk20a_fence **fence_out)
747{
748 struct nvgpu_os_linux *l = cde_ctx->l;
749 struct gk20a *g = &l->g;
750 struct nvgpu_gpfifo *gpfifo = NULL;
751 int num_entries = 0;
752
753 /* check command type */
754 if (op == TYPE_BUF_COMMAND_INIT) {
755 /* both init and convert combined */
756 gpfifo = cde_ctx->init_convert_cmd;
757 num_entries = cde_ctx->init_cmd_num_entries
758 + cde_ctx->convert_cmd_num_entries;
759 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
760 gpfifo = cde_ctx->convert_cmd;
761 num_entries = cde_ctx->convert_cmd_num_entries;
762 } else {
763 nvgpu_warn(g, "cde: unknown buffer");
764 return -EINVAL;
765 }
766
767 if (gpfifo == NULL || num_entries == 0) {
768 nvgpu_warn(g, "cde: buffer not available");
769 return -ENOSYS;
770 }
771
772 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
773 num_entries, flags, fence, fence_out, true,
774 NULL);
775}
776
777static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
778__acquires(&cde_app->mutex)
779__releases(&cde_app->mutex)
780{
781 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
782
783 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
784 trace_gk20a_cde_release(cde_ctx);
785
786 nvgpu_mutex_acquire(&cde_app->mutex);
787
788 if (cde_ctx->in_use) {
789 cde_ctx->in_use = false;
790 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
791 cde_app->ctx_usecount--;
792 } else {
793 gk20a_dbg_info("double release cde context %p", cde_ctx);
794 }
795
796 nvgpu_mutex_release(&cde_app->mutex);
797}
798
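/*
 * Delayed work that deletes an idle temporary context, unless the context
 * has been taken into use again or the app is shutting down.
 */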
799static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
800__acquires(&cde_app->mutex)
801__releases(&cde_app->mutex)
802{
803 struct delayed_work *delay_work = to_delayed_work(work);
804 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
805 struct gk20a_cde_ctx, ctx_deleter_work);
806 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
807 struct nvgpu_os_linux *l = cde_ctx->l;
808 struct gk20a *g = &l->g;
809 int err;
810
811 /* someone has just taken it? engine deletion started? */
812 if (cde_ctx->in_use || !cde_app->initialised)
813 return;
814
815 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
816 "cde: attempting to delete temporary %p", cde_ctx);
817
818 err = gk20a_busy(g);
819 if (err) {
820		/* this context will find new use later anyway, so not freeing
821		 * it here does not leak anything */
822		nvgpu_warn(g, "cde: cannot power on gk20a, postponing"
823			   " temp ctx deletion");
824 return;
825 }
826
827 nvgpu_mutex_acquire(&cde_app->mutex);
828 if (cde_ctx->in_use || !cde_app->initialised) {
829 gk20a_dbg(gpu_dbg_cde_ctx,
830 "cde: context use raced, not deleting %p",
831 cde_ctx);
832 goto out;
833 }
834
835 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
836 "double pending %p", cde_ctx);
837
838 gk20a_cde_remove_ctx(cde_ctx);
839 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
840 "cde: destroyed %p count=%d use=%d max=%d",
841 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
842 cde_app->ctx_count_top);
843
844out:
845 nvgpu_mutex_release(&cde_app->mutex);
846 gk20a_idle(g);
847}
848
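/*
 * Pick an idle context from the free list, or allocate a temporary one if
 * none is available. Returns -EAGAIN while too many contexts are in use.
 */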
849static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
850__must_hold(&cde_app->mutex)
851{
852 struct gk20a *g = &l->g;
853 struct gk20a_cde_app *cde_app = &l->cde_app;
854 struct gk20a_cde_ctx *cde_ctx;
855
856 /* exhausted? */
857
858 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
859 return ERR_PTR(-EAGAIN);
860
861 /* idle context available? */
862
863 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
864 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
865 gk20a_cde_ctx, list);
866 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
867 "cde: got free %p count=%d use=%d max=%d",
868 cde_ctx, cde_app->ctx_count,
869 cde_app->ctx_usecount,
870 cde_app->ctx_count_top);
871 trace_gk20a_cde_get_context(cde_ctx);
872
873 /* deleter work may be scheduled, but in_use prevents it */
874 cde_ctx->in_use = true;
875 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
876 cde_app->ctx_usecount++;
877
878 /* cancel any deletions now that ctx is in use */
879 gk20a_cde_cancel_deleter(cde_ctx, true);
880 return cde_ctx;
881 }
882
883 /* no free contexts, get a temporary one */
884
885 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
886 "cde: no free contexts, count=%d",
887 cde_app->ctx_count);
888
889 cde_ctx = gk20a_cde_allocate_context(l);
890 if (IS_ERR(cde_ctx)) {
891 nvgpu_warn(g, "cde: cannot allocate context: %ld",
892 PTR_ERR(cde_ctx));
893 return cde_ctx;
894 }
895
896 trace_gk20a_cde_get_context(cde_ctx);
897 cde_ctx->in_use = true;
898 cde_ctx->is_temporary = true;
899 cde_app->ctx_usecount++;
900 cde_app->ctx_count++;
901 if (cde_app->ctx_count > cde_app->ctx_count_top)
902 cde_app->ctx_count_top = cde_app->ctx_count;
903 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
904
905 return cde_ctx;
906}
907
908static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
909__releases(&cde_app->mutex)
910__acquires(&cde_app->mutex)
911{
912 struct gk20a *g = &l->g;
913 struct gk20a_cde_app *cde_app = &l->cde_app;
914 struct gk20a_cde_ctx *cde_ctx = NULL;
915 struct nvgpu_timeout timeout;
916
917 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
918 NVGPU_TIMER_CPU_TIMER);
919
920 do {
921 cde_ctx = gk20a_cde_do_get_context(l);
922 if (PTR_ERR(cde_ctx) != -EAGAIN)
923 break;
924
925 /* exhausted, retry */
926 nvgpu_mutex_release(&cde_app->mutex);
927 cond_resched();
928 nvgpu_mutex_acquire(&cde_app->mutex);
929 } while (!nvgpu_timeout_expired(&timeout));
930
931 return cde_ctx;
932}
933
934static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
935{
936 struct gk20a *g = &l->g;
937 struct gk20a_cde_ctx *cde_ctx;
938 int ret;
939
940 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
941 if (!cde_ctx)
942 return ERR_PTR(-ENOMEM);
943
944 cde_ctx->l = l;
945 cde_ctx->dev = dev_from_gk20a(g);
946
947 ret = gk20a_cde_load(cde_ctx);
948 if (ret) {
949 nvgpu_kfree(g, cde_ctx);
950 return ERR_PTR(ret);
951 }
952
953 nvgpu_init_list_node(&cde_ctx->list);
954 cde_ctx->is_temporary = false;
955 cde_ctx->in_use = false;
956 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
957 gk20a_cde_ctx_deleter_fn);
958
959 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
960 trace_gk20a_cde_allocate_context(cde_ctx);
961 return cde_ctx;
962}
963
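/*
 * Map the compbits/scatter buffer into the CDE VM, populate the scatter
 * buffer if the chip needs one, patch the conversion parameters and submit
 * the conversion on an acquired CDE context.
 */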
964int gk20a_cde_convert(struct nvgpu_os_linux *l,
965 struct dma_buf *compbits_scatter_buf,
966 u64 compbits_byte_offset,
967 u64 scatterbuffer_byte_offset,
968 struct nvgpu_fence *fence,
969 u32 __flags, struct gk20a_cde_param *params,
970 int num_params, struct gk20a_fence **fence_out)
971__acquires(&l->cde_app->mutex)
972__releases(&l->cde_app->mutex)
973{
974 struct gk20a *g = &l->g;
975 struct gk20a_cde_ctx *cde_ctx = NULL;
976 struct gk20a_comptags comptags;
977 struct nvgpu_os_buffer os_buf = {
978 compbits_scatter_buf,
979 dev_from_gk20a(g)
980 };
981 u64 mapped_compbits_offset = 0;
982 u64 compbits_size = 0;
983 u64 mapped_scatterbuffer_offset = 0;
984 u64 scatterbuffer_size = 0;
985 u64 map_vaddr = 0;
986 u64 map_offset = 0;
987 u64 map_size = 0;
988 u8 *surface = NULL;
989 u64 big_page_mask = 0;
990 u32 flags;
991 int err, i;
992 const s16 compbits_kind = 0;
993
994 gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
995 compbits_byte_offset, scatterbuffer_byte_offset);
996
997 /* scatter buffer must be after compbits buffer */
998 if (scatterbuffer_byte_offset &&
999 scatterbuffer_byte_offset < compbits_byte_offset)
1000 return -EINVAL;
1001
1002 err = gk20a_busy(g);
1003 if (err)
1004 return err;
1005
1006 nvgpu_mutex_acquire(&l->cde_app.mutex);
1007 cde_ctx = gk20a_cde_get_context(l);
1008 nvgpu_mutex_release(&l->cde_app.mutex);
1009 if (IS_ERR(cde_ctx)) {
1010 err = PTR_ERR(cde_ctx);
1011 goto exit_idle;
1012 }
1013
1014 /* First, map the buffer to local va */
1015
1016 /* ensure that the compbits buffer has drvdata */
1017 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
1018 dev_from_gk20a(g));
1019 if (err)
1020 goto exit_idle;
1021
1022	/* compbits don't start at a page-aligned offset, so we need to align
1023	   the region to be mapped */
1024 big_page_mask = cde_ctx->vm->big_page_size - 1;
1025 map_offset = compbits_byte_offset & ~big_page_mask;
1026 map_size = compbits_scatter_buf->size - map_offset;
1027
1028
1029 /* compute compbit start offset from the beginning of the mapped
1030 area */
1031 mapped_compbits_offset = compbits_byte_offset - map_offset;
1032 if (scatterbuffer_byte_offset) {
1033 compbits_size = scatterbuffer_byte_offset -
1034 compbits_byte_offset;
1035 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1036 map_offset;
1037 scatterbuffer_size = compbits_scatter_buf->size -
1038 scatterbuffer_byte_offset;
1039 } else {
1040 compbits_size = compbits_scatter_buf->size -
1041 compbits_byte_offset;
1042 }
1043
1044 gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1045 map_offset, map_size);
1046 gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1047 mapped_compbits_offset, compbits_size);
1048 gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1049 mapped_scatterbuffer_offset, scatterbuffer_size);
1050
1051
1052 /* map the destination buffer */
1053 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
1054 err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
1055 NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE |
1056 NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL,
1057 NV_KIND_INVALID,
1058 compbits_kind, /* incompressible kind */
1059 gk20a_mem_flag_none,
1060 map_offset, map_size,
1061 NULL,
1062 &map_vaddr);
1063 if (err) {
1064 dma_buf_put(compbits_scatter_buf);
1065 err = -EINVAL;
1066 goto exit_idle;
1067 }
1068
1069 if (scatterbuffer_byte_offset &&
1070 l->ops.cde.need_scatter_buffer &&
1071 l->ops.cde.need_scatter_buffer(g)) {
1072 struct sg_table *sgt;
1073 void *scatter_buffer;
1074
1075 surface = dma_buf_vmap(compbits_scatter_buf);
1076		if (!surface) {
1077 nvgpu_warn(g,
1078 "dma_buf_vmap failed");
1079 err = -EINVAL;
1080 goto exit_unmap_vaddr;
1081 }
1082
1083 scatter_buffer = surface + scatterbuffer_byte_offset;
1084
1085 gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1086 surface, scatter_buffer);
1087 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf);
1088 if (IS_ERR(sgt)) {
1089 nvgpu_warn(g,
1090 "mm_pin failed");
1091 err = -EINVAL;
1092 goto exit_unmap_surface;
1093 } else {
1094 err = l->ops.cde.populate_scatter_buffer(g, sgt,
1095 compbits_byte_offset, scatter_buffer,
1096 scatterbuffer_size);
1097 WARN_ON(err);
1098
1099 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1100 sgt);
1101 if (err)
1102 goto exit_unmap_surface;
1103 }
1104
1105 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1106 dma_buf_vunmap(compbits_scatter_buf, surface);
1107 surface = NULL;
1108 }
1109
1110 /* store source buffer compression tags */
1111 gk20a_get_comptags(&os_buf, &comptags);
1112 cde_ctx->surf_param_offset = comptags.offset;
1113 cde_ctx->surf_param_lines = comptags.lines;
1114
1115 /* store surface vaddr. This is actually compbit vaddr, but since
1116 compbits live in the same surface, and we can get the alloc base
1117 address by using gpuva_to_iova_base, this will do */
1118 cde_ctx->surf_vaddr = map_vaddr;
1119
1120 /* store information about destination */
1121 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1122 cde_ctx->compbit_size = compbits_size;
1123
1124 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1125 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1126
1127 /* remove existing argument data */
1128 memset(cde_ctx->user_param_values, 0,
1129 sizeof(cde_ctx->user_param_values));
1130
1131 /* read user space arguments for the conversion */
1132 for (i = 0; i < num_params; i++) {
1133 struct gk20a_cde_param *param = params + i;
1134 int id = param->id - NUM_RESERVED_PARAMS;
1135
1136 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1137 nvgpu_warn(g, "cde: unknown user parameter");
1138 err = -EINVAL;
1139 goto exit_unmap_surface;
1140 }
1141 cde_ctx->user_param_values[id] = param->value;
1142 }
1143
1144 /* patch data */
1145 err = gk20a_cde_patch_params(cde_ctx);
1146 if (err) {
1147 nvgpu_warn(g, "cde: failed to patch parameters");
1148 goto exit_unmap_surface;
1149 }
1150
1151 gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1152 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1153 gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1154 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1155 gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1156 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1157
1158	/* always take the postfence as it is needed to protect the
1159	 * cde context */
1160 flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
1161
1162	/* gk20a_cde_execute_buffer() will grab a power reference of its own */
1163 gk20a_idle(g);
1164
1165 /* execute the conversion buffer, combined with init first if it's the
1166 * first time */
1167 err = gk20a_cde_execute_buffer(cde_ctx,
1168 cde_ctx->init_cmd_executed
1169 ? TYPE_BUF_COMMAND_CONVERT
1170 : TYPE_BUF_COMMAND_INIT,
1171 fence, flags, fence_out);
1172
1173 cde_ctx->init_cmd_executed = true;
1174
1175 /* unmap the buffers - channel holds references to them now */
1176 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1177
1178 return err;
1179
1180exit_unmap_surface:
1181 if (surface)
1182 dma_buf_vunmap(compbits_scatter_buf, surface);
1183exit_unmap_vaddr:
1184 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1185exit_idle:
1186 gk20a_idle(g);
1187 return err;
1188}
1189
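/*
 * Channel completion callback: release the context when its job list is
 * empty, replace the context if the channel has timed out, and schedule
 * deletion of temporary contexts.
 */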
1190static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1191__acquires(&cde_app->mutex)
1192__releases(&cde_app->mutex)
1193{
1194 struct gk20a_cde_ctx *cde_ctx = data;
1195 struct nvgpu_os_linux *l = cde_ctx->l;
1196 struct gk20a *g = &l->g;
1197 struct gk20a_cde_app *cde_app = &l->cde_app;
1198 bool channel_idle;
1199
1200 channel_gk20a_joblist_lock(ch);
1201 channel_idle = channel_gk20a_joblist_is_empty(ch);
1202 channel_gk20a_joblist_unlock(ch);
1203
1204 if (!channel_idle)
1205 return;
1206
1207 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1208 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1209 if (!cde_ctx->in_use)
1210 gk20a_dbg_info("double finish cde context %p on channel %p",
1211 cde_ctx, ch);
1212
1213 if (ch->has_timedout) {
1214 if (cde_ctx->is_temporary) {
1215 nvgpu_warn(g,
1216 "cde: channel had timed out"
1217 " (temporary channel)");
1218 /* going to be deleted anyway */
1219 } else {
1220 nvgpu_warn(g,
1221 "cde: channel had timed out"
1222 ", reloading");
1223 /* mark it to be deleted, replace with a new one */
1224 nvgpu_mutex_acquire(&cde_app->mutex);
1225 cde_ctx->is_temporary = true;
1226 if (gk20a_cde_create_context(l)) {
1227 nvgpu_err(g, "cde: can't replace context");
1228 }
1229 nvgpu_mutex_release(&cde_app->mutex);
1230 }
1231 }
1232
1233 /* delete temporary contexts later (watch for doubles) */
1234 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1235 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1236 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1237 msecs_to_jiffies(CTX_DELETE_TIME));
1238 }
1239
1240 if (!ch->has_timedout)
1241 gk20a_cde_ctx_release(cde_ctx);
1242}
1243
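/*
 * Set up one CDE context: load the gpu2cde firmware, open and bind a
 * channel, map the compbit backing store and initialise the firmware image.
 */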
1244static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1245{
1246 struct nvgpu_os_linux *l = cde_ctx->l;
1247 struct gk20a *g = &l->g;
1248 struct nvgpu_firmware *img;
1249 struct channel_gk20a *ch;
1250 struct gr_gk20a *gr = &g->gr;
1251 int err = 0;
1252 u64 vaddr;
1253
1254 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1255 if (!img) {
1256 nvgpu_err(g, "cde: could not fetch the firmware");
1257 return -ENOSYS;
1258 }
1259
1260 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1261 cde_ctx,
1262 -1,
1263 false);
1264 if (!ch) {
1265 nvgpu_warn(g, "cde: gk20a channel not available");
1266 err = -ENOMEM;
1267 goto err_get_gk20a_channel;
1268 }
1269
1270 /* bind the channel to the vm */
1271 err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
1272 if (err) {
1273 nvgpu_warn(g, "cde: could not bind vm");
1274 goto err_commit_va;
1275 }
1276
1277 /* allocate gpfifo (1024 should be more than enough) */
1278 err = gk20a_channel_alloc_gpfifo(ch, 1024, 0, 0);
1279 if (err) {
1280 nvgpu_warn(g, "cde: unable to allocate gpfifo");
1281 goto err_alloc_gpfifo;
1282 }
1283
1284 /* map backing store to gpu virtual space */
1285 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1286 g->gr.compbit_store.mem.size,
1287 NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE,
1288 gk20a_mem_flag_read_only,
1289 false,
1290 gr->compbit_store.mem.aperture);
1291
1292 if (!vaddr) {
1293 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1294 err = -ENOMEM;
1295 goto err_map_backingstore;
1296 }
1297
1298 /* store initialisation data */
1299 cde_ctx->ch = ch;
1300 cde_ctx->vm = ch->vm;
1301 cde_ctx->backing_store_vaddr = vaddr;
1302
1303 /* initialise the firmware */
1304 err = gk20a_init_cde_img(cde_ctx, img);
1305 if (err) {
1306 nvgpu_warn(g, "cde: image initialisation failed");
1307 goto err_init_cde_img;
1308 }
1309
1310 /* initialisation done */
1311 nvgpu_release_firmware(g, img);
1312
1313 return 0;
1314
1315err_init_cde_img:
1316 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1317err_map_backingstore:
1318err_alloc_gpfifo:
1319 nvgpu_vm_put(ch->vm);
1320err_commit_va:
1321err_get_gk20a_channel:
1322 nvgpu_release_firmware(g, img);
1323 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1324 return err;
1325}
1326
1327int gk20a_cde_reload(struct nvgpu_os_linux *l)
1328__acquires(&l->cde_app->mutex)
1329__releases(&l->cde_app->mutex)
1330{
1331 struct gk20a *g = &l->g;
1332 struct gk20a_cde_app *cde_app = &l->cde_app;
1333 int err;
1334
1335 if (!cde_app->initialised)
1336 return -ENOSYS;
1337
1338 err = gk20a_busy(g);
1339 if (err)
1340 return err;
1341
1342 nvgpu_mutex_acquire(&cde_app->mutex);
1343
1344 gk20a_cde_stop(l);
1345
1346 err = gk20a_cde_create_contexts(l);
1347 if (!err)
1348 cde_app->initialised = true;
1349
1350 nvgpu_mutex_release(&cde_app->mutex);
1351
1352 gk20a_idle(g);
1353 return err;
1354}
1355
1356int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1357__acquires(&cde_app->mutex)
1358__releases(&cde_app->mutex)
1359{
1360 struct gk20a_cde_app *cde_app = &l->cde_app;
1361 int err;
1362
1363 if (cde_app->initialised)
1364 return 0;
1365
1366 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1367
1368 err = nvgpu_mutex_init(&cde_app->mutex);
1369 if (err)
1370 return err;
1371
1372 nvgpu_mutex_acquire(&cde_app->mutex);
1373
1374 nvgpu_init_list_node(&cde_app->free_contexts);
1375 nvgpu_init_list_node(&cde_app->used_contexts);
1376 cde_app->ctx_count = 0;
1377 cde_app->ctx_count_top = 0;
1378 cde_app->ctx_usecount = 0;
1379
1380 err = gk20a_cde_create_contexts(l);
1381 if (!err)
1382 cde_app->initialised = true;
1383
1384 nvgpu_mutex_release(&cde_app->mutex);
1385 gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1386
1387 if (err)
1388 nvgpu_mutex_destroy(&cde_app->mutex);
1389
1390 return err;
1391}
1392
1393enum cde_launch_patch_id {
1394 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1395 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1396 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1397 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1398 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1399 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1400 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1401 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1402 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1403 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1404 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1405 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1406 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1407 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1408 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1409 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1410 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1411 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1412 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1413 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1414 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1415 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1416 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1417 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1418 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1419 PATCH_H_LAUNCH_WORD1_ID = 1049,
1420 PATCH_H_LAUNCH_WORD2_ID = 1050,
1421 PATCH_V_LAUNCH_WORD1_ID = 1051,
1422 PATCH_V_LAUNCH_WORD2_ID = 1052,
1423 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1424 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1425 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1426 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1427};
1428
1429/* maximum number of WRITE_PATCHes in the below function */
1430#define MAX_CDE_LAUNCH_PATCHES 32
1431
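/*
 * Build the firmware v1 launch parameters (grid and group sizes, program
 * offsets and launch words) for the horizontal and vertical passes and run
 * the conversion.
 */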
1432static int gk20a_buffer_convert_gpu_to_cde_v1(
1433 struct nvgpu_os_linux *l,
1434 struct dma_buf *dmabuf, u32 consumer,
1435 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1436 u64 scatterbuffer_offset,
1437 u32 width, u32 height, u32 block_height_log2,
1438 u32 submit_flags, struct nvgpu_fence *fence_in,
1439 struct gk20a_buffer_state *state)
1440{
1441 struct gk20a *g = &l->g;
1442 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1443 int param = 0;
1444 int err = 0;
1445 struct gk20a_fence *new_fence = NULL;
1446 const int wgx = 8;
1447 const int wgy = 8;
1448 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1449 const int xalign = compbits_per_byte * wgx;
1450 const int yalign = wgy;
1451
1452 /* Compute per launch parameters */
1453 const int xtiles = (width + 7) >> 3;
1454 const int ytiles = (height + 7) >> 3;
1455 const int gridw_h = roundup(xtiles, xalign) / xalign;
1456 const int gridh_h = roundup(ytiles, yalign) / yalign;
1457 const int gridw_v = roundup(ytiles, xalign) / xalign;
1458 const int gridh_v = roundup(xtiles, yalign) / yalign;
1459 const int xblocks = (xtiles + 1) >> 1;
1460 const int voffset = compbits_voffset - compbits_hoffset;
1461
1462 int hprog = -1;
1463 int vprog = -1;
1464
1465 if (l->ops.cde.get_program_numbers)
1466 l->ops.cde.get_program_numbers(g, block_height_log2,
1467 l->cde_app.shader_parameter,
1468 &hprog, &vprog);
1469 else {
1470 nvgpu_warn(g, "cde: chip not supported");
1471 return -ENOSYS;
1472 }
1473
1474 if (hprog < 0 || vprog < 0) {
1475 nvgpu_warn(g, "cde: could not determine programs");
1476 return -ENOSYS;
1477 }
1478
1479 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1480 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1481 xtiles, ytiles);
1482
1483 gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1484 width, height, block_height_log2,
1485 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1486 gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1487 width, height, xtiles, ytiles);
1488 gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1489 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1490 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1491 hprog,
1492 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1493 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1494 vprog,
1495 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1496 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1497
1498 /* Write parameters */
1499#define WRITE_PATCH(NAME, VALUE) \
1500 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1501 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1502 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1503 block_height_log2);
1504 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1505 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1506 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1507 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1508 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1509
1510 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1511 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1512 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1513 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1514 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1515 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1516
1517 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1518 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1519 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1520 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1521 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1522 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1523
1524 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1525 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1526 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1527 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1528 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1529 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1530 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1531 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1532
1533 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1534 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1535 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1536 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1537 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1538 } else {
1539 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1540 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1541 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1542 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1543 }
1544
1545 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1546 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1547 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1548 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1549 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1550 } else {
1551 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1552 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1553 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1554 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1555 }
1556#undef WRITE_PATCH
1557
1558 err = gk20a_cde_convert(l, dmabuf,
1559 compbits_hoffset,
1560 scatterbuffer_offset,
1561 fence_in, submit_flags,
1562 params, param, &new_fence);
1563 if (err)
1564 goto out;
1565
1566 /* compbits generated, update state & fence */
1567 gk20a_fence_put(state->fence);
1568 state->fence = new_fence;
1569 state->valid_compbits |= consumer &
1570 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1571out:
1572 return err;
1573}
1574
1575static int gk20a_buffer_convert_gpu_to_cde(
1576 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1577 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1578 u64 scatterbuffer_offset,
1579 u32 width, u32 height, u32 block_height_log2,
1580 u32 submit_flags, struct nvgpu_fence *fence_in,
1581 struct gk20a_buffer_state *state)
1582{
1583 struct gk20a *g = &l->g;
1584 int err = 0;
1585
1586 if (!l->cde_app.initialised)
1587 return -ENOSYS;
1588
1589 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1590 l->cde_app.firmware_version);
1591
1592 if (l->cde_app.firmware_version == 1) {
1593 err = gk20a_buffer_convert_gpu_to_cde_v1(
1594 l, dmabuf, consumer, offset, compbits_hoffset,
1595 compbits_voffset, scatterbuffer_offset,
1596 width, height, block_height_log2,
1597 submit_flags, fence_in, state);
1598 } else {
1599 nvgpu_err(g, "unsupported CDE firmware version %d",
1600 l->cde_app.firmware_version);
1601 err = -EINVAL;
1602 }
1603
1604 return err;
1605}
1606
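/*
 * Ensure the requested compbits exist for the buffer: run a CDE conversion
 * for missing CDEH/CDEV bits and return the buffer state (fence, valid
 * compbits and ZBC color).
 */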
1607int gk20a_prepare_compressible_read(
1608 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1609 u64 compbits_hoffset, u64 compbits_voffset,
1610 u64 scatterbuffer_offset,
1611 u32 width, u32 height, u32 block_height_log2,
1612 u32 submit_flags, struct nvgpu_fence *fence,
1613 u32 *valid_compbits, u32 *zbc_color,
1614 struct gk20a_fence **fence_out)
1615{
1616 struct gk20a *g = &l->g;
1617 int err = 0;
1618 struct gk20a_buffer_state *state;
1619 struct dma_buf *dmabuf;
1620 u32 missing_bits;
1621
1622 dmabuf = dma_buf_get(buffer_fd);
1623 if (IS_ERR(dmabuf))
1624 return -EINVAL;
1625
1626 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1627 if (err) {
1628 dma_buf_put(dmabuf);
1629 return err;
1630 }
1631
1632 missing_bits = (state->valid_compbits ^ request) & request;
1633
1634 nvgpu_mutex_acquire(&state->lock);
1635
1636 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1637
1638 gk20a_fence_put(state->fence);
1639 state->fence = NULL;
1640 /* state->fence = decompress();
1641 state->valid_compbits = 0; */
1642 err = -EINVAL;
1643 goto out;
1644 } else if (missing_bits) {
1645 u32 missing_cde_bits = missing_bits &
1646 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1647 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1648 missing_cde_bits) {
1649 err = gk20a_buffer_convert_gpu_to_cde(
1650 l, dmabuf,
1651 missing_cde_bits,
1652 offset, compbits_hoffset,
1653 compbits_voffset, scatterbuffer_offset,
1654 width, height, block_height_log2,
1655 submit_flags, fence,
1656 state);
1657 if (err)
1658 goto out;
1659 }
1660 }
1661
1662 if (state->fence && fence_out)
1663 *fence_out = gk20a_fence_get(state->fence);
1664
1665 if (valid_compbits)
1666 *valid_compbits = state->valid_compbits;
1667
1668 if (zbc_color)
1669 *zbc_color = state->zbc_color;
1670
1671out:
1672 nvgpu_mutex_release(&state->lock);
1673 dma_buf_put(dmabuf);
1674 return err;
1675}
1676
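/*
 * Record which compbits are valid after a write and drop any previous
 * compbit job fence of the buffer.
 */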
1677int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1678 u32 valid_compbits, u64 offset, u32 zbc_color)
1679{
1680 int err;
1681 struct gk20a_buffer_state *state;
1682 struct dma_buf *dmabuf;
1683
1684 dmabuf = dma_buf_get(buffer_fd);
1685 if (IS_ERR(dmabuf)) {
1686 nvgpu_err(g, "invalid dmabuf");
1687 return -EINVAL;
1688 }
1689
1690 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1691 if (err) {
1692 nvgpu_err(g, "could not get state from dmabuf");
1693 dma_buf_put(dmabuf);
1694 return err;
1695 }
1696
1697 nvgpu_mutex_acquire(&state->lock);
1698
1699 /* Update the compbits state. */
1700 state->valid_compbits = valid_compbits;
1701 state->zbc_color = zbc_color;
1702
1703 /* Discard previous compbit job fence. */
1704 gk20a_fence_put(state->fence);
1705 state->fence = NULL;
1706
1707 nvgpu_mutex_release(&state->lock);
1708 dma_buf_put(dmabuf);
1709 return 0;
1710}