Diffstat (limited to 'include/os/linux/cde.c')
-rw-r--r--  include/os/linux/cde.c  1794
1 file changed, 0 insertions, 1794 deletions
diff --git a/include/os/linux/cde.c b/include/os/linux/cde.c
deleted file mode 100644
index 715513c..0000000
--- a/include/os/linux/cde.c
+++ /dev/null
@@ -1,1794 +0,0 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22#include <uapi/linux/nvgpu.h>
23
24#include <trace/events/gk20a.h>
25
26#include <nvgpu/dma.h>
27#include <nvgpu/gmmu.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/kmem.h>
31#include <nvgpu/log.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/firmware.h>
34#include <nvgpu/os_sched.h>
35#include <nvgpu/channel.h>
36#include <nvgpu/utils.h>
37#include <nvgpu/gk20a.h>
38
39#include <nvgpu/linux/vm.h>
40
41#include "gk20a/mm_gk20a.h"
42#include "gk20a/fence_gk20a.h"
43#include "gk20a/gr_gk20a.h"
44
45#include "cde.h"
46#include "os_linux.h"
47#include "dmabuf.h"
48#include "channel.h"
49#include "cde_gm20b.h"
50#include "cde_gp10b.h"
51
52#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
53#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
54
55static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
56static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
57
58#define CTX_DELETE_TIME 1000 /* ms */
59
60#define MAX_CTX_USE_COUNT 42
61#define MAX_CTX_RETRY_TIME 2000 /* ms */
62
63static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
64{
65 struct nvgpu_mapped_buf *buffer;
66 dma_addr_t addr = 0;
67 struct gk20a *g = gk20a_from_vm(vm);
68
69 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
70 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
71 if (buffer)
72 addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
73 nvgpu_mutex_release(&vm->update_gmmu_lock);
74
75 return addr;
76}
77
78static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
79{
80 unsigned int i;
81
82 for (i = 0; i < cde_ctx->num_bufs; i++) {
83 struct nvgpu_mem *mem = cde_ctx->mem + i;
84 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
85 }
86
87 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
88
89 cde_ctx->convert_cmd = NULL;
90 cde_ctx->init_convert_cmd = NULL;
91 cde_ctx->num_bufs = 0;
92 cde_ctx->num_params = 0;
93 cde_ctx->init_cmd_num_entries = 0;
94 cde_ctx->convert_cmd_num_entries = 0;
95 cde_ctx->init_cmd_executed = false;
96}
97
98static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
99__must_hold(&cde_app->mutex)
100{
101 struct nvgpu_os_linux *l = cde_ctx->l;
102 struct gk20a *g = &l->g;
103 struct channel_gk20a *ch = cde_ctx->ch;
104 struct vm_gk20a *vm = ch->vm;
105
106 trace_gk20a_cde_remove_ctx(cde_ctx);
107
108 /* release mapped memory */
109 gk20a_deinit_cde_img(cde_ctx);
110 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
111 cde_ctx->backing_store_vaddr);
112
113 /*
114 * free the channel
115 * gk20a_channel_close() will also unbind the channel from TSG
116 */
117 gk20a_channel_close(ch);
118 nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);
119
120 /* housekeeping on app */
121 nvgpu_list_del(&cde_ctx->list);
122 l->cde_app.ctx_count--;
123 nvgpu_kfree(g, cde_ctx);
124}
125
126static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
127 bool wait_finish)
128__releases(&cde_app->mutex)
129__acquires(&cde_app->mutex)
130{
131 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
132
133 /* permanent contexts do not have a deleter work */
134 if (!cde_ctx->is_temporary)
135 return;
136
137 if (wait_finish) {
138 nvgpu_mutex_release(&cde_app->mutex);
139 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
140 nvgpu_mutex_acquire(&cde_app->mutex);
141 } else {
142 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
143 }
144}
145
146static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
147__must_hold(&l->cde_app->mutex)
148{
149 struct gk20a_cde_app *cde_app = &l->cde_app;
150 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
151
152 /* safe to drop the mutex in cancel_deleter since the app is
153 * deinitialised and no new jobs are started; any deleter work can only
154 * be waiting for the mutex, or earlier, and will then abort */
155
156 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
157 &cde_app->free_contexts, gk20a_cde_ctx, list) {
158 gk20a_cde_cancel_deleter(cde_ctx, true);
159 gk20a_cde_remove_ctx(cde_ctx);
160 }
161
162 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
163 &cde_app->used_contexts, gk20a_cde_ctx, list) {
164 gk20a_cde_cancel_deleter(cde_ctx, true);
165 gk20a_cde_remove_ctx(cde_ctx);
166 }
167}
168
169static void gk20a_cde_stop(struct nvgpu_os_linux *l)
170__must_hold(&l->cde_app->mutex)
171{
172 struct gk20a_cde_app *cde_app = &l->cde_app;
173
174 /* prevent further conversions and delayed works from working */
175 cde_app->initialised = false;
176 /* free all data, empty the list */
177 gk20a_cde_remove_contexts(l);
178}
179
180void gk20a_cde_destroy(struct nvgpu_os_linux *l)
181__acquires(&l->cde_app->mutex)
182__releases(&l->cde_app->mutex)
183{
184 struct gk20a_cde_app *cde_app = &l->cde_app;
185
186 if (!cde_app->initialised)
187 return;
188
189 nvgpu_mutex_acquire(&cde_app->mutex);
190 gk20a_cde_stop(l);
191 nvgpu_mutex_release(&cde_app->mutex);
192
193 nvgpu_mutex_destroy(&cde_app->mutex);
194}
195
196void gk20a_cde_suspend(struct nvgpu_os_linux *l)
197__acquires(&l->cde_app->mutex)
198__releases(&l->cde_app->mutex)
199{
200 struct gk20a_cde_app *cde_app = &l->cde_app;
201 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
202
203 if (!cde_app->initialised)
204 return;
205
206 nvgpu_mutex_acquire(&cde_app->mutex);
207
208 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
209 &cde_app->free_contexts, gk20a_cde_ctx, list) {
210 gk20a_cde_cancel_deleter(cde_ctx, false);
211 }
212
213 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
214 &cde_app->used_contexts, gk20a_cde_ctx, list) {
215 gk20a_cde_cancel_deleter(cde_ctx, false);
216 }
217
218 nvgpu_mutex_release(&cde_app->mutex);
219
220}
221
222static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
223__must_hold(&l->cde_app->mutex)
224{
225 struct gk20a_cde_app *cde_app = &l->cde_app;
226 struct gk20a_cde_ctx *cde_ctx;
227
228 cde_ctx = gk20a_cde_allocate_context(l);
229 if (IS_ERR(cde_ctx))
230 return PTR_ERR(cde_ctx);
231
232 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
233 cde_app->ctx_count++;
234 if (cde_app->ctx_count > cde_app->ctx_count_top)
235 cde_app->ctx_count_top = cde_app->ctx_count;
236
237 return 0;
238}
239
240static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
241__must_hold(&l->cde_app->mutex)
242{
243 int err;
244 int i;
245
246 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
247 err = gk20a_cde_create_context(l);
248 if (err)
249 goto out;
250 }
251
252 return 0;
253out:
254 gk20a_cde_remove_contexts(l);
255 return err;
256}
257
258static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
259 struct nvgpu_firmware *img,
260 struct gk20a_cde_hdr_buf *buf)
261{
262 struct nvgpu_mem *mem;
263 struct nvgpu_os_linux *l = cde_ctx->l;
264 struct gk20a *g = &l->g;
265 int err;
266
267 /* check that the file can hold the buf */
268 if (buf->data_byte_offset != 0 &&
269 buf->data_byte_offset + buf->num_bytes > img->size) {
270 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
271 cde_ctx->num_bufs);
272 return -EINVAL;
273 }
274
275 /* check that we have enough buf elems available */
276 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
277 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
278 cde_ctx->num_bufs);
279 return -ENOMEM;
280 }
281
282 /* allocate buf */
283 mem = cde_ctx->mem + cde_ctx->num_bufs;
284 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
285 if (err) {
286 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
287 cde_ctx->num_bufs);
288 return -ENOMEM;
289 }
290
291 /* copy the content */
292 if (buf->data_byte_offset != 0)
293 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
294 buf->num_bytes);
295
296 cde_ctx->num_bufs++;
297
298 return 0;
299}
300
301static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
302 int type, s32 shift, u64 mask, u64 value)
303{
304 struct nvgpu_os_linux *l = cde_ctx->l;
305 struct gk20a *g = &l->g;
306 u32 *target_mem_ptr = target;
307 u64 *target_mem_ptr_u64 = target;
308 u64 current_value, new_value;
309
310 value = (shift >= 0) ? value << shift : value >> -shift;
311 value &= mask;
312
313 /* read current data from the location */
314 current_value = 0;
315 if (type == TYPE_PARAM_TYPE_U32) {
316 if (mask != 0xfffffffful)
317 current_value = *target_mem_ptr;
318 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
319 if (mask != ~0ul)
320 current_value = *target_mem_ptr_u64;
321 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
322 current_value = *target_mem_ptr_u64;
323 current_value = (u64)(current_value >> 32) |
324 (u64)(current_value << 32);
325 } else {
326 nvgpu_warn(g, "cde: unknown type. type=%d",
327 type);
328 return -EINVAL;
329 }
330
331 current_value &= ~mask;
332 new_value = current_value | value;
333
334 /* store the element data back */
335 if (type == TYPE_PARAM_TYPE_U32)
336 *target_mem_ptr = (u32)new_value;
337 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
338 *target_mem_ptr_u64 = new_value;
339 else {
340 new_value = (u64)(new_value >> 32) |
341 (u64)(new_value << 32);
342 *target_mem_ptr_u64 = new_value;
343 }
344
345 return 0;
346}
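/*
 * A worked example of the patch arithmetic above, using made-up values
 * (not taken from any real CDE firmware image): a u32 patch with shift=8,
 * mask=0x0000ff00, value=0x12, and a current word of 0xaabbccdd:
 *
 *   value          = 0x12 << 8              = 0x1200
 *   value         &= 0x0000ff00             = 0x1200
 *   current_value  = *target_mem_ptr        = 0xaabbccdd  (read because the
 *                                                          mask is partial)
 *   current_value &= ~0x0000ff00            = 0xaabb00dd
 *   new_value      = current_value | value  = 0xaabb12dd  (stored as u32)
 */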
347
348static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
349 struct nvgpu_firmware *img,
350 struct gk20a_cde_hdr_replace *replace)
351{
352 struct nvgpu_mem *source_mem;
353 struct nvgpu_mem *target_mem;
354 struct nvgpu_os_linux *l = cde_ctx->l;
355 struct gk20a *g = &l->g;
356 u32 *target_mem_ptr;
357 u64 vaddr;
358 int err;
359
360 if (replace->target_buf >= cde_ctx->num_bufs ||
361 replace->source_buf >= cde_ctx->num_bufs) {
362 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
363 replace->target_buf, replace->source_buf,
364 cde_ctx->num_bufs);
365 return -EINVAL;
366 }
367
368 source_mem = cde_ctx->mem + replace->source_buf;
369 target_mem = cde_ctx->mem + replace->target_buf;
370 target_mem_ptr = target_mem->cpu_va;
371
372 if (source_mem->size < (replace->source_byte_offset + 3) ||
373 target_mem->size < (replace->target_byte_offset + 3)) {
374 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
375 replace->target_byte_offset,
376 replace->source_byte_offset,
377 source_mem->size,
378 target_mem->size);
379 return -EINVAL;
380 }
381
382 /* calculate the target pointer */
383 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
384
385 /* determine patch value */
386 vaddr = source_mem->gpu_va + replace->source_byte_offset;
387 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
388 replace->shift, replace->mask,
389 vaddr);
390 if (err) {
391 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
392 err, replace->target_buf,
393 replace->target_byte_offset,
394 replace->source_buf,
395 replace->source_byte_offset);
396 }
397
398 return err;
399}
400
401static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
402{
403 struct nvgpu_os_linux *l = cde_ctx->l;
404 struct gk20a *g = &l->g;
405 struct nvgpu_mem *target_mem;
406 u32 *target_mem_ptr;
407 u64 new_data;
408 int user_id = 0, err;
409 unsigned int i;
410
411 for (i = 0; i < cde_ctx->num_params; i++) {
412 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
413 target_mem = cde_ctx->mem + param->target_buf;
414 target_mem_ptr = target_mem->cpu_va;
415 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
416
417 switch (param->id) {
418 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
419 new_data = g->gr.comptags_per_cacheline;
420 break;
421 case TYPE_PARAM_GPU_CONFIGURATION:
422 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
423 g->gr.cacheline_size;
424 break;
425 case TYPE_PARAM_FIRSTPAGEOFFSET:
426 new_data = cde_ctx->surf_param_offset;
427 break;
428 case TYPE_PARAM_NUMPAGES:
429 new_data = cde_ctx->surf_param_lines;
430 break;
431 case TYPE_PARAM_BACKINGSTORE:
432 new_data = cde_ctx->backing_store_vaddr;
433 break;
434 case TYPE_PARAM_DESTINATION:
435 new_data = cde_ctx->compbit_vaddr;
436 break;
437 case TYPE_PARAM_DESTINATION_SIZE:
438 new_data = cde_ctx->compbit_size;
439 break;
440 case TYPE_PARAM_BACKINGSTORE_SIZE:
441 new_data = g->gr.compbit_store.mem.size;
442 break;
443 case TYPE_PARAM_SOURCE_SMMU_ADDR:
444 new_data = gpuva_to_iova_base(cde_ctx->vm,
445 cde_ctx->surf_vaddr);
446 if (new_data == 0) {
447 nvgpu_warn(g, "cde: failed to find 0x%llx",
448 cde_ctx->surf_vaddr);
449 return -EINVAL;
450 }
451 break;
452 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
453 new_data = g->gr.compbit_store.base_hw;
454 break;
455 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
456 new_data = g->gr.gobs_per_comptagline_per_slice;
457 break;
458 case TYPE_PARAM_SCATTERBUFFER:
459 new_data = cde_ctx->scatterbuffer_vaddr;
460 break;
461 case TYPE_PARAM_SCATTERBUFFER_SIZE:
462 new_data = cde_ctx->scatterbuffer_size;
463 break;
464 default:
465 user_id = param->id - NUM_RESERVED_PARAMS;
466 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
467 continue;
468 new_data = cde_ctx->user_param_values[user_id];
469 }
470
471 nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
472 i, param->id, param->target_buf,
473 param->target_byte_offset, new_data,
474 param->data_offset, param->type, param->shift,
475 param->mask);
476
477 new_data += param->data_offset;
478
479 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
480 param->shift, param->mask, new_data);
481
482 if (err) {
483 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
484 err, i, param->id, param->target_buf,
485 param->target_byte_offset, new_data);
486 return err;
487 }
488 }
489
490 return 0;
491}
492
493static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
494 struct nvgpu_firmware *img,
495 struct gk20a_cde_hdr_param *param)
496{
497 struct nvgpu_mem *target_mem;
498 struct nvgpu_os_linux *l = cde_ctx->l;
499 struct gk20a *g = &l->g;
500
501 if (param->target_buf >= cde_ctx->num_bufs) {
502 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
503 cde_ctx->num_params, param->target_buf,
504 cde_ctx->num_bufs);
505 return -EINVAL;
506 }
507
508 target_mem = cde_ctx->mem + param->target_buf;
509 if (target_mem->size < (param->target_byte_offset + 3)) {
510 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
511 cde_ctx->num_params, param->target_byte_offset,
512 target_mem->size);
513 return -EINVAL;
514 }
515
516 /* does this parameter fit into our parameter structure */
517 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
518 nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
519 cde_ctx->num_params);
520 return -ENOMEM;
521 }
522
523 /* is the given id valid? */
524 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
525 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
526 cde_ctx->num_params, param->id,
527 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
528 return -EINVAL;
529 }
530
531 cde_ctx->params[cde_ctx->num_params] = *param;
532 cde_ctx->num_params++;
533
534 return 0;
535}
536
537static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
538 struct nvgpu_firmware *img,
539 u32 required_class)
540{
541 struct nvgpu_os_linux *l = cde_ctx->l;
542 struct gk20a *g = &l->g;
543 int err;
544
545 /* CDE enabled */
546 cde_ctx->ch->cde = true;
547
548 err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
549 if (err) {
550 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
551 err);
552 return err;
553 }
554
555 return 0;
556}
557
558static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
559 struct nvgpu_firmware *img,
560 u32 op,
561 struct gk20a_cde_cmd_elem *cmd_elem,
562 u32 num_elems)
563{
564 struct nvgpu_os_linux *l = cde_ctx->l;
565 struct gk20a *g = &l->g;
566 struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem;
567 u32 *num_entries;
568 unsigned int i;
569
570 /* check command type */
571 if (op == TYPE_BUF_COMMAND_INIT) {
572 gpfifo = &cde_ctx->init_convert_cmd;
573 num_entries = &cde_ctx->init_cmd_num_entries;
574 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
575 gpfifo = &cde_ctx->convert_cmd;
576 num_entries = &cde_ctx->convert_cmd_num_entries;
577 } else {
578 nvgpu_warn(g, "cde: unknown command. op=%u",
579 op);
580 return -EINVAL;
581 }
582
583 /* allocate gpfifo entries to be pushed */
584 *gpfifo = nvgpu_kzalloc(g,
585 sizeof(struct nvgpu_gpfifo_entry) * num_elems);
586 if (!*gpfifo) {
587 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
588 return -ENOMEM;
589 }
590
591 gpfifo_elem = *gpfifo;
592 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
593 struct nvgpu_mem *target_mem;
594
595 /* validate the current entry */
596 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
597 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
598 cmd_elem->target_buf, cde_ctx->num_bufs);
599 return -EINVAL;
600 }
601
602 target_mem = cde_ctx->mem + cmd_elem->target_buf;
603 if (target_mem->size <
604 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
605 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
606 target_mem->size,
607 cmd_elem->target_byte_offset,
608 cmd_elem->num_bytes);
609 return -EINVAL;
610 }
611
612 /* store the element into gpfifo */
613 gpfifo_elem->entry0 =
614 u64_lo32(target_mem->gpu_va +
615 cmd_elem->target_byte_offset);
616 gpfifo_elem->entry1 =
617 u64_hi32(target_mem->gpu_va +
618 cmd_elem->target_byte_offset) |
619 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
620 sizeof(u32));
621 }
622
623 *num_entries = num_elems;
624 return 0;
625}
626
627static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
628{
629 struct nvgpu_os_linux *l = cde_ctx->l;
630 struct gk20a *g = &l->g;
631 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
632 sizeof(struct nvgpu_gpfifo_entry);
633 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
634 sizeof(struct nvgpu_gpfifo_entry);
635 unsigned long total_bytes = init_bytes + conv_bytes;
636 struct nvgpu_gpfifo_entry *combined_cmd;
637
638 /* allocate buffer that has space for both */
639 combined_cmd = nvgpu_kzalloc(g, total_bytes);
640 if (!combined_cmd) {
641 nvgpu_warn(g,
642 "cde: could not allocate memory for gpfifo entries");
643 return -ENOMEM;
644 }
645
646 /* move the original init here and append convert */
647 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
648 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
649 cde_ctx->convert_cmd, conv_bytes);
650
651 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
652 nvgpu_kfree(g, cde_ctx->convert_cmd);
653
654 cde_ctx->init_convert_cmd = combined_cmd;
655 cde_ctx->convert_cmd = combined_cmd
656 + cde_ctx->init_cmd_num_entries;
657
658 return 0;
659}
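/*
 * Layout of the combined command buffer produced above (the split point
 * depends on what the firmware image defined; shown only as a sketch):
 *
 *   init_convert_cmd                        convert_cmd
 *   |                                       |
 *   v                                       v
 *   +---------------------------------------+------------------------------+
 *   | init_cmd_num_entries gpfifo entries   | convert_cmd_num_entries      |
 *   +---------------------------------------+------------------------------+
 *
 * Submitting from init_convert_cmd runs init followed by convert;
 * submitting from convert_cmd runs the conversion alone.
 */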
660
661static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
662 struct nvgpu_firmware *img)
663{
664 struct nvgpu_os_linux *l = cde_ctx->l;
665 struct gk20a *g = &l->g;
666 struct gk20a_cde_app *cde_app = &l->cde_app;
667 u32 *data = (u32 *)img->data;
668 u32 num_of_elems;
669 struct gk20a_cde_hdr_elem *elem;
670 u32 min_size = 0;
671 int err = 0;
672 unsigned int i;
673
674 min_size += 2 * sizeof(u32);
675 if (img->size < min_size) {
676 nvgpu_warn(g, "cde: invalid image header");
677 return -EINVAL;
678 }
679
680 cde_app->firmware_version = data[0];
681 num_of_elems = data[1];
682
683 min_size += num_of_elems * sizeof(*elem);
684 if (img->size < min_size) {
685 nvgpu_warn(g, "cde: bad image");
686 return -EINVAL;
687 }
688
689 elem = (struct gk20a_cde_hdr_elem *)&data[2];
690 for (i = 0; i < num_of_elems; i++) {
691 int err = 0;
692 switch (elem->type) {
693 case TYPE_BUF:
694 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
695 break;
696 case TYPE_REPLACE:
697 err = gk20a_init_cde_replace(cde_ctx, img,
698 &elem->replace);
699 break;
700 case TYPE_PARAM:
701 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
702 break;
703 case TYPE_REQUIRED_CLASS:
704 err = gk20a_init_cde_required_class(cde_ctx, img,
705 elem->required_class);
706 break;
707 case TYPE_COMMAND:
708 {
709 struct gk20a_cde_cmd_elem *cmd = (void *)
710 &img->data[elem->command.data_byte_offset];
711 err = gk20a_init_cde_command(cde_ctx, img,
712 elem->command.op, cmd,
713 elem->command.num_entries);
714 break;
715 }
716 case TYPE_ARRAY:
717 memcpy(&cde_app->arrays[elem->array.id][0],
718 elem->array.data,
719 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
720 break;
721 default:
722 nvgpu_warn(g, "cde: unknown header element");
723 err = -EINVAL;
724 }
725
726 if (err)
727 goto deinit_image;
728
729 elem++;
730 }
731
732 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
733 nvgpu_warn(g, "cde: init command not defined");
734 err = -EINVAL;
735 goto deinit_image;
736 }
737
738 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
739 nvgpu_warn(g, "cde: convert command not defined");
740 err = -EINVAL;
741 goto deinit_image;
742 }
743
744 err = gk20a_cde_pack_cmdbufs(cde_ctx);
745 if (err)
746 goto deinit_image;
747
748 return 0;
749
750deinit_image:
751 gk20a_deinit_cde_img(cde_ctx);
752 return err;
753}
754
755static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
756 u32 op, struct nvgpu_channel_fence *fence,
757 u32 flags, struct gk20a_fence **fence_out)
758{
759 struct nvgpu_os_linux *l = cde_ctx->l;
760 struct gk20a *g = &l->g;
761 struct nvgpu_gpfifo_entry *gpfifo = NULL;
762 int num_entries = 0;
763
764 /* check command type */
765 if (op == TYPE_BUF_COMMAND_INIT) {
766 /* both init and convert combined */
767 gpfifo = cde_ctx->init_convert_cmd;
768 num_entries = cde_ctx->init_cmd_num_entries
769 + cde_ctx->convert_cmd_num_entries;
770 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
771 gpfifo = cde_ctx->convert_cmd;
772 num_entries = cde_ctx->convert_cmd_num_entries;
773 } else if (op == TYPE_BUF_COMMAND_NOOP) {
774 /* Any non-null gpfifo will suffice with 0 num_entries */
775 gpfifo = cde_ctx->init_convert_cmd;
776 num_entries = 0;
777 } else {
778 nvgpu_warn(g, "cde: unknown buffer");
779 return -EINVAL;
780 }
781
782 if (gpfifo == NULL) {
783 nvgpu_warn(g, "cde: buffer not available");
784 return -ENOSYS;
785 }
786
787 return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo,
788 num_entries, flags, fence, fence_out);
789}
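/*
 * Summary of what each op ends up submitting (follows directly from the
 * branches above):
 *
 *   TYPE_BUF_COMMAND_INIT    -> init + convert entries (first submission)
 *   TYPE_BUF_COMMAND_CONVERT -> convert entries only
 *   TYPE_BUF_COMMAND_NOOP    -> zero entries, pre/post fence handling only
 */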
790
791static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
792__acquires(&cde_app->mutex)
793__releases(&cde_app->mutex)
794{
795 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
796 struct gk20a *g = &cde_ctx->l->g;
797
798 nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
799 trace_gk20a_cde_release(cde_ctx);
800
801 nvgpu_mutex_acquire(&cde_app->mutex);
802
803 if (cde_ctx->in_use) {
804 cde_ctx->in_use = false;
805 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
806 cde_app->ctx_usecount--;
807 } else {
808 nvgpu_log_info(g, "double release cde context %p", cde_ctx);
809 }
810
811 nvgpu_mutex_release(&cde_app->mutex);
812}
813
814static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
815__acquires(&cde_app->mutex)
816__releases(&cde_app->mutex)
817{
818 struct delayed_work *delay_work = to_delayed_work(work);
819 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
820 struct gk20a_cde_ctx, ctx_deleter_work);
821 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
822 struct nvgpu_os_linux *l = cde_ctx->l;
823 struct gk20a *g = &l->g;
824 int err;
825
826 /* someone has just taken it? engine deletion started? */
827 if (cde_ctx->in_use || !cde_app->initialised)
828 return;
829
830 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
831 "cde: attempting to delete temporary %p", cde_ctx);
832
833 err = gk20a_busy(g);
834 if (err) {
835 /* this context would find new use anyway later, so not freeing
836 * here does not leak anything */
837 nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
838 " temp ctx deletion");
839 return;
840 }
841
842 nvgpu_mutex_acquire(&cde_app->mutex);
843 if (cde_ctx->in_use || !cde_app->initialised) {
844 nvgpu_log(g, gpu_dbg_cde_ctx,
845 "cde: context use raced, not deleting %p",
846 cde_ctx);
847 goto out;
848 }
849
850 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
851 "double pending %p", cde_ctx);
852
853 gk20a_cde_remove_ctx(cde_ctx);
854 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
855 "cde: destroyed %p count=%d use=%d max=%d",
856 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
857 cde_app->ctx_count_top);
858
859out:
860 nvgpu_mutex_release(&cde_app->mutex);
861 gk20a_idle(g);
862}
863
864static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
865__must_hold(&cde_app->mutex)
866{
867 struct gk20a *g = &l->g;
868 struct gk20a_cde_app *cde_app = &l->cde_app;
869 struct gk20a_cde_ctx *cde_ctx;
870
871 /* exhausted? */
872
873 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
874 return ERR_PTR(-EAGAIN);
875
876 /* idle context available? */
877
878 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
879 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
880 gk20a_cde_ctx, list);
881 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
882 "cde: got free %p count=%d use=%d max=%d",
883 cde_ctx, cde_app->ctx_count,
884 cde_app->ctx_usecount,
885 cde_app->ctx_count_top);
886 trace_gk20a_cde_get_context(cde_ctx);
887
888 /* deleter work may be scheduled, but in_use prevents it */
889 cde_ctx->in_use = true;
890 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
891 cde_app->ctx_usecount++;
892
893 /* cancel any deletions now that ctx is in use */
894 gk20a_cde_cancel_deleter(cde_ctx, true);
895 return cde_ctx;
896 }
897
898 /* no free contexts, get a temporary one */
899
900 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
901 "cde: no free contexts, count=%d",
902 cde_app->ctx_count);
903
904 cde_ctx = gk20a_cde_allocate_context(l);
905 if (IS_ERR(cde_ctx)) {
906 nvgpu_warn(g, "cde: cannot allocate context: %ld",
907 PTR_ERR(cde_ctx));
908 return cde_ctx;
909 }
910
911 trace_gk20a_cde_get_context(cde_ctx);
912 cde_ctx->in_use = true;
913 cde_ctx->is_temporary = true;
914 cde_app->ctx_usecount++;
915 cde_app->ctx_count++;
916 if (cde_app->ctx_count > cde_app->ctx_count_top)
917 cde_app->ctx_count_top = cde_app->ctx_count;
918 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
919
920 return cde_ctx;
921}
922
923static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
924__releases(&cde_app->mutex)
925__acquires(&cde_app->mutex)
926{
927 struct gk20a *g = &l->g;
928 struct gk20a_cde_app *cde_app = &l->cde_app;
929 struct gk20a_cde_ctx *cde_ctx = NULL;
930 struct nvgpu_timeout timeout;
931
932 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
933 NVGPU_TIMER_CPU_TIMER);
934
935 do {
936 cde_ctx = gk20a_cde_do_get_context(l);
937 if (PTR_ERR(cde_ctx) != -EAGAIN)
938 break;
939
940 /* exhausted, retry */
941 nvgpu_mutex_release(&cde_app->mutex);
942 cond_resched();
943 nvgpu_mutex_acquire(&cde_app->mutex);
944 } while (!nvgpu_timeout_expired(&timeout));
945
946 return cde_ctx;
947}
948
949static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
950{
951 struct gk20a *g = &l->g;
952 struct gk20a_cde_ctx *cde_ctx;
953 int ret;
954
955 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
956 if (!cde_ctx)
957 return ERR_PTR(-ENOMEM);
958
959 cde_ctx->l = l;
960 cde_ctx->dev = dev_from_gk20a(g);
961
962 ret = gk20a_cde_load(cde_ctx);
963 if (ret) {
964 nvgpu_kfree(g, cde_ctx);
965 return ERR_PTR(ret);
966 }
967
968 nvgpu_init_list_node(&cde_ctx->list);
969 cde_ctx->is_temporary = false;
970 cde_ctx->in_use = false;
971 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
972 gk20a_cde_ctx_deleter_fn);
973
974 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
975 trace_gk20a_cde_allocate_context(cde_ctx);
976 return cde_ctx;
977}
978
979static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
980 u32 map_offset, u32 map_size)
981{
982 struct gk20a *g = gk20a_from_vm(vm);
983
984 /*
985 * To be simple we will just make the map size depend on the
986 * iommu'ability of the driver. If there's an IOMMU we can rely on
987 * buffers being contiguous. If not, then we'll use 4k pages since we
988 * know that will work for any buffer.
989 */
990 if (!nvgpu_iommuable(g))
991 return SZ_4K;
992
993 /*
994 * If map size or offset is not 64K aligned then use small pages.
995 */
996 if (map_size & (vm->big_page_size - 1) ||
997 map_offset & (vm->big_page_size - 1))
998 return SZ_4K;
999
1000 return vm->big_page_size;
1001}
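/*
 * Example outcomes of the selection above, assuming a 64K big page size
 * (used here purely for illustration):
 *
 *   no IOMMU                                      -> SZ_4K
 *   IOMMU, map_offset=0x20000, map_size=0x80000   -> big pages (both aligned)
 *   IOMMU, map_offset=0x21000                     -> SZ_4K (offset unaligned)
 */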
1002
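/*
 * High-level flow of gk20a_cde_convert() below, summarising the code rather
 * than adding behaviour: map the compbits/scatter dma-buf into the CDE VM,
 * optionally populate the scatter buffer from the pinned sgt, patch the
 * firmware parameters, submit an INIT/CONVERT/NOOP buffer with a mandatory
 * post-fence, and finally unmap; the channel holds its own references while
 * the job runs.
 */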
1003int gk20a_cde_convert(struct nvgpu_os_linux *l,
1004 struct dma_buf *compbits_scatter_buf,
1005 u64 compbits_byte_offset,
1006 u64 scatterbuffer_byte_offset,
1007 struct nvgpu_channel_fence *fence,
1008 u32 __flags, struct gk20a_cde_param *params,
1009 int num_params, struct gk20a_fence **fence_out)
1010__acquires(&l->cde_app->mutex)
1011__releases(&l->cde_app->mutex)
1012{
1013 struct gk20a *g = &l->g;
1014 struct gk20a_cde_ctx *cde_ctx = NULL;
1015 struct gk20a_comptags comptags;
1016 struct nvgpu_os_buffer os_buf = {
1017 compbits_scatter_buf,
1018 NULL,
1019 dev_from_gk20a(g)
1020 };
1021 u64 mapped_compbits_offset = 0;
1022 u64 compbits_size = 0;
1023 u64 mapped_scatterbuffer_offset = 0;
1024 u64 scatterbuffer_size = 0;
1025 u64 map_vaddr = 0;
1026 u64 map_offset = 0;
1027 u64 map_size = 0;
1028 u8 *surface = NULL;
1029 u64 big_page_mask = 0;
1030 u32 flags;
1031 int err, i;
1032 const s16 compbits_kind = 0;
1033 u32 submit_op;
1034 struct dma_buf_attachment *attachment;
1035
1036 nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
1037 compbits_byte_offset, scatterbuffer_byte_offset);
1038
1039 /* scatter buffer must be after compbits buffer */
1040 if (scatterbuffer_byte_offset &&
1041 scatterbuffer_byte_offset < compbits_byte_offset)
1042 return -EINVAL;
1043
1044 err = gk20a_busy(g);
1045 if (err)
1046 return err;
1047
1048 nvgpu_mutex_acquire(&l->cde_app.mutex);
1049 cde_ctx = gk20a_cde_get_context(l);
1050 nvgpu_mutex_release(&l->cde_app.mutex);
1051 if (IS_ERR(cde_ctx)) {
1052 err = PTR_ERR(cde_ctx);
1053 goto exit_idle;
1054 }
1055
1056 /* First, map the buffer to local va */
1057
1058 /* ensure that the compbits buffer has drvdata */
1059 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
1060 dev_from_gk20a(g));
1061 if (err)
1062 goto exit_idle;
1063
1064 /* compbits don't start at a page-aligned offset, so we need to align
1065 * the region to be mapped */
1066 big_page_mask = cde_ctx->vm->big_page_size - 1;
1067 map_offset = compbits_byte_offset & ~big_page_mask;
1068 map_size = compbits_scatter_buf->size - map_offset;
1069
1070
1071 /* compute compbit start offset from the beginning of the mapped
1072 area */
1073 mapped_compbits_offset = compbits_byte_offset - map_offset;
1074 if (scatterbuffer_byte_offset) {
1075 compbits_size = scatterbuffer_byte_offset -
1076 compbits_byte_offset;
1077 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1078 map_offset;
1079 scatterbuffer_size = compbits_scatter_buf->size -
1080 scatterbuffer_byte_offset;
1081 } else {
1082 compbits_size = compbits_scatter_buf->size -
1083 compbits_byte_offset;
1084 }
1085
1086 nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1087 map_offset, map_size);
1088 nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1089 mapped_compbits_offset, compbits_size);
1090 nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1091 mapped_scatterbuffer_offset, scatterbuffer_size);
1092
1093
1094 /* map the destination buffer */
1095 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
1096 err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
1097 NVGPU_VM_MAP_CACHEABLE |
1098 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
1099 gk20a_cde_mapping_page_size(cde_ctx->vm,
1100 map_offset,
1101 map_size),
1102 NV_KIND_INVALID,
1103 compbits_kind, /* incompressible kind */
1104 gk20a_mem_flag_none,
1105 map_offset, map_size,
1106 NULL,
1107 &map_vaddr);
1108 if (err) {
1109 nvgpu_warn(g, "cde: failed to map compbits scatter buf at %lld size %lld",
1110 map_offset, map_size);
1111 dma_buf_put(compbits_scatter_buf);
1112 err = -EINVAL;
1113 goto exit_idle;
1114 }
1115
1116 if (scatterbuffer_byte_offset &&
1117 l->ops.cde.need_scatter_buffer &&
1118 l->ops.cde.need_scatter_buffer(g)) {
1119 struct sg_table *sgt;
1120 void *scatter_buffer;
1121
1122 surface = dma_buf_vmap(compbits_scatter_buf);
1123 if (!surface) {
1124 nvgpu_warn(g,
1125 "dma_buf_vmap failed");
1126 err = -EINVAL;
1127 goto exit_unmap_vaddr;
1128 }
1129
1130 scatter_buffer = surface + scatterbuffer_byte_offset;
1131
1132 nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1133 surface, scatter_buffer);
1134 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
1135 &attachment);
1136 if (IS_ERR(sgt)) {
1137 nvgpu_warn(g,
1138 "mm_pin failed");
1139 err = -EINVAL;
1140 goto exit_unmap_surface;
1141 } else {
1142 err = l->ops.cde.populate_scatter_buffer(g, sgt,
1143 compbits_byte_offset, scatter_buffer,
1144 scatterbuffer_size);
1145 WARN_ON(err);
1146
1147 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1148 attachment, sgt);
1149 if (err)
1150 goto exit_unmap_surface;
1151 }
1152
1153 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1154 dma_buf_vunmap(compbits_scatter_buf, surface);
1155 surface = NULL;
1156 }
1157
1158 /* store source buffer compression tags */
1159 gk20a_get_comptags(&os_buf, &comptags);
1160 cde_ctx->surf_param_offset = comptags.offset;
1161 cde_ctx->surf_param_lines = comptags.lines;
1162
1163 /* store surface vaddr. This is actually compbit vaddr, but since
1164 compbits live in the same surface, and we can get the alloc base
1165 address by using gpuva_to_iova_base, this will do */
1166 cde_ctx->surf_vaddr = map_vaddr;
1167
1168 /* store information about destination */
1169 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1170 cde_ctx->compbit_size = compbits_size;
1171
1172 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1173 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1174
1175 /* remove existing argument data */
1176 memset(cde_ctx->user_param_values, 0,
1177 sizeof(cde_ctx->user_param_values));
1178
1179 /* read user space arguments for the conversion */
1180 for (i = 0; i < num_params; i++) {
1181 struct gk20a_cde_param *param = params + i;
1182 int id = param->id - NUM_RESERVED_PARAMS;
1183
1184 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1185 nvgpu_warn(g, "cde: unknown user parameter");
1186 err = -EINVAL;
1187 goto exit_unmap_surface;
1188 }
1189 cde_ctx->user_param_values[id] = param->value;
1190 }
1191
1192 /* patch data */
1193 err = gk20a_cde_patch_params(cde_ctx);
1194 if (err) {
1195 nvgpu_warn(g, "cde: failed to patch parameters");
1196 goto exit_unmap_surface;
1197 }
1198
1199 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1200 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1201 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1202 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1203 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1204 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1205
1206 /* always take the postfence as it is needed for protecting the
1207 * cde context */
1208 flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;
1209
1210 /* gk20a_cde_execute_buffer() will grab a power reference of its own */
1211 gk20a_idle(g);
1212
1213 if (comptags.lines == 0) {
1214 /*
1215 * Nothing to do on the buffer, but do a null kickoff for
1216 * managing the pre and post fences.
1217 */
1218 submit_op = TYPE_BUF_COMMAND_NOOP;
1219 } else if (!cde_ctx->init_cmd_executed) {
1220 /*
1221 * First time, so include the init pushbuf too in addition to
1222 * the conversion code.
1223 */
1224 submit_op = TYPE_BUF_COMMAND_INIT;
1225 } else {
1226 /*
1227 * The usual condition: execute just the conversion.
1228 */
1229 submit_op = TYPE_BUF_COMMAND_CONVERT;
1230 }
1231 err = gk20a_cde_execute_buffer(cde_ctx, submit_op,
1232 fence, flags, fence_out);
1233
1234 if (comptags.lines != 0 && !err)
1235 cde_ctx->init_cmd_executed = true;
1236
1237 /* unmap the buffers - channel holds references to them now */
1238 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1239
1240 return err;
1241
1242exit_unmap_surface:
1243 if (surface)
1244 dma_buf_vunmap(compbits_scatter_buf, surface);
1245exit_unmap_vaddr:
1246 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1247exit_idle:
1248 gk20a_idle(g);
1249 return err;
1250}
1251
1252static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1253__acquires(&cde_app->mutex)
1254__releases(&cde_app->mutex)
1255{
1256 struct gk20a_cde_ctx *cde_ctx = data;
1257 struct nvgpu_os_linux *l = cde_ctx->l;
1258 struct gk20a *g = &l->g;
1259 struct gk20a_cde_app *cde_app = &l->cde_app;
1260 bool channel_idle;
1261
1262 channel_gk20a_joblist_lock(ch);
1263 channel_idle = channel_gk20a_joblist_is_empty(ch);
1264 channel_gk20a_joblist_unlock(ch);
1265
1266 if (!channel_idle)
1267 return;
1268
1269 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1270 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1271 if (!cde_ctx->in_use)
1272 nvgpu_log_info(g, "double finish cde context %p on channel %p",
1273 cde_ctx, ch);
1274
1275 if (gk20a_channel_check_timedout(ch)) {
1276 if (cde_ctx->is_temporary) {
1277 nvgpu_warn(g,
1278 "cde: channel had timed out"
1279 " (temporary channel)");
1280 /* going to be deleted anyway */
1281 } else {
1282 nvgpu_warn(g,
1283 "cde: channel had timed out"
1284 ", reloading");
1285 /* mark it to be deleted, replace with a new one */
1286 nvgpu_mutex_acquire(&cde_app->mutex);
1287 cde_ctx->is_temporary = true;
1288 if (gk20a_cde_create_context(l)) {
1289 nvgpu_err(g, "cde: can't replace context");
1290 }
1291 nvgpu_mutex_release(&cde_app->mutex);
1292 }
1293 }
1294
1295 /* delete temporary contexts later (watch for doubles) */
1296 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1297 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1298 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1299 msecs_to_jiffies(CTX_DELETE_TIME));
1300 }
1301
1302 if (!gk20a_channel_check_timedout(ch)) {
1303 gk20a_cde_ctx_release(cde_ctx);
1304 }
1305}
1306
1307static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1308{
1309 struct nvgpu_os_linux *l = cde_ctx->l;
1310 struct gk20a *g = &l->g;
1311 struct nvgpu_firmware *img;
1312 struct channel_gk20a *ch;
1313 struct tsg_gk20a *tsg;
1314 struct gr_gk20a *gr = &g->gr;
1315 struct nvgpu_setup_bind_args setup_bind_args;
1316 int err = 0;
1317 u64 vaddr;
1318
1319 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1320 if (!img) {
1321 nvgpu_err(g, "cde: could not fetch the firmware");
1322 return -ENOSYS;
1323 }
1324
1325 tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
1326 if (!tsg) {
1327 nvgpu_err(g, "cde: could not create TSG");
1328 err = -ENOMEM;
1329 goto err_get_gk20a_channel;
1330 }
1331
1332 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1333 cde_ctx,
1334 -1,
1335 false);
1336 if (!ch) {
1337 nvgpu_warn(g, "cde: gk20a channel not available");
1338 err = -ENOMEM;
1339 goto err_get_gk20a_channel;
1340 }
1341
1342 ch->timeout.enabled = false;
1343
1344 /* bind the channel to the vm */
1345 err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
1346 if (err) {
1347 nvgpu_warn(g, "cde: could not bind vm");
1348 goto err_commit_va;
1349 }
1350
1351 err = gk20a_tsg_bind_channel(tsg, ch);
1352 if (err) {
1353 nvgpu_err(g, "cde: unable to bind to tsg");
1354 goto err_setup_bind;
1355 }
1356
1357 setup_bind_args.num_gpfifo_entries = 1024;
1358 setup_bind_args.num_inflight_jobs = 0;
1359 setup_bind_args.flags = 0;
1360 err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
1361 if (err) {
1362 nvgpu_warn(g, "cde: unable to setup channel");
1363 goto err_setup_bind;
1364 }
1365
1366 /* map backing store to gpu virtual space */
1367 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1368 g->gr.compbit_store.mem.size,
1369 NVGPU_VM_MAP_CACHEABLE,
1370 gk20a_mem_flag_read_only,
1371 false,
1372 gr->compbit_store.mem.aperture);
1373
1374 if (!vaddr) {
1375 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1376 err = -ENOMEM;
1377 goto err_map_backingstore;
1378 }
1379
1380 /* store initialisation data */
1381 cde_ctx->ch = ch;
1382 cde_ctx->tsg = tsg;
1383 cde_ctx->vm = ch->vm;
1384 cde_ctx->backing_store_vaddr = vaddr;
1385
1386 /* initialise the firmware */
1387 err = gk20a_init_cde_img(cde_ctx, img);
1388 if (err) {
1389 nvgpu_warn(g, "cde: image initialisation failed");
1390 goto err_init_cde_img;
1391 }
1392
1393 /* initialisation done */
1394 nvgpu_release_firmware(g, img);
1395
1396 return 0;
1397
1398err_init_cde_img:
1399 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1400err_map_backingstore:
1401err_setup_bind:
1402 nvgpu_vm_put(ch->vm);
1403err_commit_va:
1404err_get_gk20a_channel:
1405 nvgpu_release_firmware(g, img);
1406 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1407 return err;
1408}
1409
1410int gk20a_cde_reload(struct nvgpu_os_linux *l)
1411__acquires(&l->cde_app->mutex)
1412__releases(&l->cde_app->mutex)
1413{
1414 struct gk20a *g = &l->g;
1415 struct gk20a_cde_app *cde_app = &l->cde_app;
1416 int err;
1417
1418 if (!cde_app->initialised)
1419 return -ENOSYS;
1420
1421 err = gk20a_busy(g);
1422 if (err)
1423 return err;
1424
1425 nvgpu_mutex_acquire(&cde_app->mutex);
1426
1427 gk20a_cde_stop(l);
1428
1429 err = gk20a_cde_create_contexts(l);
1430 if (!err)
1431 cde_app->initialised = true;
1432
1433 nvgpu_mutex_release(&cde_app->mutex);
1434
1435 gk20a_idle(g);
1436 return err;
1437}
1438
1439int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1440__acquires(&cde_app->mutex)
1441__releases(&cde_app->mutex)
1442{
1443 struct gk20a_cde_app *cde_app = &l->cde_app;
1444 struct gk20a *g = &l->g;
1445 int err;
1446
1447 if (cde_app->initialised)
1448 return 0;
1449
1450 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1451
1452 err = nvgpu_mutex_init(&cde_app->mutex);
1453 if (err)
1454 return err;
1455
1456 nvgpu_mutex_acquire(&cde_app->mutex);
1457
1458 nvgpu_init_list_node(&cde_app->free_contexts);
1459 nvgpu_init_list_node(&cde_app->used_contexts);
1460 cde_app->ctx_count = 0;
1461 cde_app->ctx_count_top = 0;
1462 cde_app->ctx_usecount = 0;
1463
1464 err = gk20a_cde_create_contexts(l);
1465 if (!err)
1466 cde_app->initialised = true;
1467
1468 nvgpu_mutex_release(&cde_app->mutex);
1469 nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1470
1471 if (err)
1472 nvgpu_mutex_destroy(&cde_app->mutex);
1473
1474 return err;
1475}
1476
1477enum cde_launch_patch_id {
1478 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1479 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1480 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1481 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1482 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1483 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1484 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1485 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1486 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1487 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1488 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1489 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1490 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1491 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1492 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1493 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1494 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1495 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1496 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1497 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1498 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1499 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1500 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1501 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1502 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1503 PATCH_H_LAUNCH_WORD1_ID = 1049,
1504 PATCH_H_LAUNCH_WORD2_ID = 1050,
1505 PATCH_V_LAUNCH_WORD1_ID = 1051,
1506 PATCH_V_LAUNCH_WORD2_ID = 1052,
1507 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1508 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1509 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1510 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1511};
1512
1513/* maximum number of WRITE_PATCHes in the below function */
1514#define MAX_CDE_LAUNCH_PATCHES 32
1515
1516static int gk20a_buffer_convert_gpu_to_cde_v1(
1517 struct nvgpu_os_linux *l,
1518 struct dma_buf *dmabuf, u32 consumer,
1519 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1520 u64 scatterbuffer_offset,
1521 u32 width, u32 height, u32 block_height_log2,
1522 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1523 struct gk20a_buffer_state *state)
1524{
1525 struct gk20a *g = &l->g;
1526 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1527 int param = 0;
1528 int err = 0;
1529 struct gk20a_fence *new_fence = NULL;
1530 const int wgx = 8;
1531 const int wgy = 8;
1532 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1533 const int xalign = compbits_per_byte * wgx;
1534 const int yalign = wgy;
1535
1536 /* Compute per launch parameters */
1537 const int xtiles = (width + 7) >> 3;
1538 const int ytiles = (height + 7) >> 3;
1539 const int gridw_h = roundup(xtiles, xalign) / xalign;
1540 const int gridh_h = roundup(ytiles, yalign) / yalign;
1541 const int gridw_v = roundup(ytiles, xalign) / xalign;
1542 const int gridh_v = roundup(xtiles, yalign) / yalign;
1543 const int xblocks = (xtiles + 1) >> 1;
1544 const int voffset = compbits_voffset - compbits_hoffset;
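	/*
	 * Worked example of the launch math above for a hypothetical
	 * 1920x1080 surface (numbers are illustrative only):
	 *
	 *   xtiles  = (1920 + 7) >> 3        = 240
	 *   ytiles  = (1080 + 7) >> 3        = 135
	 *   xalign  = 4 * 8 = 32, yalign = 8
	 *   gridw_h = roundup(240, 32) / 32  = 8
	 *   gridh_h = roundup(135, 8) / 8    = 17
	 *   gridw_v = roundup(135, 32) / 32  = 5
	 *   gridh_v = roundup(240, 8) / 8    = 30
	 *   xblocks = (240 + 1) >> 1         = 120
	 */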
1545
1546 int hprog = -1;
1547 int vprog = -1;
1548
1549 if (l->ops.cde.get_program_numbers)
1550 l->ops.cde.get_program_numbers(g, block_height_log2,
1551 l->cde_app.shader_parameter,
1552 &hprog, &vprog);
1553 else {
1554 nvgpu_warn(g, "cde: chip not supported");
1555 return -ENOSYS;
1556 }
1557
1558 if (hprog < 0 || vprog < 0) {
1559 nvgpu_warn(g, "cde: could not determine programs");
1560 return -ENOSYS;
1561 }
1562
1563 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1564 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1565 xtiles, ytiles);
1566
1567 nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1568 width, height, block_height_log2,
1569 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1570 nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1571 width, height, xtiles, ytiles);
1572 nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1573 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1574 nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1575 hprog,
1576 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1577 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1578 vprog,
1579 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1580 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1581
1582 /* Write parameters */
1583#define WRITE_PATCH(NAME, VALUE) \
1584 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1585 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1586 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1587 block_height_log2);
1588 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1589 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1590 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1591 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1592 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1593
1594 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1595 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1596 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1597 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1598 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1599 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1600
1601 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1602 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1603 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1604 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1605 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1606 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1607
1608 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1609 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1610 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1611 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1612 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1613 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1614 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1615 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1616
1617 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1618 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1619 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1620 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1621 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1622 } else {
1623 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1624 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1625 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1626 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1627 }
1628
1629 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1630 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1631 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1632 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1633 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1634 } else {
1635 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1636 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1637 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1638 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1639 }
1640#undef WRITE_PATCH
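/*
 * For reference, a single WRITE_PATCH use above, e.g.
 *
 *   WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
 *
 * expands to
 *
 *   params[param++] = (struct gk20a_cde_param)
 *           {PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID, 0, 1};
 *
 * i.e. each entry records a parameter id and the value that
 * gk20a_cde_convert() will patch into the firmware command buffers.
 */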
1641
1642 err = gk20a_cde_convert(l, dmabuf,
1643 compbits_hoffset,
1644 scatterbuffer_offset,
1645 fence_in, submit_flags,
1646 params, param, &new_fence);
1647 if (err)
1648 goto out;
1649
1650 /* compbits generated, update state & fence */
1651 gk20a_fence_put(state->fence);
1652 state->fence = new_fence;
1653 state->valid_compbits |= consumer &
1654 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1655out:
1656 return err;
1657}
1658
1659static int gk20a_buffer_convert_gpu_to_cde(
1660 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1661 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1662 u64 scatterbuffer_offset,
1663 u32 width, u32 height, u32 block_height_log2,
1664 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1665 struct gk20a_buffer_state *state)
1666{
1667 struct gk20a *g = &l->g;
1668 int err = 0;
1669
1670 if (!l->cde_app.initialised)
1671 return -ENOSYS;
1672
1673 nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n",
1674 l->cde_app.firmware_version);
1675
1676 if (l->cde_app.firmware_version == 1) {
1677 err = gk20a_buffer_convert_gpu_to_cde_v1(
1678 l, dmabuf, consumer, offset, compbits_hoffset,
1679 compbits_voffset, scatterbuffer_offset,
1680 width, height, block_height_log2,
1681 submit_flags, fence_in, state);
1682 } else {
1683 nvgpu_err(g, "unsupported CDE firmware version %d",
1684 l->cde_app.firmware_version);
1685 err = -EINVAL;
1686 }
1687
1688 return err;
1689}
1690
1691int gk20a_prepare_compressible_read(
1692 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1693 u64 compbits_hoffset, u64 compbits_voffset,
1694 u64 scatterbuffer_offset,
1695 u32 width, u32 height, u32 block_height_log2,
1696 u32 submit_flags, struct nvgpu_channel_fence *fence,
1697 u32 *valid_compbits, u32 *zbc_color,
1698 struct gk20a_fence **fence_out)
1699{
1700 struct gk20a *g = &l->g;
1701 int err = 0;
1702 struct gk20a_buffer_state *state;
1703 struct dma_buf *dmabuf;
1704 u32 missing_bits;
1705
1706 dmabuf = dma_buf_get(buffer_fd);
1707 if (IS_ERR(dmabuf))
1708 return -EINVAL;
1709
1710 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1711 if (err) {
1712 dma_buf_put(dmabuf);
1713 return err;
1714 }
1715
1716 missing_bits = (state->valid_compbits ^ request) & request;
1717
1718 nvgpu_mutex_acquire(&state->lock);
1719
1720 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1721
1722 gk20a_fence_put(state->fence);
1723 state->fence = NULL;
1724 /* state->fence = decompress();
1725 state->valid_compbits = 0; */
1726 err = -EINVAL;
1727 goto out;
1728 } else if (missing_bits) {
1729 u32 missing_cde_bits = missing_bits &
1730 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1731 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1732 missing_cde_bits) {
1733 err = gk20a_buffer_convert_gpu_to_cde(
1734 l, dmabuf,
1735 missing_cde_bits,
1736 offset, compbits_hoffset,
1737 compbits_voffset, scatterbuffer_offset,
1738 width, height, block_height_log2,
1739 submit_flags, fence,
1740 state);
1741 if (err)
1742 goto out;
1743 }
1744 }
1745
1746 if (state->fence && fence_out)
1747 *fence_out = gk20a_fence_get(state->fence);
1748
1749 if (valid_compbits)
1750 *valid_compbits = state->valid_compbits;
1751
1752 if (zbc_color)
1753 *zbc_color = state->zbc_color;
1754
1755out:
1756 nvgpu_mutex_release(&state->lock);
1757 dma_buf_put(dmabuf);
1758 return err;
1759}
1760
1761int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1762 u32 valid_compbits, u64 offset, u32 zbc_color)
1763{
1764 int err;
1765 struct gk20a_buffer_state *state;
1766 struct dma_buf *dmabuf;
1767
1768 dmabuf = dma_buf_get(buffer_fd);
1769 if (IS_ERR(dmabuf)) {
1770 nvgpu_err(g, "invalid dmabuf");
1771 return -EINVAL;
1772 }
1773
1774 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1775 if (err) {
1776 nvgpu_err(g, "could not get state from dmabuf");
1777 dma_buf_put(dmabuf);
1778 return err;
1779 }
1780
1781 nvgpu_mutex_acquire(&state->lock);
1782
1783 /* Update the compbits state. */
1784 state->valid_compbits = valid_compbits;
1785 state->zbc_color = zbc_color;
1786
1787 /* Discard previous compbit job fence. */
1788 gk20a_fence_put(state->fence);
1789 state->fence = NULL;
1790
1791 nvgpu_mutex_release(&state->lock);
1792 dma_buf_put(dmabuf);
1793 return 0;
1794}