path: root/drivers/gpu/nvgpu/common/linux/cde.c
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/cde.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde.c  1786
1 files changed, 0 insertions, 1786 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
deleted file mode 100644
index 32b333f1..00000000
--- a/drivers/gpu/nvgpu/common/linux/cde.c
+++ /dev/null
@@ -1,1786 +0,0 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22#include <uapi/linux/nvgpu.h>
23
24#include <trace/events/gk20a.h>
25
26#include <nvgpu/dma.h>
27#include <nvgpu/gmmu.h>
28#include <nvgpu/timers.h>
29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/kmem.h>
31#include <nvgpu/log.h>
32#include <nvgpu/bug.h>
33#include <nvgpu/firmware.h>
34#include <nvgpu/os_sched.h>
35
36#include <nvgpu/linux/vm.h>
37
38#include "gk20a/gk20a.h"
39#include "gk20a/channel_gk20a.h"
40#include "gk20a/mm_gk20a.h"
41#include "gk20a/fence_gk20a.h"
42#include "gk20a/gr_gk20a.h"
43
44#include "cde.h"
45#include "os_linux.h"
46#include "dmabuf.h"
47#include "channel.h"
48#include "cde_gm20b.h"
49#include "cde_gp10b.h"
50
51#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
52#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
53
54static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
55static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
56
57#define CTX_DELETE_TIME 1000
58
59#define MAX_CTX_USE_COUNT 42
60#define MAX_CTX_RETRY_TIME 2000
61
62static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
63{
64 struct nvgpu_mapped_buf *buffer;
65 dma_addr_t addr = 0;
66 struct gk20a *g = gk20a_from_vm(vm);
67
68 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
69 buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
70 if (buffer)
71 addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
72 nvgpu_mutex_release(&vm->update_gmmu_lock);
73
74 return addr;
75}
76
77static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
78{
79 unsigned int i;
80
81 for (i = 0; i < cde_ctx->num_bufs; i++) {
82 struct nvgpu_mem *mem = cde_ctx->mem + i;
83 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
84 }
85
86 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
87
88 cde_ctx->convert_cmd = NULL;
89 cde_ctx->init_convert_cmd = NULL;
90 cde_ctx->num_bufs = 0;
91 cde_ctx->num_params = 0;
92 cde_ctx->init_cmd_num_entries = 0;
93 cde_ctx->convert_cmd_num_entries = 0;
94 cde_ctx->init_cmd_executed = false;
95}
96
97static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
98__must_hold(&cde_app->mutex)
99{
100 struct nvgpu_os_linux *l = cde_ctx->l;
101 struct gk20a *g = &l->g;
102 struct channel_gk20a *ch = cde_ctx->ch;
103 struct vm_gk20a *vm = ch->vm;
104
105 trace_gk20a_cde_remove_ctx(cde_ctx);
106
107 /* release mapped memory */
108 gk20a_deinit_cde_img(cde_ctx);
109 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
110 cde_ctx->backing_store_vaddr);
111
112 /*
113 * free the channel
114 * gk20a_channel_close() will also unbind the channel from TSG
115 */
116 gk20a_channel_close(ch);
117 nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);
118
119 /* housekeeping on app */
120 nvgpu_list_del(&cde_ctx->list);
121 l->cde_app.ctx_count--;
122 nvgpu_kfree(g, cde_ctx);
123}
124
125static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
126 bool wait_finish)
127__releases(&cde_app->mutex)
128__acquires(&cde_app->mutex)
129{
130 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
131
132 /* permanent contexts do not have a deleter work */
133 if (!cde_ctx->is_temporary)
134 return;
135
136 if (wait_finish) {
137 nvgpu_mutex_release(&cde_app->mutex);
138 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
139 nvgpu_mutex_acquire(&cde_app->mutex);
140 } else {
141 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
142 }
143}
144
145static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
146__must_hold(&l->cde_app->mutex)
147{
148 struct gk20a_cde_app *cde_app = &l->cde_app;
149 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
150
151 /* safe to drop the mutex in cancel_deleter since the app is
152 * deinitialised and no new jobs are started; any pending deleter work is
153 * either waiting for the mutex or will notice the shutdown and abort */
154
155 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
156 &cde_app->free_contexts, gk20a_cde_ctx, list) {
157 gk20a_cde_cancel_deleter(cde_ctx, true);
158 gk20a_cde_remove_ctx(cde_ctx);
159 }
160
161 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
162 &cde_app->used_contexts, gk20a_cde_ctx, list) {
163 gk20a_cde_cancel_deleter(cde_ctx, true);
164 gk20a_cde_remove_ctx(cde_ctx);
165 }
166}
167
168static void gk20a_cde_stop(struct nvgpu_os_linux *l)
169__must_hold(&l->cde_app->mutex)
170{
171 struct gk20a_cde_app *cde_app = &l->cde_app;
172
173 /* prevent further conversions and keep delayed works from running */
174 cde_app->initialised = false;
175 /* free all data, empty the list */
176 gk20a_cde_remove_contexts(l);
177}
178
179void gk20a_cde_destroy(struct nvgpu_os_linux *l)
180__acquires(&l->cde_app->mutex)
181__releases(&l->cde_app->mutex)
182{
183 struct gk20a_cde_app *cde_app = &l->cde_app;
184
185 if (!cde_app->initialised)
186 return;
187
188 nvgpu_mutex_acquire(&cde_app->mutex);
189 gk20a_cde_stop(l);
190 nvgpu_mutex_release(&cde_app->mutex);
191
192 nvgpu_mutex_destroy(&cde_app->mutex);
193}
194
195void gk20a_cde_suspend(struct nvgpu_os_linux *l)
196__acquires(&l->cde_app->mutex)
197__releases(&l->cde_app->mutex)
198{
199 struct gk20a_cde_app *cde_app = &l->cde_app;
200 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
201
202 if (!cde_app->initialised)
203 return;
204
205 nvgpu_mutex_acquire(&cde_app->mutex);
206
207 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
208 &cde_app->free_contexts, gk20a_cde_ctx, list) {
209 gk20a_cde_cancel_deleter(cde_ctx, false);
210 }
211
212 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
213 &cde_app->used_contexts, gk20a_cde_ctx, list) {
214 gk20a_cde_cancel_deleter(cde_ctx, false);
215 }
216
217 nvgpu_mutex_release(&cde_app->mutex);
218
219}
220
221static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
222__must_hold(&l->cde_app->mutex)
223{
224 struct gk20a_cde_app *cde_app = &l->cde_app;
225 struct gk20a_cde_ctx *cde_ctx;
226
227 cde_ctx = gk20a_cde_allocate_context(l);
228 if (IS_ERR(cde_ctx))
229 return PTR_ERR(cde_ctx);
230
231 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
232 cde_app->ctx_count++;
233 if (cde_app->ctx_count > cde_app->ctx_count_top)
234 cde_app->ctx_count_top = cde_app->ctx_count;
235
236 return 0;
237}
238
239static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
240__must_hold(&l->cde_app->mutex)
241{
242 int err;
243 int i;
244
245 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
246 err = gk20a_cde_create_context(l);
247 if (err)
248 goto out;
249 }
250
251 return 0;
252out:
253 gk20a_cde_remove_contexts(l);
254 return err;
255}
256
257static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
258 struct nvgpu_firmware *img,
259 struct gk20a_cde_hdr_buf *buf)
260{
261 struct nvgpu_mem *mem;
262 struct nvgpu_os_linux *l = cde_ctx->l;
263 struct gk20a *g = &l->g;
264 int err;
265
266 /* check that the file can hold the buf */
267 if (buf->data_byte_offset != 0 &&
268 buf->data_byte_offset + buf->num_bytes > img->size) {
269 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
270 cde_ctx->num_bufs);
271 return -EINVAL;
272 }
273
274 /* check that we have enough buf elems available */
275 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
276 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
277 cde_ctx->num_bufs);
278 return -ENOMEM;
279 }
280
281 /* allocate buf */
282 mem = cde_ctx->mem + cde_ctx->num_bufs;
283 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
284 if (err) {
285 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
286 cde_ctx->num_bufs);
287 return -ENOMEM;
288 }
289
290 /* copy the content */
291 if (buf->data_byte_offset != 0)
292 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
293 buf->num_bytes);
294
295 cde_ctx->num_bufs++;
296
297 return 0;
298}
299
300static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
301 int type, s32 shift, u64 mask, u64 value)
302{
303 struct nvgpu_os_linux *l = cde_ctx->l;
304 struct gk20a *g = &l->g;
305 u32 *target_mem_ptr = target;
306 u64 *target_mem_ptr_u64 = target;
307 u64 current_value, new_value;
308
309 value = (shift >= 0) ? value << shift : value >> -shift;
310 value &= mask;
311
312 /* read current data from the location */
313 current_value = 0;
314 if (type == TYPE_PARAM_TYPE_U32) {
315 if (mask != 0xfffffffful)
316 current_value = *target_mem_ptr;
317 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
318 if (mask != ~0ul)
319 current_value = *target_mem_ptr_u64;
320 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
321 current_value = *target_mem_ptr_u64;
322 current_value = (u64)(current_value >> 32) |
323 (u64)(current_value << 32);
324 } else {
325 nvgpu_warn(g, "cde: unknown type. type=%d",
326 type);
327 return -EINVAL;
328 }
329
330 current_value &= ~mask;
331 new_value = current_value | value;
332
333 /* store the element data back */
334 if (type == TYPE_PARAM_TYPE_U32)
335 *target_mem_ptr = (u32)new_value;
336 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
337 *target_mem_ptr_u64 = new_value;
338 else {
339 new_value = (u64)(new_value >> 32) |
340 (u64)(new_value << 32);
341 *target_mem_ptr_u64 = new_value;
342 }
343
344 return 0;
345}
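/*
 * Illustration of the patching above (made-up values, not from the
 * firmware): a TYPE_PARAM_TYPE_U32 patch with shift=8, mask=0x0000ff00
 * and value=0xab against a target word holding 0x11223344 computes
 *   value     = (0xab << 8) & 0x0000ff00 = 0x0000ab00
 *   current   = 0x11223344 & ~0x0000ff00 = 0x11220044
 *   new_value = 0x11220044 |  0x0000ab00 = 0x1122ab44
 * i.e. only the bits selected by the mask are replaced in place.
 */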
346
347static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
348 struct nvgpu_firmware *img,
349 struct gk20a_cde_hdr_replace *replace)
350{
351 struct nvgpu_mem *source_mem;
352 struct nvgpu_mem *target_mem;
353 struct nvgpu_os_linux *l = cde_ctx->l;
354 struct gk20a *g = &l->g;
355 u32 *target_mem_ptr;
356 u64 vaddr;
357 int err;
358
359 if (replace->target_buf >= cde_ctx->num_bufs ||
360 replace->source_buf >= cde_ctx->num_bufs) {
361 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
362 replace->target_buf, replace->source_buf,
363 cde_ctx->num_bufs);
364 return -EINVAL;
365 }
366
367 source_mem = cde_ctx->mem + replace->source_buf;
368 target_mem = cde_ctx->mem + replace->target_buf;
369 target_mem_ptr = target_mem->cpu_va;
370
371 if (source_mem->size < (replace->source_byte_offset + 3) ||
372 target_mem->size < (replace->target_byte_offset + 3)) {
373 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
374 replace->target_byte_offset,
375 replace->source_byte_offset,
376 source_mem->size,
377 target_mem->size);
378 return -EINVAL;
379 }
380
381 /* calculate the target pointer */
382 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
383
384 /* determine patch value */
385 vaddr = source_mem->gpu_va + replace->source_byte_offset;
386 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
387 replace->shift, replace->mask,
388 vaddr);
389 if (err) {
390 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
391 err, replace->target_buf,
392 replace->target_byte_offset,
393 replace->source_buf,
394 replace->source_byte_offset);
395 }
396
397 return err;
398}
399
400static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
401{
402 struct nvgpu_os_linux *l = cde_ctx->l;
403 struct gk20a *g = &l->g;
404 struct nvgpu_mem *target_mem;
405 u32 *target_mem_ptr;
406 u64 new_data;
407 int user_id = 0, err;
408 unsigned int i;
409
410 for (i = 0; i < cde_ctx->num_params; i++) {
411 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
412 target_mem = cde_ctx->mem + param->target_buf;
413 target_mem_ptr = target_mem->cpu_va;
414 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
415
416 switch (param->id) {
417 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
418 new_data = g->gr.comptags_per_cacheline;
419 break;
420 case TYPE_PARAM_GPU_CONFIGURATION:
421 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
422 g->gr.cacheline_size;
423 break;
424 case TYPE_PARAM_FIRSTPAGEOFFSET:
425 new_data = cde_ctx->surf_param_offset;
426 break;
427 case TYPE_PARAM_NUMPAGES:
428 new_data = cde_ctx->surf_param_lines;
429 break;
430 case TYPE_PARAM_BACKINGSTORE:
431 new_data = cde_ctx->backing_store_vaddr;
432 break;
433 case TYPE_PARAM_DESTINATION:
434 new_data = cde_ctx->compbit_vaddr;
435 break;
436 case TYPE_PARAM_DESTINATION_SIZE:
437 new_data = cde_ctx->compbit_size;
438 break;
439 case TYPE_PARAM_BACKINGSTORE_SIZE:
440 new_data = g->gr.compbit_store.mem.size;
441 break;
442 case TYPE_PARAM_SOURCE_SMMU_ADDR:
443 new_data = gpuva_to_iova_base(cde_ctx->vm,
444 cde_ctx->surf_vaddr);
445 if (new_data == 0) {
446 nvgpu_warn(g, "cde: failed to find 0x%llx",
447 cde_ctx->surf_vaddr);
448 return -EINVAL;
449 }
450 break;
451 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
452 new_data = g->gr.compbit_store.base_hw;
453 break;
454 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
455 new_data = g->gr.gobs_per_comptagline_per_slice;
456 break;
457 case TYPE_PARAM_SCATTERBUFFER:
458 new_data = cde_ctx->scatterbuffer_vaddr;
459 break;
460 case TYPE_PARAM_SCATTERBUFFER_SIZE:
461 new_data = cde_ctx->scatterbuffer_size;
462 break;
463 default:
464 user_id = param->id - NUM_RESERVED_PARAMS;
465 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
466 continue;
467 new_data = cde_ctx->user_param_values[user_id];
468 }
469
470 nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
471 i, param->id, param->target_buf,
472 param->target_byte_offset, new_data,
473 param->data_offset, param->type, param->shift,
474 param->mask);
475
476 new_data += param->data_offset;
477
478 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
479 param->shift, param->mask, new_data);
480
481 if (err) {
482 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
483 err, i, param->id, param->target_buf,
484 param->target_byte_offset, new_data);
485 return err;
486 }
487 }
488
489 return 0;
490}
491
492static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
493 struct nvgpu_firmware *img,
494 struct gk20a_cde_hdr_param *param)
495{
496 struct nvgpu_mem *target_mem;
497 struct nvgpu_os_linux *l = cde_ctx->l;
498 struct gk20a *g = &l->g;
499
500 if (param->target_buf >= cde_ctx->num_bufs) {
501 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
502 cde_ctx->num_params, param->target_buf,
503 cde_ctx->num_bufs);
504 return -EINVAL;
505 }
506
507 target_mem = cde_ctx->mem + param->target_buf;
508 if (target_mem->size < (param->target_byte_offset + 3)) {
509 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
510 cde_ctx->num_params, param->target_byte_offset,
511 target_mem->size);
512 return -EINVAL;
513 }
514
515 /* does this parameter fit into our parameter structure */
516 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
517 nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
518 cde_ctx->num_params);
519 return -ENOMEM;
520 }
521
522 /* is the given id valid? */
523 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
524 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
525 cde_ctx->num_params, param->id,
526 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
527 return -EINVAL;
528 }
529
530 cde_ctx->params[cde_ctx->num_params] = *param;
531 cde_ctx->num_params++;
532
533 return 0;
534}
535
536static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
537 struct nvgpu_firmware *img,
538 u32 required_class)
539{
540 struct nvgpu_os_linux *l = cde_ctx->l;
541 struct gk20a *g = &l->g;
542 int err;
543
544 /* CDE enabled */
545 cde_ctx->ch->cde = true;
546
547 err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
548 if (err) {
549 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
550 err);
551 return err;
552 }
553
554 return 0;
555}
556
557static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
558 struct nvgpu_firmware *img,
559 u32 op,
560 struct gk20a_cde_cmd_elem *cmd_elem,
561 u32 num_elems)
562{
563 struct nvgpu_os_linux *l = cde_ctx->l;
564 struct gk20a *g = &l->g;
565 struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem;
566 u32 *num_entries;
567 unsigned int i;
568
569 /* check command type */
570 if (op == TYPE_BUF_COMMAND_INIT) {
571 gpfifo = &cde_ctx->init_convert_cmd;
572 num_entries = &cde_ctx->init_cmd_num_entries;
573 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
574 gpfifo = &cde_ctx->convert_cmd;
575 num_entries = &cde_ctx->convert_cmd_num_entries;
576 } else {
577 nvgpu_warn(g, "cde: unknown command. op=%u",
578 op);
579 return -EINVAL;
580 }
581
582 /* allocate gpfifo entries to be pushed */
583 *gpfifo = nvgpu_kzalloc(g,
584 sizeof(struct nvgpu_gpfifo_entry) * num_elems);
585 if (!*gpfifo) {
586 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
587 return -ENOMEM;
588 }
589
590 gpfifo_elem = *gpfifo;
591 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
592 struct nvgpu_mem *target_mem;
593
594 /* validate the current entry */
595 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
596 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
597 cmd_elem->target_buf, cde_ctx->num_bufs);
598 return -EINVAL;
599 }
600
601 target_mem = cde_ctx->mem + cmd_elem->target_buf;
602 if (target_mem->size <
603 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
604 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
605 target_mem->size,
606 cmd_elem->target_byte_offset,
607 cmd_elem->num_bytes);
608 return -EINVAL;
609 }
610
611 /* store the element into gpfifo */
612 gpfifo_elem->entry0 =
613 u64_lo32(target_mem->gpu_va +
614 cmd_elem->target_byte_offset);
615 gpfifo_elem->entry1 =
616 u64_hi32(target_mem->gpu_va +
617 cmd_elem->target_byte_offset) |
618 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
619 sizeof(u32));
620 }
621
622 *num_entries = num_elems;
623 return 0;
624}
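/*
 * Illustration of the gpfifo packing above (made-up numbers): a command
 * buffer at GPU VA 0x120345000 with num_bytes = 256 is stored as
 *   entry0 = u64_lo32(0x120345000) = 0x20345000
 *   entry1 = u64_hi32(0x120345000) | pbdma_gp_entry1_length_f(64)
 * i.e. the address split into 32-bit halves plus the length expressed
 * in 32-bit words (256 / sizeof(u32) = 64).
 */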
625
626static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
627{
628 struct nvgpu_os_linux *l = cde_ctx->l;
629 struct gk20a *g = &l->g;
630 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
631 sizeof(struct nvgpu_gpfifo_entry);
632 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
633 sizeof(struct nvgpu_gpfifo_entry);
634 unsigned long total_bytes = init_bytes + conv_bytes;
635 struct nvgpu_gpfifo_entry *combined_cmd;
636
637 /* allocate buffer that has space for both */
638 combined_cmd = nvgpu_kzalloc(g, total_bytes);
639 if (!combined_cmd) {
640 nvgpu_warn(g,
641 "cde: could not allocate memory for gpfifo entries");
642 return -ENOMEM;
643 }
644
645 /* move the original init here and append convert */
646 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
647 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
648 cde_ctx->convert_cmd, conv_bytes);
649
650 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
651 nvgpu_kfree(g, cde_ctx->convert_cmd);
652
653 cde_ctx->init_convert_cmd = combined_cmd;
654 cde_ctx->convert_cmd = combined_cmd
655 + cde_ctx->init_cmd_num_entries;
656
657 return 0;
658}
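/*
 * After packing, the single combined allocation is laid out as
 *   init_convert_cmd -> [ init entries ][ convert entries ] <- convert_cmd
 * so a TYPE_BUF_COMMAND_INIT submit pushes both parts in one go, while a
 * TYPE_BUF_COMMAND_CONVERT submit starts at the convert tail only.
 */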
659
660static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
661 struct nvgpu_firmware *img)
662{
663 struct nvgpu_os_linux *l = cde_ctx->l;
664 struct gk20a *g = &l->g;
665 struct gk20a_cde_app *cde_app = &l->cde_app;
666 u32 *data = (u32 *)img->data;
667 u32 num_of_elems;
668 struct gk20a_cde_hdr_elem *elem;
669 u32 min_size = 0;
670 int err = 0;
671 unsigned int i;
672
673 min_size += 2 * sizeof(u32);
674 if (img->size < min_size) {
675 nvgpu_warn(g, "cde: invalid image header");
676 return -EINVAL;
677 }
678
679 cde_app->firmware_version = data[0];
680 num_of_elems = data[1];
681
682 min_size += num_of_elems * sizeof(*elem);
683 if (img->size < min_size) {
684 nvgpu_warn(g, "cde: bad image");
685 return -EINVAL;
686 }
687
688 elem = (struct gk20a_cde_hdr_elem *)&data[2];
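 /*
  * The image parsed here is laid out roughly as (offsets relative to
  * img->data):
  *   [u32 firmware_version][u32 num_of_elems]
  *   [gk20a_cde_hdr_elem 0] ... [gk20a_cde_hdr_elem num_of_elems - 1]
  *   [payload referenced via the data_byte_offset fields: buffer
  *    contents, gk20a_cde_cmd_elem arrays, ...]
  */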
689 for (i = 0; i < num_of_elems; i++) {
690 int err = 0;
691 switch (elem->type) {
692 case TYPE_BUF:
693 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
694 break;
695 case TYPE_REPLACE:
696 err = gk20a_init_cde_replace(cde_ctx, img,
697 &elem->replace);
698 break;
699 case TYPE_PARAM:
700 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
701 break;
702 case TYPE_REQUIRED_CLASS:
703 err = gk20a_init_cde_required_class(cde_ctx, img,
704 elem->required_class);
705 break;
706 case TYPE_COMMAND:
707 {
708 struct gk20a_cde_cmd_elem *cmd = (void *)
709 &img->data[elem->command.data_byte_offset];
710 err = gk20a_init_cde_command(cde_ctx, img,
711 elem->command.op, cmd,
712 elem->command.num_entries);
713 break;
714 }
715 case TYPE_ARRAY:
716 memcpy(&cde_app->arrays[elem->array.id][0],
717 elem->array.data,
718 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
719 break;
720 default:
721 nvgpu_warn(g, "cde: unknown header element");
722 err = -EINVAL;
723 }
724
725 if (err)
726 goto deinit_image;
727
728 elem++;
729 }
730
731 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
732 nvgpu_warn(g, "cde: init command not defined");
733 err = -EINVAL;
734 goto deinit_image;
735 }
736
737 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
738 nvgpu_warn(g, "cde: convert command not defined");
739 err = -EINVAL;
740 goto deinit_image;
741 }
742
743 err = gk20a_cde_pack_cmdbufs(cde_ctx);
744 if (err)
745 goto deinit_image;
746
747 return 0;
748
749deinit_image:
750 gk20a_deinit_cde_img(cde_ctx);
751 return err;
752}
753
754static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
755 u32 op, struct nvgpu_channel_fence *fence,
756 u32 flags, struct gk20a_fence **fence_out)
757{
758 struct nvgpu_os_linux *l = cde_ctx->l;
759 struct gk20a *g = &l->g;
760 struct nvgpu_gpfifo_entry *gpfifo = NULL;
761 int num_entries = 0;
762
763 /* check command type */
764 if (op == TYPE_BUF_COMMAND_INIT) {
765 /* both init and convert combined */
766 gpfifo = cde_ctx->init_convert_cmd;
767 num_entries = cde_ctx->init_cmd_num_entries
768 + cde_ctx->convert_cmd_num_entries;
769 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
770 gpfifo = cde_ctx->convert_cmd;
771 num_entries = cde_ctx->convert_cmd_num_entries;
772 } else if (op == TYPE_BUF_COMMAND_NOOP) {
773 /* Any non-null gpfifo will suffice with 0 num_entries */
774 gpfifo = cde_ctx->init_convert_cmd;
775 num_entries = 0;
776 } else {
777 nvgpu_warn(g, "cde: unknown buffer");
778 return -EINVAL;
779 }
780
781 if (gpfifo == NULL) {
782 nvgpu_warn(g, "cde: buffer not available");
783 return -ENOSYS;
784 }
785
786 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
787 num_entries, flags, fence, fence_out,
788 NULL);
789}
790
791static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
792__acquires(&cde_app->mutex)
793__releases(&cde_app->mutex)
794{
795 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
796 struct gk20a *g = &cde_ctx->l->g;
797
798 nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
799 trace_gk20a_cde_release(cde_ctx);
800
801 nvgpu_mutex_acquire(&cde_app->mutex);
802
803 if (cde_ctx->in_use) {
804 cde_ctx->in_use = false;
805 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
806 cde_app->ctx_usecount--;
807 } else {
808 nvgpu_log_info(g, "double release cde context %p", cde_ctx);
809 }
810
811 nvgpu_mutex_release(&cde_app->mutex);
812}
813
814static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
815__acquires(&cde_app->mutex)
816__releases(&cde_app->mutex)
817{
818 struct delayed_work *delay_work = to_delayed_work(work);
819 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
820 struct gk20a_cde_ctx, ctx_deleter_work);
821 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
822 struct nvgpu_os_linux *l = cde_ctx->l;
823 struct gk20a *g = &l->g;
824 int err;
825
826 /* someone has just taken it? engine deletion started? */
827 if (cde_ctx->in_use || !cde_app->initialised)
828 return;
829
830 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
831 "cde: attempting to delete temporary %p", cde_ctx);
832
833 err = gk20a_busy(g);
834 if (err) {
835 /* this context would find new use anyway later, so not freeing
836 * here does not leak anything */
837 nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
838 " temp ctx deletion");
839 return;
840 }
841
842 nvgpu_mutex_acquire(&cde_app->mutex);
843 if (cde_ctx->in_use || !cde_app->initialised) {
844 nvgpu_log(g, gpu_dbg_cde_ctx,
845 "cde: context use raced, not deleting %p",
846 cde_ctx);
847 goto out;
848 }
849
850 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
851 "double pending %p", cde_ctx);
852
853 gk20a_cde_remove_ctx(cde_ctx);
854 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
855 "cde: destroyed %p count=%d use=%d max=%d",
856 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
857 cde_app->ctx_count_top);
858
859out:
860 nvgpu_mutex_release(&cde_app->mutex);
861 gk20a_idle(g);
862}
863
864static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
865__must_hold(&cde_app->mutex)
866{
867 struct gk20a *g = &l->g;
868 struct gk20a_cde_app *cde_app = &l->cde_app;
869 struct gk20a_cde_ctx *cde_ctx;
870
871 /* exhausted? */
872
873 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
874 return ERR_PTR(-EAGAIN);
875
876 /* idle context available? */
877
878 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
879 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
880 gk20a_cde_ctx, list);
881 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
882 "cde: got free %p count=%d use=%d max=%d",
883 cde_ctx, cde_app->ctx_count,
884 cde_app->ctx_usecount,
885 cde_app->ctx_count_top);
886 trace_gk20a_cde_get_context(cde_ctx);
887
888 /* deleter work may be scheduled, but in_use prevents it */
889 cde_ctx->in_use = true;
890 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
891 cde_app->ctx_usecount++;
892
893 /* cancel any deletions now that ctx is in use */
894 gk20a_cde_cancel_deleter(cde_ctx, true);
895 return cde_ctx;
896 }
897
898 /* no free contexts, get a temporary one */
899
900 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
901 "cde: no free contexts, count=%d",
902 cde_app->ctx_count);
903
904 cde_ctx = gk20a_cde_allocate_context(l);
905 if (IS_ERR(cde_ctx)) {
906 nvgpu_warn(g, "cde: cannot allocate context: %ld",
907 PTR_ERR(cde_ctx));
908 return cde_ctx;
909 }
910
911 trace_gk20a_cde_get_context(cde_ctx);
912 cde_ctx->in_use = true;
913 cde_ctx->is_temporary = true;
914 cde_app->ctx_usecount++;
915 cde_app->ctx_count++;
916 if (cde_app->ctx_count > cde_app->ctx_count_top)
917 cde_app->ctx_count_top = cde_app->ctx_count;
918 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
919
920 return cde_ctx;
921}
922
923static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
924__releases(&cde_app->mutex)
925__acquires(&cde_app->mutex)
926{
927 struct gk20a *g = &l->g;
928 struct gk20a_cde_app *cde_app = &l->cde_app;
929 struct gk20a_cde_ctx *cde_ctx = NULL;
930 struct nvgpu_timeout timeout;
931
932 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
933 NVGPU_TIMER_CPU_TIMER);
934
935 do {
936 cde_ctx = gk20a_cde_do_get_context(l);
937 if (PTR_ERR(cde_ctx) != -EAGAIN)
938 break;
939
940 /* exhausted, retry */
941 nvgpu_mutex_release(&cde_app->mutex);
942 cond_resched();
943 nvgpu_mutex_acquire(&cde_app->mutex);
944 } while (!nvgpu_timeout_expired(&timeout));
945
946 return cde_ctx;
947}
948
949static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
950{
951 struct gk20a *g = &l->g;
952 struct gk20a_cde_ctx *cde_ctx;
953 int ret;
954
955 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
956 if (!cde_ctx)
957 return ERR_PTR(-ENOMEM);
958
959 cde_ctx->l = l;
960 cde_ctx->dev = dev_from_gk20a(g);
961
962 ret = gk20a_cde_load(cde_ctx);
963 if (ret) {
964 nvgpu_kfree(g, cde_ctx);
965 return ERR_PTR(ret);
966 }
967
968 nvgpu_init_list_node(&cde_ctx->list);
969 cde_ctx->is_temporary = false;
970 cde_ctx->in_use = false;
971 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
972 gk20a_cde_ctx_deleter_fn);
973
974 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
975 trace_gk20a_cde_allocate_context(cde_ctx);
976 return cde_ctx;
977}
978
979int gk20a_cde_convert(struct nvgpu_os_linux *l,
980 struct dma_buf *compbits_scatter_buf,
981 u64 compbits_byte_offset,
982 u64 scatterbuffer_byte_offset,
983 struct nvgpu_channel_fence *fence,
984 u32 __flags, struct gk20a_cde_param *params,
985 int num_params, struct gk20a_fence **fence_out)
986__acquires(&l->cde_app->mutex)
987__releases(&l->cde_app->mutex)
988{
989 struct gk20a *g = &l->g;
990 struct gk20a_cde_ctx *cde_ctx = NULL;
991 struct gk20a_comptags comptags;
992 struct nvgpu_os_buffer os_buf = {
993 compbits_scatter_buf,
994 NULL,
995 dev_from_gk20a(g)
996 };
997 u64 mapped_compbits_offset = 0;
998 u64 compbits_size = 0;
999 u64 mapped_scatterbuffer_offset = 0;
1000 u64 scatterbuffer_size = 0;
1001 u64 map_vaddr = 0;
1002 u64 map_offset = 0;
1003 u64 map_size = 0;
1004 u8 *surface = NULL;
1005 u64 big_page_mask = 0;
1006 u32 flags;
1007 int err, i;
1008 const s16 compbits_kind = 0;
1009 u32 submit_op;
1010 struct dma_buf_attachment *attachment;
1011
1012 nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
1013 compbits_byte_offset, scatterbuffer_byte_offset);
1014
1015 /* scatter buffer must be after compbits buffer */
1016 if (scatterbuffer_byte_offset &&
1017 scatterbuffer_byte_offset < compbits_byte_offset)
1018 return -EINVAL;
1019
1020 err = gk20a_busy(g);
1021 if (err)
1022 return err;
1023
1024 nvgpu_mutex_acquire(&l->cde_app.mutex);
1025 cde_ctx = gk20a_cde_get_context(l);
1026 nvgpu_mutex_release(&l->cde_app.mutex);
1027 if (IS_ERR(cde_ctx)) {
1028 err = PTR_ERR(cde_ctx);
1029 goto exit_idle;
1030 }
1031
1032 /* First, map the buffer to local va */
1033
1034 /* ensure that the compbits buffer has drvdata */
1035 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
1036 dev_from_gk20a(g));
1037 if (err)
1038 goto exit_idle;
1039
1040 /* compbits don't start at a page-aligned offset, so we need to align
1041 the region to be mapped */
1042 big_page_mask = cde_ctx->vm->big_page_size - 1;
1043 map_offset = compbits_byte_offset & ~big_page_mask;
1044 map_size = compbits_scatter_buf->size - map_offset;
1045
1046
1047 /* compute compbit start offset from the beginning of the mapped
1048 area */
1049 mapped_compbits_offset = compbits_byte_offset - map_offset;
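 /*
  * For illustration, assuming a 128 KiB big page size (big_page_mask =
  * 0x1ffff) and compbits_byte_offset = 0x23400:
  *   map_offset             = 0x23400 & ~0x1ffff = 0x20000
  *   mapped_compbits_offset = 0x23400 -  0x20000 =  0x3400
  * i.e. the mapping starts on a big-page boundary and the compbits sit
  * at a small offset inside it.
  */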
1050 if (scatterbuffer_byte_offset) {
1051 compbits_size = scatterbuffer_byte_offset -
1052 compbits_byte_offset;
1053 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1054 map_offset;
1055 scatterbuffer_size = compbits_scatter_buf->size -
1056 scatterbuffer_byte_offset;
1057 } else {
1058 compbits_size = compbits_scatter_buf->size -
1059 compbits_byte_offset;
1060 }
1061
1062 nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1063 map_offset, map_size);
1064 nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1065 mapped_compbits_offset, compbits_size);
1066 nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1067 mapped_scatterbuffer_offset, scatterbuffer_size);
1068
1069
1070 /* map the destination buffer */
1071 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
1072 err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
1073 NVGPU_VM_MAP_CACHEABLE |
1074 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
1075 NVGPU_KIND_INVALID,
1076 compbits_kind, /* incompressible kind */
1077 gk20a_mem_flag_none,
1078 map_offset, map_size,
1079 NULL,
1080 &map_vaddr);
1081 if (err) {
1082 dma_buf_put(compbits_scatter_buf);
1083 err = -EINVAL;
1084 goto exit_idle;
1085 }
1086
1087 if (scatterbuffer_byte_offset &&
1088 l->ops.cde.need_scatter_buffer &&
1089 l->ops.cde.need_scatter_buffer(g)) {
1090 struct sg_table *sgt;
1091 void *scatter_buffer;
1092
1093 surface = dma_buf_vmap(compbits_scatter_buf);
1094 if (IS_ERR(surface)) {
1095 nvgpu_warn(g,
1096 "dma_buf_vmap failed");
1097 err = -EINVAL;
1098 goto exit_unmap_vaddr;
1099 }
1100
1101 scatter_buffer = surface + scatterbuffer_byte_offset;
1102
1103 nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1104 surface, scatter_buffer);
1105 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf,
1106 &attachment);
1107 if (IS_ERR(sgt)) {
1108 nvgpu_warn(g,
1109 "mm_pin failed");
1110 err = -EINVAL;
1111 goto exit_unmap_surface;
1112 } else {
1113 err = l->ops.cde.populate_scatter_buffer(g, sgt,
1114 compbits_byte_offset, scatter_buffer,
1115 scatterbuffer_size);
1116 WARN_ON(err);
1117
1118 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1119 attachment, sgt);
1120 if (err)
1121 goto exit_unmap_surface;
1122 }
1123
1124 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1125 dma_buf_vunmap(compbits_scatter_buf, surface);
1126 surface = NULL;
1127 }
1128
1129 /* store source buffer compression tags */
1130 gk20a_get_comptags(&os_buf, &comptags);
1131 cde_ctx->surf_param_offset = comptags.offset;
1132 cde_ctx->surf_param_lines = comptags.lines;
1133
1134 /* store surface vaddr. This is actually compbit vaddr, but since
1135 compbits live in the same surface, and we can get the alloc base
1136 address by using gpuva_to_iova_base, this will do */
1137 cde_ctx->surf_vaddr = map_vaddr;
1138
1139 /* store information about destination */
1140 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1141 cde_ctx->compbit_size = compbits_size;
1142
1143 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1144 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1145
1146 /* remove existing argument data */
1147 memset(cde_ctx->user_param_values, 0,
1148 sizeof(cde_ctx->user_param_values));
1149
1150 /* read user space arguments for the conversion */
1151 for (i = 0; i < num_params; i++) {
1152 struct gk20a_cde_param *param = params + i;
1153 int id = param->id - NUM_RESERVED_PARAMS;
1154
1155 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1156 nvgpu_warn(g, "cde: unknown user parameter");
1157 err = -EINVAL;
1158 goto exit_unmap_surface;
1159 }
1160 cde_ctx->user_param_values[id] = param->value;
1161 }
1162
1163 /* patch data */
1164 err = gk20a_cde_patch_params(cde_ctx);
1165 if (err) {
1166 nvgpu_warn(g, "cde: failed to patch parameters");
1167 goto exit_unmap_surface;
1168 }
1169
1170 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1171 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1172 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1173 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1174 nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1175 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1176
1177 /* always take the postfence as it is needed for protecting the
1178 * cde context */
1179 flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;
1180
1181 /* gk20a_cde_execute_buffer() will grab a power reference of its own */
1182 gk20a_idle(g);
1183
1184 if (comptags.lines == 0) {
1185 /*
1186 * Nothing to do on the buffer, but do a null kickoff for
1187 * managing the pre and post fences.
1188 */
1189 submit_op = TYPE_BUF_COMMAND_NOOP;
1190 } else if (!cde_ctx->init_cmd_executed) {
1191 /*
1192 * First time, so include the init pushbuf too in addition to
1193 * the conversion code.
1194 */
1195 submit_op = TYPE_BUF_COMMAND_INIT;
1196 } else {
1197 /*
1198 * The usual condition: execute just the conversion.
1199 */
1200 submit_op = TYPE_BUF_COMMAND_CONVERT;
1201 }
1202 err = gk20a_cde_execute_buffer(cde_ctx, submit_op,
1203 fence, flags, fence_out);
1204
1205 if (comptags.lines != 0 && !err)
1206 cde_ctx->init_cmd_executed = true;
1207
1208 /* unmap the buffers - channel holds references to them now */
1209 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1210
1211 return err;
1212
1213exit_unmap_surface:
1214 if (surface)
1215 dma_buf_vunmap(compbits_scatter_buf, surface);
1216exit_unmap_vaddr:
1217 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
1218exit_idle:
1219 gk20a_idle(g);
1220 return err;
1221}
1222
1223static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1224__acquires(&cde_app->mutex)
1225__releases(&cde_app->mutex)
1226{
1227 struct gk20a_cde_ctx *cde_ctx = data;
1228 struct nvgpu_os_linux *l = cde_ctx->l;
1229 struct gk20a *g = &l->g;
1230 struct gk20a_cde_app *cde_app = &l->cde_app;
1231 bool channel_idle;
1232
1233 channel_gk20a_joblist_lock(ch);
1234 channel_idle = channel_gk20a_joblist_is_empty(ch);
1235 channel_gk20a_joblist_unlock(ch);
1236
1237 if (!channel_idle)
1238 return;
1239
1240 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1241 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1242 if (!cde_ctx->in_use)
1243 nvgpu_log_info(g, "double finish cde context %p on channel %p",
1244 cde_ctx, ch);
1245
1246 if (ch->has_timedout) {
1247 if (cde_ctx->is_temporary) {
1248 nvgpu_warn(g,
1249 "cde: channel had timed out"
1250 " (temporary channel)");
1251 /* going to be deleted anyway */
1252 } else {
1253 nvgpu_warn(g,
1254 "cde: channel had timed out"
1255 ", reloading");
1256 /* mark it to be deleted, replace with a new one */
1257 nvgpu_mutex_acquire(&cde_app->mutex);
1258 cde_ctx->is_temporary = true;
1259 if (gk20a_cde_create_context(l)) {
1260 nvgpu_err(g, "cde: can't replace context");
1261 }
1262 nvgpu_mutex_release(&cde_app->mutex);
1263 }
1264 }
1265
1266 /* delete temporary contexts later (watch for doubles) */
1267 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1268 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1269 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1270 msecs_to_jiffies(CTX_DELETE_TIME));
1271 }
1272
1273 if (!ch->has_timedout)
1274 gk20a_cde_ctx_release(cde_ctx);
1275}
1276
1277static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1278{
1279 struct nvgpu_os_linux *l = cde_ctx->l;
1280 struct gk20a *g = &l->g;
1281 struct nvgpu_firmware *img;
1282 struct channel_gk20a *ch;
1283 struct tsg_gk20a *tsg;
1284 struct gr_gk20a *gr = &g->gr;
1285 struct nvgpu_gpfifo_args gpfifo_args;
1286 int err = 0;
1287 u64 vaddr;
1288
1289 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1290 if (!img) {
1291 nvgpu_err(g, "cde: could not fetch the firmware");
1292 return -ENOSYS;
1293 }
1294
1295 tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
1296 if (!tsg) {
1297 nvgpu_err(g, "cde: could not create TSG");
1298 err = -ENOMEM;
1299 goto err_get_gk20a_channel;
1300 }
1301
1302 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1303 cde_ctx,
1304 -1,
1305 false);
1306 if (!ch) {
1307 nvgpu_warn(g, "cde: gk20a channel not available");
1308 err = -ENOMEM;
1309 goto err_get_gk20a_channel;
1310 }
1311
1312 ch->timeout.enabled = false;
1313
1314 /* bind the channel to the vm */
1315 err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
1316 if (err) {
1317 nvgpu_warn(g, "cde: could not bind vm");
1318 goto err_commit_va;
1319 }
1320
1321 err = gk20a_tsg_bind_channel(tsg, ch);
1322 if (err) {
1323 nvgpu_err(g, "cde: unable to bind to tsg");
1324 goto err_alloc_gpfifo;
1325 }
1326
1327 gpfifo_args.num_entries = 1024;
1328 gpfifo_args.num_inflight_jobs = 0;
1329 gpfifo_args.flags = 0;
1330 /* allocate gpfifo (1024 should be more than enough) */
1331 err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
1332 if (err) {
1333 nvgpu_warn(g, "cde: unable to allocate gpfifo");
1334 goto err_alloc_gpfifo;
1335 }
1336
1337 /* map backing store to gpu virtual space */
1338 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1339 g->gr.compbit_store.mem.size,
1340 NVGPU_VM_MAP_CACHEABLE,
1341 gk20a_mem_flag_read_only,
1342 false,
1343 gr->compbit_store.mem.aperture);
1344
1345 if (!vaddr) {
1346 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1347 err = -ENOMEM;
1348 goto err_map_backingstore;
1349 }
1350
1351 /* store initialisation data */
1352 cde_ctx->ch = ch;
1353 cde_ctx->tsg = tsg;
1354 cde_ctx->vm = ch->vm;
1355 cde_ctx->backing_store_vaddr = vaddr;
1356
1357 /* initialise the firmware */
1358 err = gk20a_init_cde_img(cde_ctx, img);
1359 if (err) {
1360 nvgpu_warn(g, "cde: image initialisation failed");
1361 goto err_init_cde_img;
1362 }
1363
1364 /* initialisation done */
1365 nvgpu_release_firmware(g, img);
1366
1367 return 0;
1368
1369err_init_cde_img:
1370 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1371err_map_backingstore:
1372err_alloc_gpfifo:
1373 nvgpu_vm_put(ch->vm);
1374err_commit_va:
1375err_get_gk20a_channel:
1376 nvgpu_release_firmware(g, img);
1377 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1378 return err;
1379}
1380
1381int gk20a_cde_reload(struct nvgpu_os_linux *l)
1382__acquires(&l->cde_app->mutex)
1383__releases(&l->cde_app->mutex)
1384{
1385 struct gk20a *g = &l->g;
1386 struct gk20a_cde_app *cde_app = &l->cde_app;
1387 int err;
1388
1389 if (!cde_app->initialised)
1390 return -ENOSYS;
1391
1392 err = gk20a_busy(g);
1393 if (err)
1394 return err;
1395
1396 nvgpu_mutex_acquire(&cde_app->mutex);
1397
1398 gk20a_cde_stop(l);
1399
1400 err = gk20a_cde_create_contexts(l);
1401 if (!err)
1402 cde_app->initialised = true;
1403
1404 nvgpu_mutex_release(&cde_app->mutex);
1405
1406 gk20a_idle(g);
1407 return err;
1408}
1409
1410int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1411__acquires(&cde_app->mutex)
1412__releases(&cde_app->mutex)
1413{
1414 struct gk20a_cde_app *cde_app = &l->cde_app;
1415 struct gk20a *g = &l->g;
1416 int err;
1417
1418 if (cde_app->initialised)
1419 return 0;
1420
1421 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1422
1423 err = nvgpu_mutex_init(&cde_app->mutex);
1424 if (err)
1425 return err;
1426
1427 nvgpu_mutex_acquire(&cde_app->mutex);
1428
1429 nvgpu_init_list_node(&cde_app->free_contexts);
1430 nvgpu_init_list_node(&cde_app->used_contexts);
1431 cde_app->ctx_count = 0;
1432 cde_app->ctx_count_top = 0;
1433 cde_app->ctx_usecount = 0;
1434
1435 err = gk20a_cde_create_contexts(l);
1436 if (!err)
1437 cde_app->initialised = true;
1438
1439 nvgpu_mutex_release(&cde_app->mutex);
1440 nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1441
1442 if (err)
1443 nvgpu_mutex_destroy(&cde_app->mutex);
1444
1445 return err;
1446}
1447
1448enum cde_launch_patch_id {
1449 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1450 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1451 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1452 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1453 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1454 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1455 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1456 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1457 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1458 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1459 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1460 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1461 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1462 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1463 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1464 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1465 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1466 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1467 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1468 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1469 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1470 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1471 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1472 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1473 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1474 PATCH_H_LAUNCH_WORD1_ID = 1049,
1475 PATCH_H_LAUNCH_WORD2_ID = 1050,
1476 PATCH_V_LAUNCH_WORD1_ID = 1051,
1477 PATCH_V_LAUNCH_WORD2_ID = 1052,
1478 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1479 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1480 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1481 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1482};
1483
1484/* maximum number of WRITE_PATCHes in the below function */
1485#define MAX_CDE_LAUNCH_PATCHES 32
1486
1487static int gk20a_buffer_convert_gpu_to_cde_v1(
1488 struct nvgpu_os_linux *l,
1489 struct dma_buf *dmabuf, u32 consumer,
1490 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1491 u64 scatterbuffer_offset,
1492 u32 width, u32 height, u32 block_height_log2,
1493 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1494 struct gk20a_buffer_state *state)
1495{
1496 struct gk20a *g = &l->g;
1497 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1498 int param = 0;
1499 int err = 0;
1500 struct gk20a_fence *new_fence = NULL;
1501 const int wgx = 8;
1502 const int wgy = 8;
1503 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1504 const int xalign = compbits_per_byte * wgx;
1505 const int yalign = wgy;
1506
1507 /* Compute per launch parameters */
1508 const int xtiles = (width + 7) >> 3;
1509 const int ytiles = (height + 7) >> 3;
1510 const int gridw_h = roundup(xtiles, xalign) / xalign;
1511 const int gridh_h = roundup(ytiles, yalign) / yalign;
1512 const int gridw_v = roundup(ytiles, xalign) / xalign;
1513 const int gridh_v = roundup(xtiles, yalign) / yalign;
1514 const int xblocks = (xtiles + 1) >> 1;
1515 const int voffset = compbits_voffset - compbits_hoffset;
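 /*
  * Worked example with a made-up 1920x1080 surface, just to show the
  * math above:
  *   xtiles = (1920 + 7) >> 3 = 240,   ytiles = (1080 + 7) >> 3 = 135
  *   xalign = 4 * 8 = 32,              yalign = 8
  *   gridw_h = roundup(240, 32) / 32 = 8,  gridh_h = roundup(135, 8) / 8 = 17
  *   gridw_v = roundup(135, 32) / 32 = 5,  gridh_v = roundup(240, 8) / 8 = 30
  *   xblocks = (240 + 1) >> 1 = 120
  */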
1516
1517 int hprog = -1;
1518 int vprog = -1;
1519
1520 if (l->ops.cde.get_program_numbers)
1521 l->ops.cde.get_program_numbers(g, block_height_log2,
1522 l->cde_app.shader_parameter,
1523 &hprog, &vprog);
1524 else {
1525 nvgpu_warn(g, "cde: chip not supported");
1526 return -ENOSYS;
1527 }
1528
1529 if (hprog < 0 || vprog < 0) {
1530 nvgpu_warn(g, "cde: could not determine programs");
1531 return -ENOSYS;
1532 }
1533
1534 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1535 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1536 xtiles, ytiles);
1537
1538 nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1539 width, height, block_height_log2,
1540 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1541 nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1542 width, height, xtiles, ytiles);
1543 nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1544 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1545 nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1546 hprog,
1547 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1548 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1549 vprog,
1550 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1551 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1552
1553 /* Write parameters */
1554#define WRITE_PATCH(NAME, VALUE) \
1555 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1556 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1557 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1558 block_height_log2);
1559 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1560 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1561 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1562 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1563 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1564
1565 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1566 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1567 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1568 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1569 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1570 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1571
1572 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1573 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1574 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1575 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1576 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1577 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1578
1579 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1580 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1581 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1582 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1583 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1584 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1585 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1586 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1587
1588 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1589 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1590 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1591 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1592 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1593 } else {
1594 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1595 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1596 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1597 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1598 }
1599
1600 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1601 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1602 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1603 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1604 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1605 } else {
1606 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1607 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1608 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1609 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1610 }
1611#undef WRITE_PATCH
1612
1613 err = gk20a_cde_convert(l, dmabuf,
1614 compbits_hoffset,
1615 scatterbuffer_offset,
1616 fence_in, submit_flags,
1617 params, param, &new_fence);
1618 if (err)
1619 goto out;
1620
1621 /* compbits generated, update state & fence */
1622 gk20a_fence_put(state->fence);
1623 state->fence = new_fence;
1624 state->valid_compbits |= consumer &
1625 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1626out:
1627 return err;
1628}
1629
1630static int gk20a_buffer_convert_gpu_to_cde(
1631 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1632 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1633 u64 scatterbuffer_offset,
1634 u32 width, u32 height, u32 block_height_log2,
1635 u32 submit_flags, struct nvgpu_channel_fence *fence_in,
1636 struct gk20a_buffer_state *state)
1637{
1638 struct gk20a *g = &l->g;
1639 int err = 0;
1640
1641 if (!l->cde_app.initialised)
1642 return -ENOSYS;
1643
1644 nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n",
1645 l->cde_app.firmware_version);
1646
1647 if (l->cde_app.firmware_version == 1) {
1648 err = gk20a_buffer_convert_gpu_to_cde_v1(
1649 l, dmabuf, consumer, offset, compbits_hoffset,
1650 compbits_voffset, scatterbuffer_offset,
1651 width, height, block_height_log2,
1652 submit_flags, fence_in, state);
1653 } else {
1654 nvgpu_err(g, "unsupported CDE firmware version %d",
1655 l->cde_app.firmware_version);
1656 err = -EINVAL;
1657 }
1658
1659 return err;
1660}
1661
1662int gk20a_prepare_compressible_read(
1663 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1664 u64 compbits_hoffset, u64 compbits_voffset,
1665 u64 scatterbuffer_offset,
1666 u32 width, u32 height, u32 block_height_log2,
1667 u32 submit_flags, struct nvgpu_channel_fence *fence,
1668 u32 *valid_compbits, u32 *zbc_color,
1669 struct gk20a_fence **fence_out)
1670{
1671 struct gk20a *g = &l->g;
1672 int err = 0;
1673 struct gk20a_buffer_state *state;
1674 struct dma_buf *dmabuf;
1675 u32 missing_bits;
1676
1677 dmabuf = dma_buf_get(buffer_fd);
1678 if (IS_ERR(dmabuf))
1679 return -EINVAL;
1680
1681 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1682 if (err) {
1683 dma_buf_put(dmabuf);
1684 return err;
1685 }
1686
1687 missing_bits = (state->valid_compbits ^ request) & request;
1688
1689 nvgpu_mutex_acquire(&state->lock);
1690
1691 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1692
1693 gk20a_fence_put(state->fence);
1694 state->fence = NULL;
1695 /* state->fence = decompress();
1696 state->valid_compbits = 0; */
1697 err = -EINVAL;
1698 goto out;
1699 } else if (missing_bits) {
1700 u32 missing_cde_bits = missing_bits &
1701 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1702 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1703 missing_cde_bits) {
1704 err = gk20a_buffer_convert_gpu_to_cde(
1705 l, dmabuf,
1706 missing_cde_bits,
1707 offset, compbits_hoffset,
1708 compbits_voffset, scatterbuffer_offset,
1709 width, height, block_height_log2,
1710 submit_flags, fence,
1711 state);
1712 if (err)
1713 goto out;
1714 }
1715 }
1716
1717 if (state->fence && fence_out)
1718 *fence_out = gk20a_fence_get(state->fence);
1719
1720 if (valid_compbits)
1721 *valid_compbits = state->valid_compbits;
1722
1723 if (zbc_color)
1724 *zbc_color = state->zbc_color;
1725
1726out:
1727 nvgpu_mutex_release(&state->lock);
1728 dma_buf_put(dmabuf);
1729 return err;
1730}
1731
1732int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1733 u32 valid_compbits, u64 offset, u32 zbc_color)
1734{
1735 int err;
1736 struct gk20a_buffer_state *state;
1737 struct dma_buf *dmabuf;
1738
1739 dmabuf = dma_buf_get(buffer_fd);
1740 if (IS_ERR(dmabuf)) {
1741 nvgpu_err(g, "invalid dmabuf");
1742 return -EINVAL;
1743 }
1744
1745 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1746 if (err) {
1747 nvgpu_err(g, "could not get state from dmabuf");
1748 dma_buf_put(dmabuf);
1749 return err;
1750 }
1751
1752 nvgpu_mutex_acquire(&state->lock);
1753
1754 /* Update the compbits state. */
1755 state->valid_compbits = valid_compbits;
1756 state->zbc_color = zbc_color;
1757
1758 /* Discard previous compbit job fence. */
1759 gk20a_fence_put(state->fence);
1760 state->fence = NULL;
1761
1762 nvgpu_mutex_release(&state->lock);
1763 dma_buf_put(dmabuf);
1764 return 0;
1765}
1766
1767int nvgpu_cde_init_ops(struct nvgpu_os_linux *l)
1768{
1769 struct gk20a *g = &l->g;
1770 u32 ver = g->params.gpu_arch + g->params.gpu_impl;
1771
1772 switch (ver) {
1773 case GK20A_GPUID_GM20B:
1774 case GK20A_GPUID_GM20B_B:
1775 l->ops.cde = gm20b_cde_ops.cde;
1776 break;
1777 case NVGPU_GPUID_GP10B:
1778 l->ops.cde = gp10b_cde_ops.cde;
1779 break;
1780 default:
1781 /* CDE is optional, so ignoring an unknown chip is fine for now */
1782 break;
1783 }
1784
1785 return 0;
1786}