path: root/drivers/gpu/nvgpu/common/linux
author     Terje Bergstrom <tbergstrom@nvidia.com>    2017-09-07 13:43:47 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>    2017-09-11 18:10:52 -0400
commit     c37c9baae65bcf0ef08a319488c09f57131026cc (patch)
tree       e3b4252bfff7436574a909dd625de49229d538da /drivers/gpu/nvgpu/common/linux
parent     17451138cf60f5d64eed88cc5defd44981926d9d (diff)
gpu: nvgpu: Move CDE code to Linux module
CDE is only used in Linux platforms, and the code is highly dependent on
Linux APIs. Move the common CDE code to Linux module and leave only the
chip specific parts to HAL.

Change-Id: I507fe7eceaf7607303dfdddcf438449a5f582ea7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1554755
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde.c         1693
-rw-r--r--  drivers/gpu/nvgpu/common/linux/cde.h          309
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_cde.c     14
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c     3
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module.c        14
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_linux.h       2
6 files changed, 2025 insertions, 10 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
new file mode 100644
index 00000000..5b0fb910
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -0,0 +1,1693 @@
1/*
2 * Color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/dma-mapping.h>
20#include <linux/fs.h>
21#include <linux/dma-buf.h>
22
23#include <trace/events/gk20a.h>
24
25#include <nvgpu/dma.h>
26#include <nvgpu/gmmu.h>
27#include <nvgpu/timers.h>
28#include <nvgpu/nvgpu_common.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/log.h>
31#include <nvgpu/bug.h>
32#include <nvgpu/firmware.h>
33
34#include "gk20a/gk20a.h"
35#include "gk20a/channel_gk20a.h"
36#include "gk20a/mm_gk20a.h"
37#include "gk20a/fence_gk20a.h"
38#include "gk20a/gr_gk20a.h"
39
40#include "cde.h"
41#include "os_linux.h"
42
43#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
45
46/*
47 * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the
48 * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem.
49 */
50#include "common/linux/vm_priv.h"
51
52static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
53static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
54
 55#define CTX_DELETE_TIME 1000 /* ms before an idle temporary ctx is deleted */
 56
 57#define MAX_CTX_USE_COUNT 42 /* throttle limit on in-flight context users */
 58#define MAX_CTX_RETRY_TIME 2000 /* ms */
59
60static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
61{
62 unsigned int i;
63
64 for (i = 0; i < cde_ctx->num_bufs; i++) {
65 struct nvgpu_mem *mem = cde_ctx->mem + i;
66 nvgpu_dma_unmap_free(cde_ctx->vm, mem);
67 }
68
69 nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
70
71 cde_ctx->convert_cmd = NULL;
72 cde_ctx->init_convert_cmd = NULL;
73 cde_ctx->num_bufs = 0;
74 cde_ctx->num_params = 0;
75 cde_ctx->init_cmd_num_entries = 0;
76 cde_ctx->convert_cmd_num_entries = 0;
77 cde_ctx->init_cmd_executed = false;
78}
79
80static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
81__must_hold(&cde_app->mutex)
82{
83 struct nvgpu_os_linux *l = cde_ctx->l;
84 struct gk20a *g = &l->g;
85 struct channel_gk20a *ch = cde_ctx->ch;
86 struct vm_gk20a *vm = ch->vm;
87
88 trace_gk20a_cde_remove_ctx(cde_ctx);
89
90 /* release mapped memory */
91 gk20a_deinit_cde_img(cde_ctx);
92 nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
93 cde_ctx->backing_store_vaddr);
94
95 /* free the channel */
96 gk20a_channel_close(ch);
97
98 /* housekeeping on app */
99 nvgpu_list_del(&cde_ctx->list);
100 l->cde_app.ctx_count--;
101 nvgpu_kfree(g, cde_ctx);
102}
103
104static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
105 bool wait_finish)
106__releases(&cde_app->mutex)
107__acquires(&cde_app->mutex)
108{
109 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
110
 111 /* permanent contexts do not have a deleter work */
112 if (!cde_ctx->is_temporary)
113 return;
114
115 if (wait_finish) {
116 nvgpu_mutex_release(&cde_app->mutex);
117 cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
118 nvgpu_mutex_acquire(&cde_app->mutex);
119 } else {
120 cancel_delayed_work(&cde_ctx->ctx_deleter_work);
121 }
122}
123
124static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
125__must_hold(&l->cde_app->mutex)
126{
127 struct gk20a_cde_app *cde_app = &l->cde_app;
128 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
129
 130 /* safe to release the mutex in cancel_deleter since the app is
 131 * deinitialised and no new jobs are started. Deleter works can only be
 132 * waiting for the mutex or about to abort */
133
134 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
135 &cde_app->free_contexts, gk20a_cde_ctx, list) {
136 gk20a_cde_cancel_deleter(cde_ctx, true);
137 gk20a_cde_remove_ctx(cde_ctx);
138 }
139
140 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
141 &cde_app->used_contexts, gk20a_cde_ctx, list) {
142 gk20a_cde_cancel_deleter(cde_ctx, true);
143 gk20a_cde_remove_ctx(cde_ctx);
144 }
145}
146
147static void gk20a_cde_stop(struct nvgpu_os_linux *l)
148__must_hold(&l->cde_app->mutex)
149{
150 struct gk20a_cde_app *cde_app = &l->cde_app;
151
152 /* prevent further conversions and delayed works from working */
153 cde_app->initialised = false;
154 /* free all data, empty the list */
155 gk20a_cde_remove_contexts(l);
156}
157
158void gk20a_cde_destroy(struct nvgpu_os_linux *l)
159__acquires(&l->cde_app->mutex)
160__releases(&l->cde_app->mutex)
161{
162 struct gk20a_cde_app *cde_app = &l->cde_app;
163
164 if (!cde_app->initialised)
165 return;
166
167 nvgpu_mutex_acquire(&cde_app->mutex);
168 gk20a_cde_stop(l);
169 nvgpu_mutex_release(&cde_app->mutex);
170
171 nvgpu_mutex_destroy(&cde_app->mutex);
172}
173
174void gk20a_cde_suspend(struct nvgpu_os_linux *l)
175__acquires(&l->cde_app->mutex)
176__releases(&l->cde_app->mutex)
177{
178 struct gk20a_cde_app *cde_app = &l->cde_app;
179 struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
180
181 if (!cde_app->initialised)
182 return;
183
184 nvgpu_mutex_acquire(&cde_app->mutex);
185
186 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
187 &cde_app->free_contexts, gk20a_cde_ctx, list) {
188 gk20a_cde_cancel_deleter(cde_ctx, false);
189 }
190
191 nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
192 &cde_app->used_contexts, gk20a_cde_ctx, list) {
193 gk20a_cde_cancel_deleter(cde_ctx, false);
194 }
195
196 nvgpu_mutex_release(&cde_app->mutex);
197
198}
199
200static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
201__must_hold(&l->cde_app->mutex)
202{
203 struct gk20a_cde_app *cde_app = &l->cde_app;
204 struct gk20a_cde_ctx *cde_ctx;
205
206 cde_ctx = gk20a_cde_allocate_context(l);
207 if (IS_ERR(cde_ctx))
208 return PTR_ERR(cde_ctx);
209
210 nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
211 cde_app->ctx_count++;
212 if (cde_app->ctx_count > cde_app->ctx_count_top)
213 cde_app->ctx_count_top = cde_app->ctx_count;
214
215 return 0;
216}
217
218static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
219__must_hold(&l->cde_app->mutex)
220{
221 int err;
222 int i;
223
224 for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
225 err = gk20a_cde_create_context(l);
226 if (err)
227 goto out;
228 }
229
230 return 0;
231out:
232 gk20a_cde_remove_contexts(l);
233 return err;
234}
235
236static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
237 struct nvgpu_firmware *img,
238 struct gk20a_cde_hdr_buf *buf)
239{
240 struct nvgpu_mem *mem;
241 struct nvgpu_os_linux *l = cde_ctx->l;
242 struct gk20a *g = &l->g;
243 int err;
244
245 /* check that the file can hold the buf */
246 if (buf->data_byte_offset != 0 &&
247 buf->data_byte_offset + buf->num_bytes > img->size) {
248 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
249 cde_ctx->num_bufs);
250 return -EINVAL;
251 }
252
253 /* check that we have enough buf elems available */
254 if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
255 nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
256 cde_ctx->num_bufs);
257 return -ENOMEM;
258 }
259
260 /* allocate buf */
261 mem = cde_ctx->mem + cde_ctx->num_bufs;
262 err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
263 if (err) {
264 nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
265 cde_ctx->num_bufs);
266 return -ENOMEM;
267 }
268
269 /* copy the content */
270 if (buf->data_byte_offset != 0)
271 memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
272 buf->num_bytes);
273
274 cde_ctx->num_bufs++;
275
276 return 0;
277}
278
279static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
280 int type, s32 shift, u64 mask, u64 value)
281{
282 struct nvgpu_os_linux *l = cde_ctx->l;
283 struct gk20a *g = &l->g;
284 u32 *target_mem_ptr = target;
285 u64 *target_mem_ptr_u64 = target;
286 u64 current_value, new_value;
287
288 value = (shift >= 0) ? value << shift : value >> -shift;
289 value &= mask;
290
291 /* read current data from the location */
292 current_value = 0;
293 if (type == TYPE_PARAM_TYPE_U32) {
294 if (mask != 0xfffffffful)
295 current_value = *target_mem_ptr;
296 } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
297 if (mask != ~0ul)
298 current_value = *target_mem_ptr_u64;
299 } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
300 current_value = *target_mem_ptr_u64;
301 current_value = (u64)(current_value >> 32) |
302 (u64)(current_value << 32);
303 } else {
304 nvgpu_warn(g, "cde: unknown type. type=%d",
305 type);
306 return -EINVAL;
307 }
308
309 current_value &= ~mask;
310 new_value = current_value | value;
311
312 /* store the element data back */
313 if (type == TYPE_PARAM_TYPE_U32)
314 *target_mem_ptr = (u32)new_value;
315 else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
316 *target_mem_ptr_u64 = new_value;
317 else {
318 new_value = (u64)(new_value >> 32) |
319 (u64)(new_value << 32);
320 *target_mem_ptr_u64 = new_value;
321 }
322
323 return 0;
324}
325
326static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
327 struct nvgpu_firmware *img,
328 struct gk20a_cde_hdr_replace *replace)
329{
330 struct nvgpu_mem *source_mem;
331 struct nvgpu_mem *target_mem;
332 struct nvgpu_os_linux *l = cde_ctx->l;
333 struct gk20a *g = &l->g;
334 u32 *target_mem_ptr;
335 u64 vaddr;
336 int err;
337
338 if (replace->target_buf >= cde_ctx->num_bufs ||
339 replace->source_buf >= cde_ctx->num_bufs) {
340 nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
341 replace->target_buf, replace->source_buf,
342 cde_ctx->num_bufs);
343 return -EINVAL;
344 }
345
346 source_mem = cde_ctx->mem + replace->source_buf;
347 target_mem = cde_ctx->mem + replace->target_buf;
348 target_mem_ptr = target_mem->cpu_va;
349
350 if (source_mem->size < (replace->source_byte_offset + 3) ||
351 target_mem->size < (replace->target_byte_offset + 3)) {
352 nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
353 replace->target_byte_offset,
354 replace->source_byte_offset,
355 source_mem->size,
356 target_mem->size);
357 return -EINVAL;
358 }
359
360 /* calculate the target pointer */
361 target_mem_ptr += (replace->target_byte_offset / sizeof(u32));
362
363 /* determine patch value */
364 vaddr = source_mem->gpu_va + replace->source_byte_offset;
365 err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
366 replace->shift, replace->mask,
367 vaddr);
368 if (err) {
369 nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
370 err, replace->target_buf,
371 replace->target_byte_offset,
372 replace->source_buf,
373 replace->source_byte_offset);
374 }
375
376 return err;
377}
378
379static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
380{
381 struct nvgpu_os_linux *l = cde_ctx->l;
382 struct gk20a *g = &l->g;
383 struct nvgpu_mem *target_mem;
384 u32 *target_mem_ptr;
385 u64 new_data;
386 int user_id = 0, err;
387 unsigned int i;
388
389 for (i = 0; i < cde_ctx->num_params; i++) {
390 struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
391 target_mem = cde_ctx->mem + param->target_buf;
392 target_mem_ptr = target_mem->cpu_va;
393 target_mem_ptr += (param->target_byte_offset / sizeof(u32));
394
395 switch (param->id) {
396 case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
397 new_data = g->gr.comptags_per_cacheline;
398 break;
399 case TYPE_PARAM_GPU_CONFIGURATION:
400 new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
401 g->gr.cacheline_size;
402 break;
403 case TYPE_PARAM_FIRSTPAGEOFFSET:
404 new_data = cde_ctx->surf_param_offset;
405 break;
406 case TYPE_PARAM_NUMPAGES:
407 new_data = cde_ctx->surf_param_lines;
408 break;
409 case TYPE_PARAM_BACKINGSTORE:
410 new_data = cde_ctx->backing_store_vaddr;
411 break;
412 case TYPE_PARAM_DESTINATION:
413 new_data = cde_ctx->compbit_vaddr;
414 break;
415 case TYPE_PARAM_DESTINATION_SIZE:
416 new_data = cde_ctx->compbit_size;
417 break;
418 case TYPE_PARAM_BACKINGSTORE_SIZE:
419 new_data = g->gr.compbit_store.mem.size;
420 break;
421 case TYPE_PARAM_SOURCE_SMMU_ADDR:
422 new_data = gk20a_mm_gpuva_to_iova_base(cde_ctx->vm,
423 cde_ctx->surf_vaddr);
424 if (new_data == 0)
425 return -EINVAL;
426 break;
427 case TYPE_PARAM_BACKINGSTORE_BASE_HW:
428 new_data = g->gr.compbit_store.base_hw;
429 break;
430 case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
431 new_data = g->gr.gobs_per_comptagline_per_slice;
432 break;
433 case TYPE_PARAM_SCATTERBUFFER:
434 new_data = cde_ctx->scatterbuffer_vaddr;
435 break;
436 case TYPE_PARAM_SCATTERBUFFER_SIZE:
437 new_data = cde_ctx->scatterbuffer_size;
438 break;
439 default:
440 user_id = param->id - NUM_RESERVED_PARAMS;
441 if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
442 continue;
443 new_data = cde_ctx->user_param_values[user_id];
444 }
445
446 gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
447 i, param->id, param->target_buf,
448 param->target_byte_offset, new_data,
449 param->data_offset, param->type, param->shift,
450 param->mask);
451
452 new_data += param->data_offset;
453
454 err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
455 param->shift, param->mask, new_data);
456
457 if (err) {
458 nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
459 err, i, param->id, param->target_buf,
460 param->target_byte_offset, new_data);
461 return err;
462 }
463 }
464
465 return 0;
466}
467
468static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
469 struct nvgpu_firmware *img,
470 struct gk20a_cde_hdr_param *param)
471{
472 struct nvgpu_mem *target_mem;
473 struct nvgpu_os_linux *l = cde_ctx->l;
474 struct gk20a *g = &l->g;
475
476 if (param->target_buf >= cde_ctx->num_bufs) {
477 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
478 cde_ctx->num_params, param->target_buf,
479 cde_ctx->num_bufs);
480 return -EINVAL;
481 }
482
483 target_mem = cde_ctx->mem + param->target_buf;
484 if (target_mem->size < (param->target_byte_offset + 3)) {
485 nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
486 cde_ctx->num_params, param->target_byte_offset,
487 target_mem->size);
488 return -EINVAL;
489 }
490
491 /* does this parameter fit into our parameter structure */
492 if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
493 nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
494 cde_ctx->num_params);
495 return -ENOMEM;
496 }
497
498 /* is the given id valid? */
499 if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
500 nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
 501 cde_ctx->num_params, param->id,
502 NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
503 return -EINVAL;
504 }
505
506 cde_ctx->params[cde_ctx->num_params] = *param;
507 cde_ctx->num_params++;
508
509 return 0;
510}
511
512static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
513 struct nvgpu_firmware *img,
514 u32 required_class)
515{
516 struct nvgpu_os_linux *l = cde_ctx->l;
517 struct gk20a *g = &l->g;
518 struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx;
519 int err;
520
521 alloc_obj_ctx.class_num = required_class;
522 alloc_obj_ctx.flags = 0;
523
524 /* CDE enabled */
525 cde_ctx->ch->cde = true;
526
527 err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx);
528 if (err) {
529 nvgpu_warn(g, "cde: failed to allocate ctx. err=%d",
530 err);
531 return err;
532 }
533
534 return 0;
535}
536
537static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
538 struct nvgpu_firmware *img,
539 u32 op,
540 struct gk20a_cde_cmd_elem *cmd_elem,
541 u32 num_elems)
542{
543 struct nvgpu_os_linux *l = cde_ctx->l;
544 struct gk20a *g = &l->g;
545 struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
546 u32 *num_entries;
547 unsigned int i;
548
549 /* check command type */
550 if (op == TYPE_BUF_COMMAND_INIT) {
551 gpfifo = &cde_ctx->init_convert_cmd;
552 num_entries = &cde_ctx->init_cmd_num_entries;
553 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
554 gpfifo = &cde_ctx->convert_cmd;
555 num_entries = &cde_ctx->convert_cmd_num_entries;
556 } else {
557 nvgpu_warn(g, "cde: unknown command. op=%u",
558 op);
559 return -EINVAL;
560 }
561
562 /* allocate gpfifo entries to be pushed */
563 *gpfifo = nvgpu_kzalloc(g,
564 sizeof(struct nvgpu_gpfifo) * num_elems);
565 if (!*gpfifo) {
566 nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
567 return -ENOMEM;
568 }
569
570 gpfifo_elem = *gpfifo;
571 for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
572 struct nvgpu_mem *target_mem;
573
574 /* validate the current entry */
575 if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
576 nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
577 cmd_elem->target_buf, cde_ctx->num_bufs);
578 return -EINVAL;
579 }
580
581 target_mem = cde_ctx->mem + cmd_elem->target_buf;
 582 if (target_mem->size <
583 cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
584 nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
585 target_mem->size,
586 cmd_elem->target_byte_offset,
587 cmd_elem->num_bytes);
588 return -EINVAL;
589 }
590
591 /* store the element into gpfifo */
592 gpfifo_elem->entry0 =
593 u64_lo32(target_mem->gpu_va +
594 cmd_elem->target_byte_offset);
595 gpfifo_elem->entry1 =
596 u64_hi32(target_mem->gpu_va +
597 cmd_elem->target_byte_offset) |
598 pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
599 sizeof(u32));
600 }
601
602 *num_entries = num_elems;
603 return 0;
604}
605
606static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
607{
608 struct nvgpu_os_linux *l = cde_ctx->l;
609 struct gk20a *g = &l->g;
610 unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
611 sizeof(struct nvgpu_gpfifo);
612 unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
613 sizeof(struct nvgpu_gpfifo);
614 unsigned long total_bytes = init_bytes + conv_bytes;
615 struct nvgpu_gpfifo *combined_cmd;
616
617 /* allocate buffer that has space for both */
618 combined_cmd = nvgpu_kzalloc(g, total_bytes);
619 if (!combined_cmd) {
620 nvgpu_warn(g,
621 "cde: could not allocate memory for gpfifo entries");
622 return -ENOMEM;
623 }
624
625 /* move the original init here and append convert */
626 memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
627 memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
628 cde_ctx->convert_cmd, conv_bytes);
629
630 nvgpu_kfree(g, cde_ctx->init_convert_cmd);
631 nvgpu_kfree(g, cde_ctx->convert_cmd);
632
633 cde_ctx->init_convert_cmd = combined_cmd;
634 cde_ctx->convert_cmd = combined_cmd
635 + cde_ctx->init_cmd_num_entries;
636
637 return 0;
638}
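/*
 * Illustrative sketch only (not part of the driver logic): after packing,
 * the init and convert entries live in one allocation,
 *
 *   init_convert_cmd -> [ init entries  ][ convert entries ]
 *   convert_cmd      ----------------------^
 *
 * gk20a_cde_execute_buffer() below submits the whole combined buffer for
 * TYPE_BUF_COMMAND_INIT and only the convert_cmd tail for
 * TYPE_BUF_COMMAND_CONVERT.
 */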
639
640static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
641 struct nvgpu_firmware *img)
642{
643 struct nvgpu_os_linux *l = cde_ctx->l;
644 struct gk20a *g = &l->g;
645 struct gk20a_cde_app *cde_app = &l->cde_app;
646 u32 *data = (u32 *)img->data;
647 u32 num_of_elems;
648 struct gk20a_cde_hdr_elem *elem;
649 u32 min_size = 0;
650 int err = 0;
651 unsigned int i;
652
653 min_size += 2 * sizeof(u32);
654 if (img->size < min_size) {
655 nvgpu_warn(g, "cde: invalid image header");
656 return -EINVAL;
657 }
658
659 cde_app->firmware_version = data[0];
660 num_of_elems = data[1];
661
662 min_size += num_of_elems * sizeof(*elem);
663 if (img->size < min_size) {
664 nvgpu_warn(g, "cde: bad image");
665 return -EINVAL;
666 }
667
668 elem = (struct gk20a_cde_hdr_elem *)&data[2];
669 for (i = 0; i < num_of_elems; i++) {
670 int err = 0;
671 switch (elem->type) {
672 case TYPE_BUF:
673 err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
674 break;
675 case TYPE_REPLACE:
676 err = gk20a_init_cde_replace(cde_ctx, img,
677 &elem->replace);
678 break;
679 case TYPE_PARAM:
680 err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
681 break;
682 case TYPE_REQUIRED_CLASS:
683 err = gk20a_init_cde_required_class(cde_ctx, img,
684 elem->required_class);
685 break;
686 case TYPE_COMMAND:
687 {
688 struct gk20a_cde_cmd_elem *cmd = (void *)
689 &img->data[elem->command.data_byte_offset];
690 err = gk20a_init_cde_command(cde_ctx, img,
691 elem->command.op, cmd,
692 elem->command.num_entries);
693 break;
694 }
695 case TYPE_ARRAY:
696 memcpy(&cde_app->arrays[elem->array.id][0],
697 elem->array.data,
698 MAX_CDE_ARRAY_ENTRIES*sizeof(u32));
699 break;
700 default:
701 nvgpu_warn(g, "cde: unknown header element");
702 err = -EINVAL;
703 }
704
705 if (err)
706 goto deinit_image;
707
708 elem++;
709 }
710
711 if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
712 nvgpu_warn(g, "cde: convert command not defined");
713 err = -EINVAL;
714 goto deinit_image;
715 }
716
717 if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
718 nvgpu_warn(g, "cde: convert command not defined");
719 err = -EINVAL;
720 goto deinit_image;
721 }
722
723 err = gk20a_cde_pack_cmdbufs(cde_ctx);
724 if (err)
725 goto deinit_image;
726
727 return 0;
728
729deinit_image:
730 gk20a_deinit_cde_img(cde_ctx);
731 return err;
732}
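/*
 * Rough sketch of the gpu2cde firmware layout as parsed above; this is
 * derived from gk20a_init_cde_img() itself, not a normative format spec:
 *
 *   u32 firmware_version;                         (data[0])
 *   u32 num_of_elems;                             (data[1])
 *   struct gk20a_cde_hdr_elem elem[num_of_elems];
 *   ...payload bytes referenced via the data_byte_offset fields...
 */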
733
734static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
735 u32 op, struct nvgpu_fence *fence,
736 u32 flags, struct gk20a_fence **fence_out)
737{
738 struct nvgpu_os_linux *l = cde_ctx->l;
739 struct gk20a *g = &l->g;
740 struct nvgpu_gpfifo *gpfifo = NULL;
741 int num_entries = 0;
742
743 /* check command type */
744 if (op == TYPE_BUF_COMMAND_INIT) {
745 /* both init and convert combined */
746 gpfifo = cde_ctx->init_convert_cmd;
747 num_entries = cde_ctx->init_cmd_num_entries
748 + cde_ctx->convert_cmd_num_entries;
749 } else if (op == TYPE_BUF_COMMAND_CONVERT) {
750 gpfifo = cde_ctx->convert_cmd;
751 num_entries = cde_ctx->convert_cmd_num_entries;
752 } else {
753 nvgpu_warn(g, "cde: unknown buffer");
754 return -EINVAL;
755 }
756
757 if (gpfifo == NULL || num_entries == 0) {
758 nvgpu_warn(g, "cde: buffer not available");
759 return -ENOSYS;
760 }
761
762 return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
763 num_entries, flags, fence, fence_out, true,
764 NULL);
765}
766
767static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
768__acquires(&cde_app->mutex)
769__releases(&cde_app->mutex)
770{
771 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
772
773 gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
774 trace_gk20a_cde_release(cde_ctx);
775
776 nvgpu_mutex_acquire(&cde_app->mutex);
777
778 if (cde_ctx->in_use) {
779 cde_ctx->in_use = false;
780 nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
781 cde_app->ctx_usecount--;
782 } else {
783 gk20a_dbg_info("double release cde context %p", cde_ctx);
784 }
785
786 nvgpu_mutex_release(&cde_app->mutex);
787}
788
789static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
790__acquires(&cde_app->mutex)
791__releases(&cde_app->mutex)
792{
793 struct delayed_work *delay_work = to_delayed_work(work);
794 struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
795 struct gk20a_cde_ctx, ctx_deleter_work);
796 struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
797 struct nvgpu_os_linux *l = cde_ctx->l;
798 struct gk20a *g = &l->g;
799 int err;
800
801 /* someone has just taken it? engine deletion started? */
802 if (cde_ctx->in_use || !cde_app->initialised)
803 return;
804
805 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
806 "cde: attempting to delete temporary %p", cde_ctx);
807
808 err = gk20a_busy(g);
809 if (err) {
810 /* this context would find new use anyway later, so not freeing
811 * here does not leak anything */
812 nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
813 " temp ctx deletion");
814 return;
815 }
816
817 nvgpu_mutex_acquire(&cde_app->mutex);
818 if (cde_ctx->in_use || !cde_app->initialised) {
819 gk20a_dbg(gpu_dbg_cde_ctx,
820 "cde: context use raced, not deleting %p",
821 cde_ctx);
822 goto out;
823 }
824
825 WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
826 "double pending %p", cde_ctx);
827
828 gk20a_cde_remove_ctx(cde_ctx);
829 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
830 "cde: destroyed %p count=%d use=%d max=%d",
831 cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
832 cde_app->ctx_count_top);
833
834out:
835 nvgpu_mutex_release(&cde_app->mutex);
836 gk20a_idle(g);
837}
838
839static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
840__must_hold(&cde_app->mutex)
841{
842 struct gk20a *g = &l->g;
843 struct gk20a_cde_app *cde_app = &l->cde_app;
844 struct gk20a_cde_ctx *cde_ctx;
845
846 /* exhausted? */
847
848 if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
849 return ERR_PTR(-EAGAIN);
850
851 /* idle context available? */
852
853 if (!nvgpu_list_empty(&cde_app->free_contexts)) {
854 cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
855 gk20a_cde_ctx, list);
856 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
857 "cde: got free %p count=%d use=%d max=%d",
858 cde_ctx, cde_app->ctx_count,
859 cde_app->ctx_usecount,
860 cde_app->ctx_count_top);
861 trace_gk20a_cde_get_context(cde_ctx);
862
863 /* deleter work may be scheduled, but in_use prevents it */
864 cde_ctx->in_use = true;
865 nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
866 cde_app->ctx_usecount++;
867
868 /* cancel any deletions now that ctx is in use */
869 gk20a_cde_cancel_deleter(cde_ctx, true);
870 return cde_ctx;
871 }
872
873 /* no free contexts, get a temporary one */
874
875 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
876 "cde: no free contexts, count=%d",
877 cde_app->ctx_count);
878
879 cde_ctx = gk20a_cde_allocate_context(l);
880 if (IS_ERR(cde_ctx)) {
881 nvgpu_warn(g, "cde: cannot allocate context: %ld",
882 PTR_ERR(cde_ctx));
883 return cde_ctx;
884 }
885
886 trace_gk20a_cde_get_context(cde_ctx);
887 cde_ctx->in_use = true;
888 cde_ctx->is_temporary = true;
889 cde_app->ctx_usecount++;
890 cde_app->ctx_count++;
891 if (cde_app->ctx_count > cde_app->ctx_count_top)
892 cde_app->ctx_count_top = cde_app->ctx_count;
893 nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);
894
895 return cde_ctx;
896}
897
898static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
899__releases(&cde_app->mutex)
900__acquires(&cde_app->mutex)
901{
902 struct gk20a *g = &l->g;
903 struct gk20a_cde_app *cde_app = &l->cde_app;
904 struct gk20a_cde_ctx *cde_ctx = NULL;
905 struct nvgpu_timeout timeout;
906
907 nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
908 NVGPU_TIMER_CPU_TIMER);
909
910 do {
911 cde_ctx = gk20a_cde_do_get_context(l);
912 if (PTR_ERR(cde_ctx) != -EAGAIN)
913 break;
914
915 /* exhausted, retry */
916 nvgpu_mutex_release(&cde_app->mutex);
917 cond_resched();
918 nvgpu_mutex_acquire(&cde_app->mutex);
919 } while (!nvgpu_timeout_expired(&timeout));
920
921 return cde_ctx;
922}
923
924static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
925{
926 struct gk20a *g = &l->g;
927 struct gk20a_cde_ctx *cde_ctx;
928 int ret;
929
930 cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
931 if (!cde_ctx)
932 return ERR_PTR(-ENOMEM);
933
934 cde_ctx->l = l;
935 cde_ctx->dev = dev_from_gk20a(g);
936
937 ret = gk20a_cde_load(cde_ctx);
938 if (ret) {
939 nvgpu_kfree(g, cde_ctx);
940 return ERR_PTR(ret);
941 }
942
943 nvgpu_init_list_node(&cde_ctx->list);
944 cde_ctx->is_temporary = false;
945 cde_ctx->in_use = false;
946 INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
947 gk20a_cde_ctx_deleter_fn);
948
949 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
950 trace_gk20a_cde_allocate_context(cde_ctx);
951 return cde_ctx;
952}
953
954int gk20a_cde_convert(struct nvgpu_os_linux *l,
955 struct dma_buf *compbits_scatter_buf,
956 u64 compbits_byte_offset,
957 u64 scatterbuffer_byte_offset,
958 struct nvgpu_fence *fence,
959 u32 __flags, struct gk20a_cde_param *params,
960 int num_params, struct gk20a_fence **fence_out)
961__acquires(&l->cde_app->mutex)
962__releases(&l->cde_app->mutex)
963{
964 struct gk20a *g = &l->g;
965 struct gk20a_cde_ctx *cde_ctx = NULL;
966 struct gk20a_comptags comptags;
967 u64 mapped_compbits_offset = 0;
968 u64 compbits_size = 0;
969 u64 mapped_scatterbuffer_offset = 0;
970 u64 scatterbuffer_size = 0;
971 u64 map_vaddr = 0;
972 u64 map_offset = 0;
973 u64 map_size = 0;
974 u8 *surface = NULL;
975 u64 big_page_mask = 0;
976 u32 flags;
977 int err, i;
978 const s32 compbits_kind = 0;
979
980 gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
981 compbits_byte_offset, scatterbuffer_byte_offset);
982
983 /* scatter buffer must be after compbits buffer */
984 if (scatterbuffer_byte_offset &&
985 scatterbuffer_byte_offset < compbits_byte_offset)
986 return -EINVAL;
987
988 err = gk20a_busy(g);
989 if (err)
990 return err;
991
992 nvgpu_mutex_acquire(&l->cde_app.mutex);
993 cde_ctx = gk20a_cde_get_context(l);
994 nvgpu_mutex_release(&l->cde_app.mutex);
995 if (IS_ERR(cde_ctx)) {
996 err = PTR_ERR(cde_ctx);
997 goto exit_idle;
998 }
999
1000 /* First, map the buffer to local va */
1001
1002 /* ensure that the compbits buffer has drvdata */
1003 err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
1004 dev_from_gk20a(g));
1005 if (err)
1006 goto exit_idle;
1007
 1008 /* compbits don't start at a page-aligned offset, so we need to align
 1009 the region to be mapped */
1010 big_page_mask = cde_ctx->vm->big_page_size - 1;
1011 map_offset = compbits_byte_offset & ~big_page_mask;
1012 map_size = compbits_scatter_buf->size - map_offset;
1013
1014
1015 /* compute compbit start offset from the beginning of the mapped
1016 area */
1017 mapped_compbits_offset = compbits_byte_offset - map_offset;
1018 if (scatterbuffer_byte_offset) {
1019 compbits_size = scatterbuffer_byte_offset -
1020 compbits_byte_offset;
1021 mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
1022 map_offset;
1023 scatterbuffer_size = compbits_scatter_buf->size -
1024 scatterbuffer_byte_offset;
1025 } else {
1026 compbits_size = compbits_scatter_buf->size -
1027 compbits_byte_offset;
1028 }
1029
1030 gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu",
1031 map_offset, map_size);
1032 gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
1033 mapped_compbits_offset, compbits_size);
1034 gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
1035 mapped_scatterbuffer_offset, scatterbuffer_size);
1036
1037
1038 /* map the destination buffer */
1039 get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */
1040 map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0,
1041 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1042 compbits_kind, true,
1043 gk20a_mem_flag_none,
1044 map_offset, map_size,
1045 NULL);
1046 if (!map_vaddr) {
1047 dma_buf_put(compbits_scatter_buf);
1048 err = -EINVAL;
1049 goto exit_idle;
1050 }
1051
1052 if (scatterbuffer_byte_offset &&
1053 g->ops.cde.need_scatter_buffer &&
1054 g->ops.cde.need_scatter_buffer(g)) {
1055 struct sg_table *sgt;
1056 void *scatter_buffer;
1057
1058 surface = dma_buf_vmap(compbits_scatter_buf);
1059 if (IS_ERR(surface)) {
1060 nvgpu_warn(g,
1061 "dma_buf_vmap failed");
1062 err = -EINVAL;
1063 goto exit_unmap_vaddr;
1064 }
1065
1066 scatter_buffer = surface + scatterbuffer_byte_offset;
1067
1068 gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
1069 surface, scatter_buffer);
1070 sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf);
1071 if (IS_ERR(sgt)) {
1072 nvgpu_warn(g,
1073 "mm_pin failed");
1074 err = -EINVAL;
1075 goto exit_unmap_surface;
1076 } else {
1077 err = g->ops.cde.populate_scatter_buffer(g, sgt,
1078 compbits_byte_offset, scatter_buffer,
1079 scatterbuffer_size);
1080 WARN_ON(err);
1081
1082 gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
1083 sgt);
1084 if (err)
1085 goto exit_unmap_surface;
1086 }
1087
1088 __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
1089 dma_buf_vunmap(compbits_scatter_buf, surface);
1090 surface = NULL;
1091 }
1092
1093 /* store source buffer compression tags */
1094 gk20a_get_comptags(dev_from_gk20a(g), compbits_scatter_buf, &comptags);
1095 cde_ctx->surf_param_offset = comptags.offset;
1096 cde_ctx->surf_param_lines = comptags.lines;
1097
1098 /* store surface vaddr. This is actually compbit vaddr, but since
1099 compbits live in the same surface, and we can get the alloc base
1100 address by using gk20a_mm_gpuva_to_iova_base, this will do */
1101 cde_ctx->surf_vaddr = map_vaddr;
1102
1103 /* store information about destination */
1104 cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
1105 cde_ctx->compbit_size = compbits_size;
1106
1107 cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
1108 cde_ctx->scatterbuffer_size = scatterbuffer_size;
1109
1110 /* remove existing argument data */
1111 memset(cde_ctx->user_param_values, 0,
1112 sizeof(cde_ctx->user_param_values));
1113
1114 /* read user space arguments for the conversion */
1115 for (i = 0; i < num_params; i++) {
1116 struct gk20a_cde_param *param = params + i;
1117 int id = param->id - NUM_RESERVED_PARAMS;
1118
1119 if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
1120 nvgpu_warn(g, "cde: unknown user parameter");
1121 err = -EINVAL;
1122 goto exit_unmap_surface;
1123 }
1124 cde_ctx->user_param_values[id] = param->value;
1125 }
1126
1127 /* patch data */
1128 err = gk20a_cde_patch_params(cde_ctx);
1129 if (err) {
1130 nvgpu_warn(g, "cde: failed to patch parameters");
1131 goto exit_unmap_surface;
1132 }
1133
1134 gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
1135 g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
1136 gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
1137 cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
1138 gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
1139 cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);
1140
 1141 /* always take the postfence as it is needed to protect the
 1142 * cde context */
1143 flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
1144
 1145 /* gk20a_cde_execute_buffer() will grab a power reference of its own */
1146 gk20a_idle(g);
1147
1148 /* execute the conversion buffer, combined with init first if it's the
1149 * first time */
1150 err = gk20a_cde_execute_buffer(cde_ctx,
1151 cde_ctx->init_cmd_executed
1152 ? TYPE_BUF_COMMAND_CONVERT
1153 : TYPE_BUF_COMMAND_INIT,
1154 fence, flags, fence_out);
1155
1156 cde_ctx->init_cmd_executed = true;
1157
1158 /* unmap the buffers - channel holds references to them now */
1159 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
1160
1161 return err;
1162
1163exit_unmap_surface:
1164 if (surface)
1165 dma_buf_vunmap(compbits_scatter_buf, surface);
1166exit_unmap_vaddr:
1167 nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
1168exit_idle:
1169 gk20a_idle(g);
1170 return err;
1171}
1172
1173static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
1174__acquires(&cde_app->mutex)
1175__releases(&cde_app->mutex)
1176{
1177 struct gk20a_cde_ctx *cde_ctx = data;
1178 struct nvgpu_os_linux *l = cde_ctx->l;
1179 struct gk20a *g = &l->g;
1180 struct gk20a_cde_app *cde_app = &l->cde_app;
1181 bool channel_idle;
1182
1183 channel_gk20a_joblist_lock(ch);
1184 channel_idle = channel_gk20a_joblist_is_empty(ch);
1185 channel_gk20a_joblist_unlock(ch);
1186
1187 if (!channel_idle)
1188 return;
1189
1190 trace_gk20a_cde_finished_ctx_cb(cde_ctx);
1191 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
1192 if (!cde_ctx->in_use)
1193 gk20a_dbg_info("double finish cde context %p on channel %p",
1194 cde_ctx, ch);
1195
1196 if (ch->has_timedout) {
1197 if (cde_ctx->is_temporary) {
1198 nvgpu_warn(g,
1199 "cde: channel had timed out"
1200 " (temporary channel)");
1201 /* going to be deleted anyway */
1202 } else {
1203 nvgpu_warn(g,
1204 "cde: channel had timed out"
1205 ", reloading");
1206 /* mark it to be deleted, replace with a new one */
1207 nvgpu_mutex_acquire(&cde_app->mutex);
1208 cde_ctx->is_temporary = true;
1209 if (gk20a_cde_create_context(l)) {
1210 nvgpu_err(g, "cde: can't replace context");
1211 }
1212 nvgpu_mutex_release(&cde_app->mutex);
1213 }
1214 }
1215
1216 /* delete temporary contexts later (watch for doubles) */
1217 if (cde_ctx->is_temporary && cde_ctx->in_use) {
1218 WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
1219 schedule_delayed_work(&cde_ctx->ctx_deleter_work,
1220 msecs_to_jiffies(CTX_DELETE_TIME));
1221 }
1222
1223 if (!ch->has_timedout)
1224 gk20a_cde_ctx_release(cde_ctx);
1225}
1226
1227static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
1228{
1229 struct nvgpu_os_linux *l = cde_ctx->l;
1230 struct gk20a *g = &l->g;
1231 struct nvgpu_firmware *img;
1232 struct channel_gk20a *ch;
1233 struct gr_gk20a *gr = &g->gr;
1234 int err = 0;
1235 u64 vaddr;
1236
1237 img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
1238 if (!img) {
1239 nvgpu_err(g, "cde: could not fetch the firmware");
1240 return -ENOSYS;
1241 }
1242
1243 ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
1244 cde_ctx,
1245 -1,
1246 false);
1247 if (!ch) {
1248 nvgpu_warn(g, "cde: gk20a channel not available");
1249 err = -ENOMEM;
1250 goto err_get_gk20a_channel;
1251 }
1252
1253 /* bind the channel to the vm */
1254 err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
1255 if (err) {
1256 nvgpu_warn(g, "cde: could not bind vm");
1257 goto err_commit_va;
1258 }
1259
1260 /* allocate gpfifo (1024 should be more than enough) */
1261 err = gk20a_channel_alloc_gpfifo(ch, 1024, 0, 0);
1262 if (err) {
1263 nvgpu_warn(g, "cde: unable to allocate gpfifo");
1264 goto err_alloc_gpfifo;
1265 }
1266
1267 /* map backing store to gpu virtual space */
1268 vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
1269 g->gr.compbit_store.mem.size,
1270 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1271 gk20a_mem_flag_read_only,
1272 false,
1273 gr->compbit_store.mem.aperture);
1274
1275 if (!vaddr) {
1276 nvgpu_warn(g, "cde: cannot map compression bit backing store");
1277 err = -ENOMEM;
1278 goto err_map_backingstore;
1279 }
1280
1281 /* store initialisation data */
1282 cde_ctx->ch = ch;
1283 cde_ctx->vm = ch->vm;
1284 cde_ctx->backing_store_vaddr = vaddr;
1285
1286 /* initialise the firmware */
1287 err = gk20a_init_cde_img(cde_ctx, img);
1288 if (err) {
1289 nvgpu_warn(g, "cde: image initialisation failed");
1290 goto err_init_cde_img;
1291 }
1292
1293 /* initialisation done */
1294 nvgpu_release_firmware(g, img);
1295
1296 return 0;
1297
1298err_init_cde_img:
1299 nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
1300err_map_backingstore:
1301err_alloc_gpfifo:
1302 nvgpu_vm_put(ch->vm);
1303err_commit_va:
1304err_get_gk20a_channel:
1305 nvgpu_release_firmware(g, img);
1306 nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
1307 return err;
1308}
1309
1310int gk20a_cde_reload(struct nvgpu_os_linux *l)
1311__acquires(&l->cde_app->mutex)
1312__releases(&l->cde_app->mutex)
1313{
1314 struct gk20a *g = &l->g;
1315 struct gk20a_cde_app *cde_app = &l->cde_app;
1316 int err;
1317
1318 if (!cde_app->initialised)
1319 return -ENOSYS;
1320
1321 err = gk20a_busy(g);
1322 if (err)
1323 return err;
1324
1325 nvgpu_mutex_acquire(&cde_app->mutex);
1326
1327 gk20a_cde_stop(l);
1328
1329 err = gk20a_cde_create_contexts(l);
1330 if (!err)
1331 cde_app->initialised = true;
1332
1333 nvgpu_mutex_release(&cde_app->mutex);
1334
1335 gk20a_idle(g);
1336 return err;
1337}
1338
1339int gk20a_init_cde_support(struct nvgpu_os_linux *l)
1340__acquires(&cde_app->mutex)
1341__releases(&cde_app->mutex)
1342{
1343 struct gk20a_cde_app *cde_app = &l->cde_app;
1344 int err;
1345
1346 if (cde_app->initialised)
1347 return 0;
1348
1349 gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");
1350
1351 err = nvgpu_mutex_init(&cde_app->mutex);
1352 if (err)
1353 return err;
1354
1355 nvgpu_mutex_acquire(&cde_app->mutex);
1356
1357 nvgpu_init_list_node(&cde_app->free_contexts);
1358 nvgpu_init_list_node(&cde_app->used_contexts);
1359 cde_app->ctx_count = 0;
1360 cde_app->ctx_count_top = 0;
1361 cde_app->ctx_usecount = 0;
1362
1363 err = gk20a_cde_create_contexts(l);
1364 if (!err)
1365 cde_app->initialised = true;
1366
1367 nvgpu_mutex_release(&cde_app->mutex);
1368 gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);
1369
1370 if (err)
1371 nvgpu_mutex_destroy(&cde_app->mutex);
1372
1373 return err;
1374}
1375
1376enum cde_launch_patch_id {
1377 PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
1378 PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
1379 PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
1380 PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
1381 PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
1382 PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
1383 PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
1384 PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
1385 PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
1386 PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
1387 PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
1388 PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
1389 PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
1390 PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
1391 PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
1392 PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
1393 PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
1394 PATCH_USER_CONST_XBLOCKS_ID = 1041,
1395 PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
1396 PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
1397 PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
1398 PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
1399 PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
1400 PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
1401 PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
1402 PATCH_H_LAUNCH_WORD1_ID = 1049,
1403 PATCH_H_LAUNCH_WORD2_ID = 1050,
1404 PATCH_V_LAUNCH_WORD1_ID = 1051,
1405 PATCH_V_LAUNCH_WORD2_ID = 1052,
1406 PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
1407 PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
1408 PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
1409 PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
1410};
1411
1412/* maximum number of WRITE_PATCHes in the below function */
1413#define MAX_CDE_LAUNCH_PATCHES 32
1414
1415static int gk20a_buffer_convert_gpu_to_cde_v1(
1416 struct nvgpu_os_linux *l,
1417 struct dma_buf *dmabuf, u32 consumer,
1418 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1419 u64 scatterbuffer_offset,
1420 u32 width, u32 height, u32 block_height_log2,
1421 u32 submit_flags, struct nvgpu_fence *fence_in,
1422 struct gk20a_buffer_state *state)
1423{
1424 struct gk20a *g = &l->g;
1425 struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
1426 int param = 0;
1427 int err = 0;
1428 struct gk20a_fence *new_fence = NULL;
1429 const int wgx = 8;
1430 const int wgy = 8;
1431 const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
1432 const int xalign = compbits_per_byte * wgx;
1433 const int yalign = wgy;
1434
1435 /* Compute per launch parameters */
1436 const int xtiles = (width + 7) >> 3;
1437 const int ytiles = (height + 7) >> 3;
1438 const int gridw_h = roundup(xtiles, xalign) / xalign;
1439 const int gridh_h = roundup(ytiles, yalign) / yalign;
1440 const int gridw_v = roundup(ytiles, xalign) / xalign;
1441 const int gridh_v = roundup(xtiles, yalign) / yalign;
1442 const int xblocks = (xtiles + 1) >> 1;
1443 const int voffset = compbits_voffset - compbits_hoffset;
1444
1445 int hprog = -1;
1446 int vprog = -1;
1447
1448 if (g->ops.cde.get_program_numbers)
1449 g->ops.cde.get_program_numbers(g, block_height_log2,
1450 l->cde_app.shader_parameter,
1451 &hprog, &vprog);
1452 else {
1453 nvgpu_warn(g, "cde: chip not supported");
1454 return -ENOSYS;
1455 }
1456
1457 if (hprog < 0 || vprog < 0) {
1458 nvgpu_warn(g, "cde: could not determine programs");
1459 return -ENOSYS;
1460 }
1461
1462 if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
1463 nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
1464 xtiles, ytiles);
1465
1466 gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
1467 width, height, block_height_log2,
1468 compbits_hoffset, compbits_voffset, scatterbuffer_offset);
1469 gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
1470 width, height, xtiles, ytiles);
1471 gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
1472 wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
1473 gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
1474 hprog,
1475 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
1476 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
1477 vprog,
1478 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
1479 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1480
1481 /* Write parameters */
1482#define WRITE_PATCH(NAME, VALUE) \
1483 params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
1484 WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
1485 WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
1486 block_height_log2);
1487 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
1488 WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
1489 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
1490 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
1491 WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);
1492
1493 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
1494 WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
1495 WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
1496 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
1497 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
1498 WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);
1499
1500 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
1501 WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
1502 WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
1503 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
1504 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
1505 WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
1506
1507 WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
1508 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
1509 WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
1510 l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
1511 WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
1512 l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
1513 WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
1514 l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
1515
1516 if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
1517 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1518 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1519 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1520 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1521 } else {
1522 WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
1523 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1524 WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
1525 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1526 }
1527
1528 if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
1529 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1530 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
1531 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1532 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
1533 } else {
1534 WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
1535 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
1536 WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
1537 l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
1538 }
1539#undef WRITE_PATCH
1540
1541 err = gk20a_cde_convert(l, dmabuf,
1542 compbits_hoffset,
1543 scatterbuffer_offset,
1544 fence_in, submit_flags,
1545 params, param, &new_fence);
1546 if (err)
1547 goto out;
1548
1549 /* compbits generated, update state & fence */
1550 gk20a_fence_put(state->fence);
1551 state->fence = new_fence;
1552 state->valid_compbits |= consumer &
1553 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1554out:
1555 return err;
1556}
1557
1558static int gk20a_buffer_convert_gpu_to_cde(
1559 struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
1560 u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
1561 u64 scatterbuffer_offset,
1562 u32 width, u32 height, u32 block_height_log2,
1563 u32 submit_flags, struct nvgpu_fence *fence_in,
1564 struct gk20a_buffer_state *state)
1565{
1566 struct gk20a *g = &l->g;
1567 int err = 0;
1568
1569 if (!l->cde_app.initialised)
1570 return -ENOSYS;
1571
1572 gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
1573 l->cde_app.firmware_version);
1574
1575 if (l->cde_app.firmware_version == 1) {
1576 err = gk20a_buffer_convert_gpu_to_cde_v1(
1577 l, dmabuf, consumer, offset, compbits_hoffset,
1578 compbits_voffset, scatterbuffer_offset,
1579 width, height, block_height_log2,
1580 submit_flags, fence_in, state);
1581 } else {
1582 nvgpu_err(g, "unsupported CDE firmware version %d",
1583 l->cde_app.firmware_version);
1584 err = -EINVAL;
1585 }
1586
1587 return err;
1588}
1589
1590int gk20a_prepare_compressible_read(
1591 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
1592 u64 compbits_hoffset, u64 compbits_voffset,
1593 u64 scatterbuffer_offset,
1594 u32 width, u32 height, u32 block_height_log2,
1595 u32 submit_flags, struct nvgpu_fence *fence,
1596 u32 *valid_compbits, u32 *zbc_color,
1597 struct gk20a_fence **fence_out)
1598{
1599 struct gk20a *g = &l->g;
1600 int err = 0;
1601 struct gk20a_buffer_state *state;
1602 struct dma_buf *dmabuf;
1603 u32 missing_bits;
1604
1605 dmabuf = dma_buf_get(buffer_fd);
1606 if (IS_ERR(dmabuf))
1607 return -EINVAL;
1608
1609 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1610 if (err) {
1611 dma_buf_put(dmabuf);
1612 return err;
1613 }
1614
1615 missing_bits = (state->valid_compbits ^ request) & request;
1616
1617 nvgpu_mutex_acquire(&state->lock);
1618
1619 if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
1620
1621 gk20a_fence_put(state->fence);
1622 state->fence = NULL;
1623 /* state->fence = decompress();
1624 state->valid_compbits = 0; */
1625 err = -EINVAL;
1626 goto out;
1627 } else if (missing_bits) {
1628 u32 missing_cde_bits = missing_bits &
1629 (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
1630 if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
1631 missing_cde_bits) {
1632 err = gk20a_buffer_convert_gpu_to_cde(
1633 l, dmabuf,
1634 missing_cde_bits,
1635 offset, compbits_hoffset,
1636 compbits_voffset, scatterbuffer_offset,
1637 width, height, block_height_log2,
1638 submit_flags, fence,
1639 state);
1640 if (err)
1641 goto out;
1642 }
1643 }
1644
1645 if (state->fence && fence_out)
1646 *fence_out = gk20a_fence_get(state->fence);
1647
1648 if (valid_compbits)
1649 *valid_compbits = state->valid_compbits;
1650
1651 if (zbc_color)
1652 *zbc_color = state->zbc_color;
1653
1654out:
1655 nvgpu_mutex_release(&state->lock);
1656 dma_buf_put(dmabuf);
1657 return err;
1658}
1659
1660int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
1661 u32 valid_compbits, u64 offset, u32 zbc_color)
1662{
1663 int err;
1664 struct gk20a_buffer_state *state;
1665 struct dma_buf *dmabuf;
1666
1667 dmabuf = dma_buf_get(buffer_fd);
1668 if (IS_ERR(dmabuf)) {
1669 nvgpu_err(g, "invalid dmabuf");
1670 return -EINVAL;
1671 }
1672
1673 err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
1674 if (err) {
1675 nvgpu_err(g, "could not get state from dmabuf");
1676 dma_buf_put(dmabuf);
1677 return err;
1678 }
1679
1680 nvgpu_mutex_acquire(&state->lock);
1681
1682 /* Update the compbits state. */
1683 state->valid_compbits = valid_compbits;
1684 state->zbc_color = zbc_color;
1685
1686 /* Discard previous compbit job fence. */
1687 gk20a_fence_put(state->fence);
1688 state->fence = NULL;
1689
1690 nvgpu_mutex_release(&state->lock);
1691 dma_buf_put(dmabuf);
1692 return 0;
1693}
diff --git a/drivers/gpu/nvgpu/common/linux/cde.h b/drivers/gpu/nvgpu/common/linux/cde.h
new file mode 100644
index 00000000..22732a2a
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/cde.h
@@ -0,0 +1,309 @@
1/*
2 * GK20A color decompression engine support
3 *
4 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef _CDE_GK20A_H_
20#define _CDE_GK20A_H_
21
22#define MAX_CDE_BUFS 10
23#define MAX_CDE_PARAMS 64
24#define MAX_CDE_USER_PARAMS 40
25#define MAX_CDE_ARRAY_ENTRIES 9
26
27/*
28 * The size of the context ring buffer that is dedicated for handling cde
 29 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
30 * wait on the previous job to that channel, so increasing this value
31 * reduces the likelihood of stalls.
32 */
33#define NUM_CDE_CONTEXTS 4
34
35struct dma_buf;
36struct gk20a;
37
38/*
39 * this element defines a buffer that is allocated and mapped into gpu address
 40 * space. data_byte_offset defines the beginning of the buffer data inside
 41 * the firmware image. num_bytes defines the size of the buffer.
42 *
43 * If data_byte_offset is zero, we allocate an empty buffer.
44 */
45
46struct gk20a_cde_hdr_buf {
47 u64 data_byte_offset;
48 u64 num_bytes;
49};
50
51/*
52 * this element defines a constant patching in buffers. It basically
 53 * computes the GPU virtual address of <source_buf>+source_byte_offset. The
 54 * address is then folded into the patch value as per:
55 * value = (current_value & ~mask) | (address << shift) & mask .
56 *
57 * The type field defines the register size as:
58 * 0=u32,
59 * 1=u64 (little endian),
60 * 2=u64 (big endian)
61 */
62
63struct gk20a_cde_hdr_replace {
64 u32 target_buf;
65 u32 source_buf;
66 s32 shift;
67 u32 type;
68 u64 target_byte_offset;
69 u64 source_byte_offset;
70 u64 mask;
71};
72
73enum {
74 TYPE_PARAM_TYPE_U32 = 0,
75 TYPE_PARAM_TYPE_U64_LITTLE,
76 TYPE_PARAM_TYPE_U64_BIG
77};
78
79/*
80 * this element defines a runtime patching in buffers. Parameters with id from
81 * 0 to 1024 are reserved for special usage as follows:
82 * 0 = comptags_per_cacheline,
83 * 1 = slices_per_fbp,
84 * 2 = num_fbps
85 * 3 = source buffer first page offset
86 * 4 = source buffer block height log2
87 * 5 = backing store memory address
88 * 6 = destination memory address
89 * 7 = destination size (bytes)
90 * 8 = backing store size (bytes)
91 * 9 = cache line size
92 *
 93 * Parameters above id 1024 are user-specified, i.e. they determine where
 94 * parameters from user space should be placed in buffers, what their
 95 * type is, etc.
96 *
97 * Once the value is available, we add data_offset to the value.
98 *
99 * The value address is then modified into patch value as per:
100 * value = (current_value & ~mask) | (address << shift) & mask .
101 *
102 * The type field defines the register size as:
103 * 0=u32,
104 * 1=u64 (little endian),
105 * 2=u64 (big endian)
106 */
107
108struct gk20a_cde_hdr_param {
109 u32 id;
110 u32 target_buf;
111 s32 shift;
112 u32 type;
113 s64 data_offset;
114 u64 target_byte_offset;
115 u64 mask;
116};
117
118enum {
119 TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
120 TYPE_PARAM_GPU_CONFIGURATION,
121 TYPE_PARAM_FIRSTPAGEOFFSET,
122 TYPE_PARAM_NUMPAGES,
123 TYPE_PARAM_BACKINGSTORE,
124 TYPE_PARAM_DESTINATION,
125 TYPE_PARAM_DESTINATION_SIZE,
126 TYPE_PARAM_BACKINGSTORE_SIZE,
127 TYPE_PARAM_SOURCE_SMMU_ADDR,
128 TYPE_PARAM_BACKINGSTORE_BASE_HW,
129 TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
130 TYPE_PARAM_SCATTERBUFFER,
131 TYPE_PARAM_SCATTERBUFFER_SIZE,
132 NUM_RESERVED_PARAMS = 1024,
133};
134
135/*
136 * This header element defines a command. The op field determines whether the
137 * element defines an init (0) or a convert (1) command. data_byte_offset
138 * denotes the beginning address of command elements in the file.
139 */
140
141struct gk20a_cde_hdr_command {
142 u32 op;
143 u32 num_entries;
144 u64 data_byte_offset;
145};
146
147enum {
148 TYPE_BUF_COMMAND_INIT = 0,
149 TYPE_BUF_COMMAND_CONVERT
150};
151
152/*
153 * This command element defines one entry inside the push buffer. target_buf
154 * identifies the buffer that contains the pushbuffer entries, target_byte_offset
155 * the offset inside that buffer, and num_bytes the length of the entry in bytes.
156 */
157
158struct gk20a_cde_cmd_elem {
159 u32 target_buf;
160 u32 padding;
161 u64 target_byte_offset;
162 u64 num_bytes;
163};
164
165/*
166 * This element is used for storing a small array of data.
167 */
168
169enum {
170 ARRAY_PROGRAM_OFFSET = 0,
171 ARRAY_REGISTER_COUNT,
172 ARRAY_LAUNCH_COMMAND,
173 NUM_CDE_ARRAYS
174};
175
176struct gk20a_cde_hdr_array {
177 u32 id;
178 u32 data[MAX_CDE_ARRAY_ENTRIES];
179};
180
181/*
182 * The following defines a single header element. Each element has a type and
183 * one of the data structures above.
184 */
185
186struct gk20a_cde_hdr_elem {
187 u32 type;
188 u32 padding;
189 union {
190 struct gk20a_cde_hdr_buf buf;
191 struct gk20a_cde_hdr_replace replace;
192 struct gk20a_cde_hdr_param param;
193 u32 required_class;
194 struct gk20a_cde_hdr_command command;
195 struct gk20a_cde_hdr_array array;
196 };
197};
198
199enum {
200 TYPE_BUF = 0,
201 TYPE_REPLACE,
202 TYPE_PARAM,
203 TYPE_REQUIRED_CLASS,
204 TYPE_COMMAND,
205 TYPE_ARRAY
206};
207
208struct gk20a_cde_param {
209 u32 id;
210 u32 padding;
211 u64 value;
212};
213
214struct gk20a_cde_ctx {
215 struct nvgpu_os_linux *l;
216 struct device *dev;
217
218 /* channel related data */
219 struct channel_gk20a *ch;
220 struct vm_gk20a *vm;
221
222 /* buf converter configuration */
223 struct nvgpu_mem mem[MAX_CDE_BUFS];
224 unsigned int num_bufs;
225
226 /* buffer patching params (where should patching be done) */
227 struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
228 unsigned int num_params;
229
230 /* storage for user space parameter values */
231 u32 user_param_values[MAX_CDE_USER_PARAMS];
232
233 u32 surf_param_offset;
234 u32 surf_param_lines;
235 u64 surf_vaddr;
236
237 u64 compbit_vaddr;
238 u64 compbit_size;
239
240 u64 scatterbuffer_vaddr;
241 u64 scatterbuffer_size;
242
243 u64 backing_store_vaddr;
244
245 struct nvgpu_gpfifo *init_convert_cmd;
246 int init_cmd_num_entries;
247
248 struct nvgpu_gpfifo *convert_cmd;
249 int convert_cmd_num_entries;
250
251 struct kobj_attribute attr;
252
253 bool init_cmd_executed;
254
255 struct nvgpu_list_node list;
256 bool is_temporary;
257 bool in_use;
258 struct delayed_work ctx_deleter_work;
259};
260
261static inline struct gk20a_cde_ctx *
262gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
263{
264 return (struct gk20a_cde_ctx *)
265 ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
266};
267
268struct gk20a_cde_app {
269 bool initialised;
270 struct nvgpu_mutex mutex;
271
272 struct nvgpu_list_node free_contexts;
273 struct nvgpu_list_node used_contexts;
274 unsigned int ctx_count;
275 unsigned int ctx_usecount;
276 unsigned int ctx_count_top;
277
278 u32 firmware_version;
279
280 u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
281
282 u32 shader_parameter;
283};
284
285void gk20a_cde_destroy(struct nvgpu_os_linux *l);
286void gk20a_cde_suspend(struct nvgpu_os_linux *l);
287int gk20a_init_cde_support(struct nvgpu_os_linux *l);
288int gk20a_cde_reload(struct nvgpu_os_linux *l);
289int gk20a_cde_convert(struct nvgpu_os_linux *l,
290 struct dma_buf *compbits_buf,
291 u64 compbits_byte_offset,
292 u64 scatterbuffer_byte_offset,
293 struct nvgpu_fence *fence,
294 u32 __flags, struct gk20a_cde_param *params,
295 int num_params, struct gk20a_fence **fence_out);
296
297int gk20a_prepare_compressible_read(
298 struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
299 u64 compbits_hoffset, u64 compbits_voffset,
300 u64 scatterbuffer_offset,
301 u32 width, u32 height, u32 block_height_log2,
302 u32 submit_flags, struct nvgpu_fence *fence,
303 u32 *valid_compbits, u32 *zbc_color,
304 struct gk20a_fence **fence_out);
305int gk20a_mark_compressible_write(
306 struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
307 u32 zbc_color);
308
309#endif
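
Both the constant (gk20a_cde_hdr_replace) and runtime (gk20a_cde_hdr_param) descriptors above use the same masking rule, value = (current_value & ~mask) | (address << shift) & mask. The fragment below is a minimal sketch of that rule for the u32 case only; the helper name cde_patch_u32 and the convention that a negative shift means a right shift are illustrative assumptions for this example, not the patching code in cde.c.

#include <stdint.h>

/*
 * Illustrative sketch of the CDE patch rule for a 32-bit target word.
 * "value" is the address or parameter being folded in (for runtime params,
 * data_offset has already been added); a negative shift is assumed here to
 * mean a right shift. The u64 little/big-endian variants are omitted.
 */
static uint32_t cde_patch_u32(uint32_t current_value, uint64_t value,
			      int32_t shift, uint64_t mask)
{
	uint64_t shifted = (shift >= 0) ? (value << shift) : (value >> -shift);

	return (uint32_t)(((uint64_t)current_value & ~mask) | (shifted & mask));
}
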
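The user-specified parameters described in the header are keyed by id relative to NUM_RESERVED_PARAMS and land in the fixed-size user_param_values[] array of the context. The sketch below shows one plausible bounds-checked mapping under those constraints, assuming the definitions from cde.h plus linux/errno.h; the function name cde_store_user_param and its error handling are assumptions for illustration, not the code in cde.c.

/*
 * Sketch only: store one user-supplied parameter into the context slot
 * derived from its id. Reserved ids (0..NUM_RESERVED_PARAMS-1) are filled
 * in by the driver itself, not by user space.
 */
static int cde_store_user_param(struct gk20a_cde_ctx *cde_ctx,
				const struct gk20a_cde_param *param)
{
	u32 idx;

	if (param->id < NUM_RESERVED_PARAMS)
		return -EINVAL;		/* reserved ids are not settable */

	idx = param->id - NUM_RESERVED_PARAMS;
	if (idx >= MAX_CDE_USER_PARAMS)
		return -EINVAL;		/* beyond the storage array */

	cde_ctx->user_param_values[idx] = (u32)param->value;
	return 0;
}
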
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
index 40cc64a4..cbea83b9 100644
--- a/drivers/gpu/nvgpu/common/linux/debug_cde.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -22,8 +22,8 @@
 static ssize_t gk20a_cde_reload_write(struct file *file,
 		const char __user *userbuf, size_t count, loff_t *ppos)
 {
-	struct gk20a *g = file->private_data;
-	gk20a_cde_reload(g);
+	struct nvgpu_os_linux *l = file->private_data;
+	gk20a_cde_reload(l);
 	return count;
 }
 
@@ -41,13 +41,13 @@ void gk20a_cde_debugfs_init(struct gk20a *g)
 		return;
 
 	debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
-				l->debugfs, &g->cde_app.shader_parameter);
+				l->debugfs, &l->cde_app.shader_parameter);
 	debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
-				l->debugfs, &g->cde_app.ctx_count);
+				l->debugfs, &l->cde_app.ctx_count);
 	debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
-				l->debugfs, &g->cde_app.ctx_usecount);
+				l->debugfs, &l->cde_app.ctx_usecount);
 	debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
-				l->debugfs, &g->cde_app.ctx_count_top);
+				l->debugfs, &l->cde_app.ctx_count_top);
 	debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
-				g, &gk20a_cde_reload_fops);
+				l, &gk20a_cde_reload_fops);
 }
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index 0d79b143..0357f098 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -138,6 +138,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 		struct gk20a *g,
 		struct nvgpu_gpu_prepare_compressible_read_args *args)
 {
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct nvgpu_fence fence;
 	struct gk20a_fence *fence_out = NULL;
 	int ret = 0;
@@ -146,7 +147,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 	fence.id = args->fence.syncpt_id;
 	fence.value = args->fence.syncpt_value;
 
-	ret = gk20a_prepare_compressible_read(g, args->handle,
+	ret = gk20a_prepare_compressible_read(l, args->handle,
 			args->request_compbits, args->offset,
 			args->compbits_hoffset, args->compbits_voffset,
 			args->scatterbuffer_offset,
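
gk20a_ctrl_prepare_compressible_read() now needs the Linux-specific container before it can call into the CDE code. The os_linux.h hunk at the end of this patch declares nvgpu_os_linux_from_gk20a() for that purpose; since struct nvgpu_os_linux embeds struct gk20a as its g member, the conversion is the usual container_of() idiom. The sketch below only illustrates that idiom; the function name os_linux_from_gk20a_example is a stand-in, not the nvgpu helper itself.

#include <linux/kernel.h>	/* container_of() */

/*
 * Illustrative sketch: recover the enclosing Linux-specific state from an
 * embedded struct gk20a pointer via container_of().
 */
static inline struct nvgpu_os_linux *os_linux_from_gk20a_example(struct gk20a *g)
{
	return container_of(g, struct nvgpu_os_linux, g);
}
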
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 6a590baa..509930c7 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -39,6 +39,7 @@
 #include "pci.h"
 #include "module.h"
 #include "intr.h"
+#include "cde.h"
 #ifdef CONFIG_TEGRA_19x_GPU
 #include "nvgpu_gpuid_t19x.h"
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
@@ -185,7 +186,7 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 	gk20a_scale_resume(dev_from_gk20a(g));
 
 	if (platform->has_cde)
-		gk20a_init_cde_support(g);
+		gk20a_init_cde_support(l);
 
 done:
 	if (err)
@@ -197,6 +198,7 @@ done:
 static int gk20a_pm_prepare_poweroff(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	int ret = 0;
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
@@ -207,8 +209,15 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 	if (!g->power_on)
 		goto done;
 
+	if (gk20a_fifo_is_engine_busy(g)) {
+		ret = -EBUSY;
+		goto done;
+	}
+
 	gk20a_scale_suspend(dev);
 
+	gk20a_cde_suspend(l);
+
 	ret = gk20a_prepare_poweroff(g);
 	if (ret)
 		goto error;
@@ -974,6 +983,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	gk20a_dbg_fn("");
@@ -982,7 +992,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 		return vgpu_remove(pdev);
 
 	if (platform->has_cde)
-		gk20a_cde_destroy(g);
+		gk20a_cde_destroy(l);
 
 	gk20a_ctxsw_trace_cleanup(g);
 
diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h
index ed8364a9..160a5738 100644
--- a/drivers/gpu/nvgpu/common/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/os_linux.h
@@ -19,6 +19,7 @@
 #include <linux/cdev.h>
 
 #include "gk20a/gk20a.h"
+#include "cde.h"
 
 struct nvgpu_os_linux {
 	struct gk20a g;
@@ -108,6 +109,7 @@ struct nvgpu_os_linux {
 	struct dentry *debugfs_force_preemption_gfxp;
 	struct dentry *debugfs_dump_ctxsw_stats;
 #endif
+	struct gk20a_cde_app cde_app;
 };
 
 static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)