diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/cde.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/cde.c | 1786 |
1 files changed, 0 insertions, 1786 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c deleted file mode 100644 index 32b333f1..00000000 --- a/drivers/gpu/nvgpu/common/linux/cde.c +++ /dev/null | |||
@@ -1,1786 +0,0 @@ | |||
1 | /* | ||
2 | * Color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/dma-mapping.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/dma-buf.h> | ||
22 | #include <uapi/linux/nvgpu.h> | ||
23 | |||
24 | #include <trace/events/gk20a.h> | ||
25 | |||
26 | #include <nvgpu/dma.h> | ||
27 | #include <nvgpu/gmmu.h> | ||
28 | #include <nvgpu/timers.h> | ||
29 | #include <nvgpu/nvgpu_common.h> | ||
30 | #include <nvgpu/kmem.h> | ||
31 | #include <nvgpu/log.h> | ||
32 | #include <nvgpu/bug.h> | ||
33 | #include <nvgpu/firmware.h> | ||
34 | #include <nvgpu/os_sched.h> | ||
35 | |||
36 | #include <nvgpu/linux/vm.h> | ||
37 | |||
38 | #include "gk20a/gk20a.h" | ||
39 | #include "gk20a/channel_gk20a.h" | ||
40 | #include "gk20a/mm_gk20a.h" | ||
41 | #include "gk20a/fence_gk20a.h" | ||
42 | #include "gk20a/gr_gk20a.h" | ||
43 | |||
44 | #include "cde.h" | ||
45 | #include "os_linux.h" | ||
46 | #include "dmabuf.h" | ||
47 | #include "channel.h" | ||
48 | #include "cde_gm20b.h" | ||
49 | #include "cde_gp10b.h" | ||
50 | |||
51 | #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> | ||
52 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
53 | |||
54 | static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); | ||
55 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l); | ||
56 | |||
57 | #define CTX_DELETE_TIME 1000 | ||
58 | |||
59 | #define MAX_CTX_USE_COUNT 42 | ||
60 | #define MAX_CTX_RETRY_TIME 2000 | ||
61 | |||
62 | static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) | ||
63 | { | ||
64 | struct nvgpu_mapped_buf *buffer; | ||
65 | dma_addr_t addr = 0; | ||
66 | struct gk20a *g = gk20a_from_vm(vm); | ||
67 | |||
68 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | ||
69 | buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr); | ||
70 | if (buffer) | ||
71 | addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl); | ||
72 | nvgpu_mutex_release(&vm->update_gmmu_lock); | ||
73 | |||
74 | return addr; | ||
75 | } | ||
76 | |||
77 | static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) | ||
78 | { | ||
79 | unsigned int i; | ||
80 | |||
81 | for (i = 0; i < cde_ctx->num_bufs; i++) { | ||
82 | struct nvgpu_mem *mem = cde_ctx->mem + i; | ||
83 | nvgpu_dma_unmap_free(cde_ctx->vm, mem); | ||
84 | } | ||
85 | |||
86 | nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd); | ||
87 | |||
88 | cde_ctx->convert_cmd = NULL; | ||
89 | cde_ctx->init_convert_cmd = NULL; | ||
90 | cde_ctx->num_bufs = 0; | ||
91 | cde_ctx->num_params = 0; | ||
92 | cde_ctx->init_cmd_num_entries = 0; | ||
93 | cde_ctx->convert_cmd_num_entries = 0; | ||
94 | cde_ctx->init_cmd_executed = false; | ||
95 | } | ||
96 | |||
/*
 * Tear down one CDE context completely: free its firmware-image
 * buffers, unmap the compbit backing store, close its channel (which
 * also unbinds it from the TSG), drop the TSG reference, and unlink
 * the context from the app's list before freeing it.
 *
 * Caller must hold cde_app->mutex (see __must_hold).
 */
static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
__must_hold(&cde_app->mutex)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct channel_gk20a *ch = cde_ctx->ch;
	struct vm_gk20a *vm = ch->vm;

	trace_gk20a_cde_remove_ctx(cde_ctx);

	/* release mapped memory */
	gk20a_deinit_cde_img(cde_ctx);
	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
			 cde_ctx->backing_store_vaddr);

	/*
	 * free the channel
	 * gk20a_channel_close() will also unbind the channel from TSG
	 */
	gk20a_channel_close(ch);
	nvgpu_ref_put(&cde_ctx->tsg->refcount, gk20a_tsg_release);

	/* housekeeping on app */
	nvgpu_list_del(&cde_ctx->list);
	l->cde_app.ctx_count--;
	nvgpu_kfree(g, cde_ctx);
}
124 | |||
/*
 * Cancel the pending delayed deleter work of a temporary context.
 * Permanent contexts never schedule deleter work, so they return
 * immediately.
 *
 * When @wait_finish is set, cde_app->mutex is dropped around
 * cancel_delayed_work_sync(): the deleter work itself takes the same
 * mutex (see gk20a_cde_ctx_deleter_fn), so waiting while holding it
 * would deadlock. The mutex is re-acquired before returning.
 */
static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
		bool wait_finish)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;

	/* permanent contexts do not have deleter works */
	if (!cde_ctx->is_temporary)
		return;

	if (wait_finish) {
		nvgpu_mutex_release(&cde_app->mutex);
		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
		nvgpu_mutex_acquire(&cde_app->mutex);
	} else {
		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
	}
}
144 | |||
/*
 * Remove every CDE context, both free and in-use, waiting for any
 * pending deleter work to finish first. Used during app teardown.
 *
 * Caller must hold l->cde_app->mutex; gk20a_cde_cancel_deleter()
 * temporarily drops it while waiting.
 */
static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	/* safe to go off the mutex in cancel_deleter since app is
	 * deinitialised; no new jobs are started. deleter works may be only at
	 * waiting for the mutex or before, going to abort */

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}
}
167 | |||
/*
 * Stop the CDE app: clear the initialised flag so no new conversions
 * or deleter works proceed, then free all contexts.
 *
 * Caller must hold l->cde_app->mutex.
 */
static void gk20a_cde_stop(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	/* prevent further conversions and delayed works from working */
	cde_app->initialised = false;
	/* free all data, empty the list */
	gk20a_cde_remove_contexts(l);
}
178 | |||
/*
 * Destroy the CDE app: stop it under the mutex, then destroy the
 * mutex itself. A no-op when the app was never initialised.
 */
void gk20a_cde_destroy(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);
	gk20a_cde_stop(l);
	nvgpu_mutex_release(&cde_app->mutex);

	nvgpu_mutex_destroy(&cde_app->mutex);
}
194 | |||
/*
 * Suspend the CDE app: cancel pending deleter works on all contexts
 * without waiting for them (wait_finish = false), so suspend does not
 * block on work that is about to bail out anyway. Contexts themselves
 * are kept. A no-op when the app was never initialised.
 */
void gk20a_cde_suspend(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	if (!cde_app->initialised)
		return;

	nvgpu_mutex_acquire(&cde_app->mutex);

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, false);
	}

	nvgpu_mutex_release(&cde_app->mutex);

}
220 | |||
221 | static int gk20a_cde_create_context(struct nvgpu_os_linux *l) | ||
222 | __must_hold(&l->cde_app->mutex) | ||
223 | { | ||
224 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
225 | struct gk20a_cde_ctx *cde_ctx; | ||
226 | |||
227 | cde_ctx = gk20a_cde_allocate_context(l); | ||
228 | if (IS_ERR(cde_ctx)) | ||
229 | return PTR_ERR(cde_ctx); | ||
230 | |||
231 | nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); | ||
232 | cde_app->ctx_count++; | ||
233 | if (cde_app->ctx_count > cde_app->ctx_count_top) | ||
234 | cde_app->ctx_count_top = cde_app->ctx_count; | ||
235 | |||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l) | ||
240 | __must_hold(&l->cde_app->mutex) | ||
241 | { | ||
242 | int err; | ||
243 | int i; | ||
244 | |||
245 | for (i = 0; i < NUM_CDE_CONTEXTS; i++) { | ||
246 | err = gk20a_cde_create_context(l); | ||
247 | if (err) | ||
248 | goto out; | ||
249 | } | ||
250 | |||
251 | return 0; | ||
252 | out: | ||
253 | gk20a_cde_remove_contexts(l); | ||
254 | return err; | ||
255 | } | ||
256 | |||
/*
 * Handle a TYPE_BUF element from the firmware image: allocate a
 * sysmem buffer mapped into the context's VM and, when the element
 * carries initial data (data_byte_offset != 0), copy it in from the
 * image. A zero data_byte_offset means the buffer starts out
 * zero-filled (nvgpu_dma_alloc_map_sys memory is used as-is).
 *
 * Returns 0 on success, -EINVAL for a malformed data section,
 * -ENOMEM when out of buffer slots or device memory.
 */
static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
			      struct nvgpu_firmware *img,
			      struct gk20a_cde_hdr_buf *buf)
{
	struct nvgpu_mem *mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* check that the file can hold the buf */
	/* NOTE(review): data_byte_offset + num_bytes could wrap for huge
	 * header values — confirm the fields are validated upstream */
	if (buf->data_byte_offset != 0 &&
	    buf->data_byte_offset + buf->num_bytes > img->size) {
		nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	/* check that we have enough buf elems available */
	if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
		nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* allocate buf */
	mem = cde_ctx->mem + cde_ctx->num_bufs;
	err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
	if (err) {
		nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
			   cde_ctx->num_bufs);
		return -ENOMEM;
	}

	/* copy the content */
	if (buf->data_byte_offset != 0)
		memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
		       buf->num_bytes);

	cde_ctx->num_bufs++;

	return 0;
}
299 | |||
/*
 * Patch @value into the word at @target.
 *
 * The value is shifted (left for shift >= 0, right for negative
 * shifts), masked, and merged with the existing contents: the masked
 * bits are replaced, the rest preserved. TYPE_PARAM_TYPE_U64_BIG
 * stores the 64-bit value with its 32-bit halves swapped.
 *
 * Returns 0 on success, -EINVAL for an unknown @type.
 */
static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
			      int type, s32 shift, u64 mask, u64 value)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr = target;
	u64 *target_mem_ptr_u64 = target;
	u64 current_value, new_value;

	value = (shift >= 0) ? value << shift : value >> -shift;
	value &= mask;

	/* read current data from the location; the read is skipped when
	 * the mask covers the whole word, since every old bit would be
	 * cleared by ~mask below anyway */
	current_value = 0;
	if (type == TYPE_PARAM_TYPE_U32) {
		if (mask != 0xfffffffful)
			current_value = *target_mem_ptr;
	} else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
		if (mask != ~0ul)
			current_value = *target_mem_ptr_u64;
	} else if (type == TYPE_PARAM_TYPE_U64_BIG) {
		/* un-swap the stored halves before merging */
		current_value = *target_mem_ptr_u64;
		current_value = (u64)(current_value >> 32) |
			(u64)(current_value << 32);
	} else {
		nvgpu_warn(g, "cde: unknown type. type=%d",
			   type);
		return -EINVAL;
	}

	current_value &= ~mask;
	new_value = current_value | value;

	/* store the element data back */
	if (type == TYPE_PARAM_TYPE_U32)
		*target_mem_ptr = (u32)new_value;
	else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
		*target_mem_ptr_u64 = new_value;
	else {
		/* TYPE_PARAM_TYPE_U64_BIG: re-swap the halves on store */
		new_value = (u64)(new_value >> 32) |
			(u64)(new_value << 32);
		*target_mem_ptr_u64 = new_value;
	}

	return 0;
}
346 | |||
/*
 * Handle a TYPE_REPLACE element: patch the GPU VA of a location inside
 * one image buffer (source) into a word of another image buffer
 * (target), using the element's type/shift/mask patch rules.
 *
 * Returns 0 on success, -EINVAL for out-of-range buffer indices or
 * offsets, or the error from gk20a_replace_data().
 */
static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
				  struct nvgpu_firmware *img,
				  struct gk20a_cde_hdr_replace *replace)
{
	struct nvgpu_mem *source_mem;
	struct nvgpu_mem *target_mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr;
	u64 vaddr;
	int err;

	if (replace->target_buf >= cde_ctx->num_bufs ||
	    replace->source_buf >= cde_ctx->num_bufs) {
		nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
			   replace->target_buf, replace->source_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	/* NOTE(review): the +3 bound looks intended to keep a full 32-bit
	 * word inside the buffer; a strict bound would be offset + 4 <=
	 * size — confirm against the firmware layout */
	if (source_mem->size < (replace->source_byte_offset + 3) ||
	    target_mem->size < (replace->target_byte_offset + 3)) {
		nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
			   replace->target_byte_offset,
			   replace->source_byte_offset,
			   source_mem->size,
			   target_mem->size);
		return -EINVAL;
	}

	/* calculate the target pointer */
	target_mem_ptr += (replace->target_byte_offset / sizeof(u32));

	/* determine patch value */
	vaddr = source_mem->gpu_va + replace->source_byte_offset;
	err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
				 replace->shift, replace->mask,
				 vaddr);
	if (err) {
		nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
			   err, replace->target_buf,
			   replace->target_byte_offset,
			   replace->source_buf,
			   replace->source_byte_offset);
	}

	return err;
}
399 | |||
/*
 * Resolve the runtime value of every parameter recorded from the
 * firmware image and patch it into its target buffer. Reserved ids
 * map to GPU/context state (comptag geometry, backing store, surface
 * and scatterbuffer addresses/sizes); any other id indexes the
 * caller-supplied user_param_values array.
 *
 * Returns 0 on success, -EINVAL when the surface SMMU address cannot
 * be resolved, or the error from gk20a_replace_data().
 */
static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_mem *target_mem;
	u32 *target_mem_ptr;
	u64 new_data;
	int user_id = 0, err;
	unsigned int i;

	for (i = 0; i < cde_ctx->num_params; i++) {
		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
		target_mem = cde_ctx->mem + param->target_buf;
		target_mem_ptr = target_mem->cpu_va;
		target_mem_ptr += (param->target_byte_offset / sizeof(u32));

		switch (param->id) {
		case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
			new_data = g->gr.comptags_per_cacheline;
			break;
		case TYPE_PARAM_GPU_CONFIGURATION:
			new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
				g->gr.cacheline_size;
			break;
		case TYPE_PARAM_FIRSTPAGEOFFSET:
			new_data = cde_ctx->surf_param_offset;
			break;
		case TYPE_PARAM_NUMPAGES:
			new_data = cde_ctx->surf_param_lines;
			break;
		case TYPE_PARAM_BACKINGSTORE:
			new_data = cde_ctx->backing_store_vaddr;
			break;
		case TYPE_PARAM_DESTINATION:
			new_data = cde_ctx->compbit_vaddr;
			break;
		case TYPE_PARAM_DESTINATION_SIZE:
			new_data = cde_ctx->compbit_size;
			break;
		case TYPE_PARAM_BACKINGSTORE_SIZE:
			new_data = g->gr.compbit_store.mem.size;
			break;
		case TYPE_PARAM_SOURCE_SMMU_ADDR:
			/* the only case that can fail: the surface must be
			 * currently mapped in this VM */
			new_data = gpuva_to_iova_base(cde_ctx->vm,
						      cde_ctx->surf_vaddr);
			if (new_data == 0) {
				nvgpu_warn(g, "cde: failed to find 0x%llx",
						cde_ctx->surf_vaddr);
				return -EINVAL;
			}
			break;
		case TYPE_PARAM_BACKINGSTORE_BASE_HW:
			new_data = g->gr.compbit_store.base_hw;
			break;
		case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
			new_data = g->gr.gobs_per_comptagline_per_slice;
			break;
		case TYPE_PARAM_SCATTERBUFFER:
			new_data = cde_ctx->scatterbuffer_vaddr;
			break;
		case TYPE_PARAM_SCATTERBUFFER_SIZE:
			new_data = cde_ctx->scatterbuffer_size;
			break;
		default:
			/* user parameter: ids past the reserved range index
			 * user_param_values; out-of-range ids are skipped */
			user_id = param->id - NUM_RESERVED_PARAMS;
			if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
				continue;
			new_data = cde_ctx->user_param_values[user_id];
		}

		nvgpu_log(g, gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
			  i, param->id, param->target_buf,
			  param->target_byte_offset, new_data,
			  param->data_offset, param->type, param->shift,
			  param->mask);

		new_data += param->data_offset;

		err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
					 param->shift, param->mask, new_data);

		if (err) {
			nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
				   err, i, param->id, param->target_buf,
				   param->target_byte_offset, new_data);
			return err;
		}
	}

	return 0;
}
491 | |||
492 | static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, | ||
493 | struct nvgpu_firmware *img, | ||
494 | struct gk20a_cde_hdr_param *param) | ||
495 | { | ||
496 | struct nvgpu_mem *target_mem; | ||
497 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
498 | struct gk20a *g = &l->g; | ||
499 | |||
500 | if (param->target_buf >= cde_ctx->num_bufs) { | ||
501 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", | ||
502 | cde_ctx->num_params, param->target_buf, | ||
503 | cde_ctx->num_bufs); | ||
504 | return -EINVAL; | ||
505 | } | ||
506 | |||
507 | target_mem = cde_ctx->mem + param->target_buf; | ||
508 | if (target_mem->size < (param->target_byte_offset + 3)) { | ||
509 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", | ||
510 | cde_ctx->num_params, param->target_byte_offset, | ||
511 | target_mem->size); | ||
512 | return -EINVAL; | ||
513 | } | ||
514 | |||
515 | /* does this parameter fit into our parameter structure */ | ||
516 | if (cde_ctx->num_params >= MAX_CDE_PARAMS) { | ||
517 | nvgpu_warn(g, "cde: no room for new parameters param idx = %d", | ||
518 | cde_ctx->num_params); | ||
519 | return -ENOMEM; | ||
520 | } | ||
521 | |||
522 | /* is the given id valid? */ | ||
523 | if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { | ||
524 | nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", | ||
525 | param->id, cde_ctx->num_params, | ||
526 | NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); | ||
527 | return -EINVAL; | ||
528 | } | ||
529 | |||
530 | cde_ctx->params[cde_ctx->num_params] = *param; | ||
531 | cde_ctx->num_params++; | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, | ||
537 | struct nvgpu_firmware *img, | ||
538 | u32 required_class) | ||
539 | { | ||
540 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
541 | struct gk20a *g = &l->g; | ||
542 | int err; | ||
543 | |||
544 | /* CDE enabled */ | ||
545 | cde_ctx->ch->cde = true; | ||
546 | |||
547 | err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0); | ||
548 | if (err) { | ||
549 | nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", | ||
550 | err); | ||
551 | return err; | ||
552 | } | ||
553 | |||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, | ||
558 | struct nvgpu_firmware *img, | ||
559 | u32 op, | ||
560 | struct gk20a_cde_cmd_elem *cmd_elem, | ||
561 | u32 num_elems) | ||
562 | { | ||
563 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
564 | struct gk20a *g = &l->g; | ||
565 | struct nvgpu_gpfifo_entry **gpfifo, *gpfifo_elem; | ||
566 | u32 *num_entries; | ||
567 | unsigned int i; | ||
568 | |||
569 | /* check command type */ | ||
570 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
571 | gpfifo = &cde_ctx->init_convert_cmd; | ||
572 | num_entries = &cde_ctx->init_cmd_num_entries; | ||
573 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
574 | gpfifo = &cde_ctx->convert_cmd; | ||
575 | num_entries = &cde_ctx->convert_cmd_num_entries; | ||
576 | } else { | ||
577 | nvgpu_warn(g, "cde: unknown command. op=%u", | ||
578 | op); | ||
579 | return -EINVAL; | ||
580 | } | ||
581 | |||
582 | /* allocate gpfifo entries to be pushed */ | ||
583 | *gpfifo = nvgpu_kzalloc(g, | ||
584 | sizeof(struct nvgpu_gpfifo_entry) * num_elems); | ||
585 | if (!*gpfifo) { | ||
586 | nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); | ||
587 | return -ENOMEM; | ||
588 | } | ||
589 | |||
590 | gpfifo_elem = *gpfifo; | ||
591 | for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { | ||
592 | struct nvgpu_mem *target_mem; | ||
593 | |||
594 | /* validate the current entry */ | ||
595 | if (cmd_elem->target_buf >= cde_ctx->num_bufs) { | ||
596 | nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", | ||
597 | cmd_elem->target_buf, cde_ctx->num_bufs); | ||
598 | return -EINVAL; | ||
599 | } | ||
600 | |||
601 | target_mem = cde_ctx->mem + cmd_elem->target_buf; | ||
602 | if (target_mem->size< | ||
603 | cmd_elem->target_byte_offset + cmd_elem->num_bytes) { | ||
604 | nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", | ||
605 | target_mem->size, | ||
606 | cmd_elem->target_byte_offset, | ||
607 | cmd_elem->num_bytes); | ||
608 | return -EINVAL; | ||
609 | } | ||
610 | |||
611 | /* store the element into gpfifo */ | ||
612 | gpfifo_elem->entry0 = | ||
613 | u64_lo32(target_mem->gpu_va + | ||
614 | cmd_elem->target_byte_offset); | ||
615 | gpfifo_elem->entry1 = | ||
616 | u64_hi32(target_mem->gpu_va + | ||
617 | cmd_elem->target_byte_offset) | | ||
618 | pbdma_gp_entry1_length_f(cmd_elem->num_bytes / | ||
619 | sizeof(u32)); | ||
620 | } | ||
621 | |||
622 | *num_entries = num_elems; | ||
623 | return 0; | ||
624 | } | ||
625 | |||
/*
 * Merge the separately-built init and convert gpfifo arrays into one
 * contiguous allocation: init entries first, convert entries appended.
 * After this, init_convert_cmd owns the combined buffer and
 * convert_cmd points at its convert portion, which lets the first
 * submit push init+convert in one go and later submits push only the
 * convert tail (see gk20a_cde_execute_buffer()).
 *
 * Returns 0 on success, -ENOMEM when the combined buffer cannot be
 * allocated (the original arrays are left intact in that case).
 */
static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo_entry);
	unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
		sizeof(struct nvgpu_gpfifo_entry);
	unsigned long total_bytes = init_bytes + conv_bytes;
	struct nvgpu_gpfifo_entry *combined_cmd;

	/* allocate buffer that has space for both */
	combined_cmd = nvgpu_kzalloc(g, total_bytes);
	if (!combined_cmd) {
		nvgpu_warn(g,
			"cde: could not allocate memory for gpfifo entries");
		return -ENOMEM;
	}

	/* move the original init here and append convert */
	memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
	memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
	       cde_ctx->convert_cmd, conv_bytes);

	nvgpu_kfree(g, cde_ctx->init_convert_cmd);
	nvgpu_kfree(g, cde_ctx->convert_cmd);

	cde_ctx->init_convert_cmd = combined_cmd;
	cde_ctx->convert_cmd = combined_cmd
		+ cde_ctx->init_cmd_num_entries;

	return 0;
}
659 | |||
660 | static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, | ||
661 | struct nvgpu_firmware *img) | ||
662 | { | ||
663 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
664 | struct gk20a *g = &l->g; | ||
665 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
666 | u32 *data = (u32 *)img->data; | ||
667 | u32 num_of_elems; | ||
668 | struct gk20a_cde_hdr_elem *elem; | ||
669 | u32 min_size = 0; | ||
670 | int err = 0; | ||
671 | unsigned int i; | ||
672 | |||
673 | min_size += 2 * sizeof(u32); | ||
674 | if (img->size < min_size) { | ||
675 | nvgpu_warn(g, "cde: invalid image header"); | ||
676 | return -EINVAL; | ||
677 | } | ||
678 | |||
679 | cde_app->firmware_version = data[0]; | ||
680 | num_of_elems = data[1]; | ||
681 | |||
682 | min_size += num_of_elems * sizeof(*elem); | ||
683 | if (img->size < min_size) { | ||
684 | nvgpu_warn(g, "cde: bad image"); | ||
685 | return -EINVAL; | ||
686 | } | ||
687 | |||
688 | elem = (struct gk20a_cde_hdr_elem *)&data[2]; | ||
689 | for (i = 0; i < num_of_elems; i++) { | ||
690 | int err = 0; | ||
691 | switch (elem->type) { | ||
692 | case TYPE_BUF: | ||
693 | err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); | ||
694 | break; | ||
695 | case TYPE_REPLACE: | ||
696 | err = gk20a_init_cde_replace(cde_ctx, img, | ||
697 | &elem->replace); | ||
698 | break; | ||
699 | case TYPE_PARAM: | ||
700 | err = gk20a_init_cde_param(cde_ctx, img, &elem->param); | ||
701 | break; | ||
702 | case TYPE_REQUIRED_CLASS: | ||
703 | err = gk20a_init_cde_required_class(cde_ctx, img, | ||
704 | elem->required_class); | ||
705 | break; | ||
706 | case TYPE_COMMAND: | ||
707 | { | ||
708 | struct gk20a_cde_cmd_elem *cmd = (void *) | ||
709 | &img->data[elem->command.data_byte_offset]; | ||
710 | err = gk20a_init_cde_command(cde_ctx, img, | ||
711 | elem->command.op, cmd, | ||
712 | elem->command.num_entries); | ||
713 | break; | ||
714 | } | ||
715 | case TYPE_ARRAY: | ||
716 | memcpy(&cde_app->arrays[elem->array.id][0], | ||
717 | elem->array.data, | ||
718 | MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); | ||
719 | break; | ||
720 | default: | ||
721 | nvgpu_warn(g, "cde: unknown header element"); | ||
722 | err = -EINVAL; | ||
723 | } | ||
724 | |||
725 | if (err) | ||
726 | goto deinit_image; | ||
727 | |||
728 | elem++; | ||
729 | } | ||
730 | |||
731 | if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { | ||
732 | nvgpu_warn(g, "cde: convert command not defined"); | ||
733 | err = -EINVAL; | ||
734 | goto deinit_image; | ||
735 | } | ||
736 | |||
737 | if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { | ||
738 | nvgpu_warn(g, "cde: convert command not defined"); | ||
739 | err = -EINVAL; | ||
740 | goto deinit_image; | ||
741 | } | ||
742 | |||
743 | err = gk20a_cde_pack_cmdbufs(cde_ctx); | ||
744 | if (err) | ||
745 | goto deinit_image; | ||
746 | |||
747 | return 0; | ||
748 | |||
749 | deinit_image: | ||
750 | gk20a_deinit_cde_img(cde_ctx); | ||
751 | return err; | ||
752 | } | ||
753 | |||
/*
 * Submit one of the prepared command buffers on the context's channel.
 *
 * TYPE_BUF_COMMAND_INIT pushes the combined init+convert sequence
 * (they live in one contiguous allocation after packing);
 * TYPE_BUF_COMMAND_CONVERT pushes only the convert tail;
 * TYPE_BUF_COMMAND_NOOP submits zero entries, useful for getting a
 * fence without doing work.
 *
 * Returns the result of gk20a_submit_channel_gpfifo(), -EINVAL for an
 * unknown op, or -ENOSYS when the command buffer was never built.
 */
static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
				    u32 op, struct nvgpu_channel_fence *fence,
				    u32 flags, struct gk20a_fence **fence_out)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_gpfifo_entry *gpfifo = NULL;
	int num_entries = 0;

	/* check command type */
	if (op == TYPE_BUF_COMMAND_INIT) {
		/* both init and convert combined */
		gpfifo = cde_ctx->init_convert_cmd;
		num_entries = cde_ctx->init_cmd_num_entries
			+ cde_ctx->convert_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_CONVERT) {
		gpfifo = cde_ctx->convert_cmd;
		num_entries = cde_ctx->convert_cmd_num_entries;
	} else if (op == TYPE_BUF_COMMAND_NOOP) {
		/* Any non-null gpfifo will suffice with 0 num_entries */
		gpfifo = cde_ctx->init_convert_cmd;
		num_entries = 0;
	} else {
		nvgpu_warn(g, "cde: unknown buffer");
		return -EINVAL;
	}

	if (gpfifo == NULL) {
		nvgpu_warn(g, "cde: buffer not available");
		return -ENOSYS;
	}

	return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
					   num_entries, flags, fence, fence_out,
					   NULL);
}
790 | |||
/*
 * Return a context to the free list and drop the app's use count.
 * Releasing a context that is not marked in_use is tolerated (logged
 * as a double release) so an extra call cannot corrupt the counters.
 */
static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct gk20a *g = &cde_ctx->l->g;

	nvgpu_log(g, gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
	trace_gk20a_cde_release(cde_ctx);

	nvgpu_mutex_acquire(&cde_app->mutex);

	if (cde_ctx->in_use) {
		cde_ctx->in_use = false;
		nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
		cde_app->ctx_usecount--;
	} else {
		nvgpu_log_info(g, "double release cde context %p", cde_ctx);
	}

	nvgpu_mutex_release(&cde_app->mutex);
}
813 | |||
/*
 * Delayed-work handler that deletes an idle temporary context.
 *
 * The in_use/initialised state is checked twice: once lock-free as a
 * cheap early-out, and again under cde_app->mutex, because a user may
 * grab the context between the first check and taking the mutex. The
 * GPU is powered on (gk20a_busy) around the removal; if power-on
 * fails, deletion is simply postponed — the context remains usable,
 * so nothing leaks.
 */
static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct delayed_work *delay_work = to_delayed_work(work);
	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
			struct gk20a_cde_ctx, ctx_deleter_work);
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* someone has just taken it? engine deletion started? */
	if (cde_ctx->in_use || !cde_app->initialised)
		return;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: attempting to delete temporary %p", cde_ctx);

	err = gk20a_busy(g);
	if (err) {
		/* this context would find new use anyway later, so not freeing
		 * here does not leak anything */
		nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
				" temp ctx deletion");
		return;
	}

	nvgpu_mutex_acquire(&cde_app->mutex);
	if (cde_ctx->in_use || !cde_app->initialised) {
		nvgpu_log(g, gpu_dbg_cde_ctx,
				"cde: context use raced, not deleting %p",
				cde_ctx);
		goto out;
	}

	WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
			"double pending %p", cde_ctx);

	gk20a_cde_remove_ctx(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: destroyed %p count=%d use=%d max=%d",
			cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
			cde_app->ctx_count_top);

out:
	nvgpu_mutex_release(&cde_app->mutex);
	gk20a_idle(g);
}
863 | |||
/*
 * Hand out a CDE context to a caller. cde_app->mutex must be held.
 *
 * Preference order:
 *  1. return ERR_PTR(-EAGAIN) when the global use count is exhausted,
 *  2. reuse an idle context from the free list,
 *  3. otherwise allocate a fresh temporary context.
 *
 * The returned context is marked in_use and moved/added to the
 * used_contexts list; temporary contexts are flagged is_temporary so they
 * get torn down again by the deleter work after use.
 */
static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
__must_hold(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	/* exhausted? */

	if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
		return ERR_PTR(-EAGAIN);

	/* idle context available? */

	if (!nvgpu_list_empty(&cde_app->free_contexts)) {
		cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
				gk20a_cde_ctx, list);
		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
				"cde: got free %p count=%d use=%d max=%d",
				cde_ctx, cde_app->ctx_count,
				cde_app->ctx_usecount,
				cde_app->ctx_count_top);
		trace_gk20a_cde_get_context(cde_ctx);

		/* deleter work may be scheduled, but in_use prevents it */
		cde_ctx->in_use = true;
		nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
		cde_app->ctx_usecount++;

		/* cancel any deletions now that ctx is in use */
		gk20a_cde_cancel_deleter(cde_ctx, true);
		return cde_ctx;
	}

	/* no free contexts, get a temporary one */

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: no free contexts, count=%d",
			cde_app->ctx_count);

	cde_ctx = gk20a_cde_allocate_context(l);
	if (IS_ERR(cde_ctx)) {
		nvgpu_warn(g, "cde: cannot allocate context: %ld",
				PTR_ERR(cde_ctx));
		return cde_ctx;
	}

	trace_gk20a_cde_get_context(cde_ctx);
	cde_ctx->in_use = true;
	cde_ctx->is_temporary = true;
	cde_app->ctx_usecount++;
	cde_app->ctx_count++;
	/* track the high-water mark of simultaneously live contexts */
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;
	nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);

	return cde_ctx;
}
922 | |||
/*
 * Like gk20a_cde_do_get_context() but retries on -EAGAIN (use count
 * exhausted) until MAX_CTX_RETRY_TIME elapses, temporarily dropping
 * cde_app->mutex and yielding the CPU between attempts so that busy
 * contexts can be released by other threads. Caller holds the mutex on
 * entry and on return.
 */
static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx = NULL;
	struct nvgpu_timeout timeout;

	nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
			   NVGPU_TIMER_CPU_TIMER);

	do {
		cde_ctx = gk20a_cde_do_get_context(l);
		if (PTR_ERR(cde_ctx) != -EAGAIN)
			break;

		/* exhausted, retry */
		nvgpu_mutex_release(&cde_app->mutex);
		cond_resched();
		nvgpu_mutex_acquire(&cde_app->mutex);
	} while (!nvgpu_timeout_expired(&timeout));

	return cde_ctx;
}
948 | |||
949 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l) | ||
950 | { | ||
951 | struct gk20a *g = &l->g; | ||
952 | struct gk20a_cde_ctx *cde_ctx; | ||
953 | int ret; | ||
954 | |||
955 | cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); | ||
956 | if (!cde_ctx) | ||
957 | return ERR_PTR(-ENOMEM); | ||
958 | |||
959 | cde_ctx->l = l; | ||
960 | cde_ctx->dev = dev_from_gk20a(g); | ||
961 | |||
962 | ret = gk20a_cde_load(cde_ctx); | ||
963 | if (ret) { | ||
964 | nvgpu_kfree(g, cde_ctx); | ||
965 | return ERR_PTR(ret); | ||
966 | } | ||
967 | |||
968 | nvgpu_init_list_node(&cde_ctx->list); | ||
969 | cde_ctx->is_temporary = false; | ||
970 | cde_ctx->in_use = false; | ||
971 | INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, | ||
972 | gk20a_cde_ctx_deleter_fn); | ||
973 | |||
974 | nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); | ||
975 | trace_gk20a_cde_allocate_context(cde_ctx); | ||
976 | return cde_ctx; | ||
977 | } | ||
978 | |||
979 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | ||
980 | struct dma_buf *compbits_scatter_buf, | ||
981 | u64 compbits_byte_offset, | ||
982 | u64 scatterbuffer_byte_offset, | ||
983 | struct nvgpu_channel_fence *fence, | ||
984 | u32 __flags, struct gk20a_cde_param *params, | ||
985 | int num_params, struct gk20a_fence **fence_out) | ||
986 | __acquires(&l->cde_app->mutex) | ||
987 | __releases(&l->cde_app->mutex) | ||
988 | { | ||
989 | struct gk20a *g = &l->g; | ||
990 | struct gk20a_cde_ctx *cde_ctx = NULL; | ||
991 | struct gk20a_comptags comptags; | ||
992 | struct nvgpu_os_buffer os_buf = { | ||
993 | compbits_scatter_buf, | ||
994 | NULL, | ||
995 | dev_from_gk20a(g) | ||
996 | }; | ||
997 | u64 mapped_compbits_offset = 0; | ||
998 | u64 compbits_size = 0; | ||
999 | u64 mapped_scatterbuffer_offset = 0; | ||
1000 | u64 scatterbuffer_size = 0; | ||
1001 | u64 map_vaddr = 0; | ||
1002 | u64 map_offset = 0; | ||
1003 | u64 map_size = 0; | ||
1004 | u8 *surface = NULL; | ||
1005 | u64 big_page_mask = 0; | ||
1006 | u32 flags; | ||
1007 | int err, i; | ||
1008 | const s16 compbits_kind = 0; | ||
1009 | u32 submit_op; | ||
1010 | struct dma_buf_attachment *attachment; | ||
1011 | |||
1012 | nvgpu_log(g, gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", | ||
1013 | compbits_byte_offset, scatterbuffer_byte_offset); | ||
1014 | |||
1015 | /* scatter buffer must be after compbits buffer */ | ||
1016 | if (scatterbuffer_byte_offset && | ||
1017 | scatterbuffer_byte_offset < compbits_byte_offset) | ||
1018 | return -EINVAL; | ||
1019 | |||
1020 | err = gk20a_busy(g); | ||
1021 | if (err) | ||
1022 | return err; | ||
1023 | |||
1024 | nvgpu_mutex_acquire(&l->cde_app.mutex); | ||
1025 | cde_ctx = gk20a_cde_get_context(l); | ||
1026 | nvgpu_mutex_release(&l->cde_app.mutex); | ||
1027 | if (IS_ERR(cde_ctx)) { | ||
1028 | err = PTR_ERR(cde_ctx); | ||
1029 | goto exit_idle; | ||
1030 | } | ||
1031 | |||
1032 | /* First, map the buffer to local va */ | ||
1033 | |||
1034 | /* ensure that the compbits buffer has drvdata */ | ||
1035 | err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, | ||
1036 | dev_from_gk20a(g)); | ||
1037 | if (err) | ||
1038 | goto exit_idle; | ||
1039 | |||
1040 | /* compbits don't start at page aligned offset, so we need to align | ||
1041 | the region to be mapped */ | ||
1042 | big_page_mask = cde_ctx->vm->big_page_size - 1; | ||
1043 | map_offset = compbits_byte_offset & ~big_page_mask; | ||
1044 | map_size = compbits_scatter_buf->size - map_offset; | ||
1045 | |||
1046 | |||
1047 | /* compute compbit start offset from the beginning of the mapped | ||
1048 | area */ | ||
1049 | mapped_compbits_offset = compbits_byte_offset - map_offset; | ||
1050 | if (scatterbuffer_byte_offset) { | ||
1051 | compbits_size = scatterbuffer_byte_offset - | ||
1052 | compbits_byte_offset; | ||
1053 | mapped_scatterbuffer_offset = scatterbuffer_byte_offset - | ||
1054 | map_offset; | ||
1055 | scatterbuffer_size = compbits_scatter_buf->size - | ||
1056 | scatterbuffer_byte_offset; | ||
1057 | } else { | ||
1058 | compbits_size = compbits_scatter_buf->size - | ||
1059 | compbits_byte_offset; | ||
1060 | } | ||
1061 | |||
1062 | nvgpu_log(g, gpu_dbg_cde, "map_offset=%llu map_size=%llu", | ||
1063 | map_offset, map_size); | ||
1064 | nvgpu_log(g, gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", | ||
1065 | mapped_compbits_offset, compbits_size); | ||
1066 | nvgpu_log(g, gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", | ||
1067 | mapped_scatterbuffer_offset, scatterbuffer_size); | ||
1068 | |||
1069 | |||
1070 | /* map the destination buffer */ | ||
1071 | get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */ | ||
1072 | err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0, | ||
1073 | NVGPU_VM_MAP_CACHEABLE | | ||
1074 | NVGPU_VM_MAP_DIRECT_KIND_CTRL, | ||
1075 | NVGPU_KIND_INVALID, | ||
1076 | compbits_kind, /* incompressible kind */ | ||
1077 | gk20a_mem_flag_none, | ||
1078 | map_offset, map_size, | ||
1079 | NULL, | ||
1080 | &map_vaddr); | ||
1081 | if (err) { | ||
1082 | dma_buf_put(compbits_scatter_buf); | ||
1083 | err = -EINVAL; | ||
1084 | goto exit_idle; | ||
1085 | } | ||
1086 | |||
1087 | if (scatterbuffer_byte_offset && | ||
1088 | l->ops.cde.need_scatter_buffer && | ||
1089 | l->ops.cde.need_scatter_buffer(g)) { | ||
1090 | struct sg_table *sgt; | ||
1091 | void *scatter_buffer; | ||
1092 | |||
1093 | surface = dma_buf_vmap(compbits_scatter_buf); | ||
1094 | if (IS_ERR(surface)) { | ||
1095 | nvgpu_warn(g, | ||
1096 | "dma_buf_vmap failed"); | ||
1097 | err = -EINVAL; | ||
1098 | goto exit_unmap_vaddr; | ||
1099 | } | ||
1100 | |||
1101 | scatter_buffer = surface + scatterbuffer_byte_offset; | ||
1102 | |||
1103 | nvgpu_log(g, gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", | ||
1104 | surface, scatter_buffer); | ||
1105 | sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf, | ||
1106 | &attachment); | ||
1107 | if (IS_ERR(sgt)) { | ||
1108 | nvgpu_warn(g, | ||
1109 | "mm_pin failed"); | ||
1110 | err = -EINVAL; | ||
1111 | goto exit_unmap_surface; | ||
1112 | } else { | ||
1113 | err = l->ops.cde.populate_scatter_buffer(g, sgt, | ||
1114 | compbits_byte_offset, scatter_buffer, | ||
1115 | scatterbuffer_size); | ||
1116 | WARN_ON(err); | ||
1117 | |||
1118 | gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf, | ||
1119 | attachment, sgt); | ||
1120 | if (err) | ||
1121 | goto exit_unmap_surface; | ||
1122 | } | ||
1123 | |||
1124 | __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); | ||
1125 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1126 | surface = NULL; | ||
1127 | } | ||
1128 | |||
1129 | /* store source buffer compression tags */ | ||
1130 | gk20a_get_comptags(&os_buf, &comptags); | ||
1131 | cde_ctx->surf_param_offset = comptags.offset; | ||
1132 | cde_ctx->surf_param_lines = comptags.lines; | ||
1133 | |||
1134 | /* store surface vaddr. This is actually compbit vaddr, but since | ||
1135 | compbits live in the same surface, and we can get the alloc base | ||
1136 | address by using gpuva_to_iova_base, this will do */ | ||
1137 | cde_ctx->surf_vaddr = map_vaddr; | ||
1138 | |||
1139 | /* store information about destination */ | ||
1140 | cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; | ||
1141 | cde_ctx->compbit_size = compbits_size; | ||
1142 | |||
1143 | cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; | ||
1144 | cde_ctx->scatterbuffer_size = scatterbuffer_size; | ||
1145 | |||
1146 | /* remove existing argument data */ | ||
1147 | memset(cde_ctx->user_param_values, 0, | ||
1148 | sizeof(cde_ctx->user_param_values)); | ||
1149 | |||
1150 | /* read user space arguments for the conversion */ | ||
1151 | for (i = 0; i < num_params; i++) { | ||
1152 | struct gk20a_cde_param *param = params + i; | ||
1153 | int id = param->id - NUM_RESERVED_PARAMS; | ||
1154 | |||
1155 | if (id < 0 || id >= MAX_CDE_USER_PARAMS) { | ||
1156 | nvgpu_warn(g, "cde: unknown user parameter"); | ||
1157 | err = -EINVAL; | ||
1158 | goto exit_unmap_surface; | ||
1159 | } | ||
1160 | cde_ctx->user_param_values[id] = param->value; | ||
1161 | } | ||
1162 | |||
1163 | /* patch data */ | ||
1164 | err = gk20a_cde_patch_params(cde_ctx); | ||
1165 | if (err) { | ||
1166 | nvgpu_warn(g, "cde: failed to patch parameters"); | ||
1167 | goto exit_unmap_surface; | ||
1168 | } | ||
1169 | |||
1170 | nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", | ||
1171 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); | ||
1172 | nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", | ||
1173 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); | ||
1174 | nvgpu_log(g, gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", | ||
1175 | cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); | ||
1176 | |||
1177 | /* take always the postfence as it is needed for protecting the | ||
1178 | * cde context */ | ||
1179 | flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET; | ||
1180 | |||
1181 | /* gk20a_cde_execute_buffer() will grab a power reference of it's own */ | ||
1182 | gk20a_idle(g); | ||
1183 | |||
1184 | if (comptags.lines == 0) { | ||
1185 | /* | ||
1186 | * Nothing to do on the buffer, but do a null kickoff for | ||
1187 | * managing the pre and post fences. | ||
1188 | */ | ||
1189 | submit_op = TYPE_BUF_COMMAND_NOOP; | ||
1190 | } else if (!cde_ctx->init_cmd_executed) { | ||
1191 | /* | ||
1192 | * First time, so include the init pushbuf too in addition to | ||
1193 | * the conversion code. | ||
1194 | */ | ||
1195 | submit_op = TYPE_BUF_COMMAND_INIT; | ||
1196 | } else { | ||
1197 | /* | ||
1198 | * The usual condition: execute just the conversion. | ||
1199 | */ | ||
1200 | submit_op = TYPE_BUF_COMMAND_CONVERT; | ||
1201 | } | ||
1202 | err = gk20a_cde_execute_buffer(cde_ctx, submit_op, | ||
1203 | fence, flags, fence_out); | ||
1204 | |||
1205 | if (comptags.lines != 0 && !err) | ||
1206 | cde_ctx->init_cmd_executed = true; | ||
1207 | |||
1208 | /* unmap the buffers - channel holds references to them now */ | ||
1209 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); | ||
1210 | |||
1211 | return err; | ||
1212 | |||
1213 | exit_unmap_surface: | ||
1214 | if (surface) | ||
1215 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1216 | exit_unmap_vaddr: | ||
1217 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL); | ||
1218 | exit_idle: | ||
1219 | gk20a_idle(g); | ||
1220 | return err; | ||
1221 | } | ||
1222 | |||
/*
 * Job-completion callback for a CDE context's channel.
 *
 * Runs once the channel's joblist drains. Handles channel timeout by
 * marking a persistent context temporary and creating a replacement;
 * schedules deferred deletion for temporary contexts; and finally puts
 * the context back on the free list via gk20a_cde_ctx_release() (skipped
 * after a timeout, since the context is being replaced/deleted).
 */
static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_ctx *cde_ctx = data;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	bool channel_idle;

	channel_gk20a_joblist_lock(ch);
	channel_idle = channel_gk20a_joblist_is_empty(ch);
	channel_gk20a_joblist_unlock(ch);

	/* only act when the last job has completed */
	if (!channel_idle)
		return;

	trace_gk20a_cde_finished_ctx_cb(cde_ctx);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
	if (!cde_ctx->in_use)
		nvgpu_log_info(g, "double finish cde context %p on channel %p",
			       cde_ctx, ch);

	if (ch->has_timedout) {
		if (cde_ctx->is_temporary) {
			nvgpu_warn(g,
					"cde: channel had timed out"
					" (temporary channel)");
			/* going to be deleted anyway */
		} else {
			nvgpu_warn(g,
					"cde: channel had timed out"
					", reloading");
			/* mark it to be deleted, replace with a new one */
			nvgpu_mutex_acquire(&cde_app->mutex);
			cde_ctx->is_temporary = true;
			if (gk20a_cde_create_context(l)) {
				nvgpu_err(g, "cde: can't replace context");
			}
			nvgpu_mutex_release(&cde_app->mutex);
		}
	}

	/* delete temporary contexts later (watch for doubles) */
	if (cde_ctx->is_temporary && cde_ctx->in_use) {
		WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
			msecs_to_jiffies(CTX_DELETE_TIME));
	}

	if (!ch->has_timedout)
		gk20a_cde_ctx_release(cde_ctx);
}
1276 | |||
/*
 * Create the GPU-side resources for one CDE context.
 *
 * Fetches the "gpu2cde.bin" firmware, opens a TSG and a channel with
 * gk20a_cde_finished_ctx_cb as the job-completion callback, binds the
 * channel to the dedicated CDE VM, allocates a gpfifo, maps the compbit
 * backing store read-only into the channel's address space and parses the
 * firmware image via gk20a_init_cde_img(). Returns 0 or a negative errno.
 *
 * NOTE(review): on the error paths after gk20a_tsg_open() succeeds,
 * only the firmware (and, later, the VM reference) is released; the TSG
 * reference does not appear to be dropped here. Verify whether it is
 * released elsewhere or leaked on these paths.
 */
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_firmware *img;
	struct channel_gk20a *ch;
	struct tsg_gk20a *tsg;
	struct gr_gk20a *gr = &g->gr;
	struct nvgpu_gpfifo_args gpfifo_args;
	int err = 0;
	u64 vaddr;

	img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
	if (!img) {
		nvgpu_err(g, "cde: could not fetch the firmware");
		return -ENOSYS;
	}

	tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
	if (!tsg) {
		nvgpu_err(g, "cde: could not create TSG");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
			cde_ctx,
			-1,
			false);
	if (!ch) {
		nvgpu_warn(g, "cde: gk20a channel not available");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	/* the CDE channel must never be torn down by watchdog timeout */
	ch->timeout.enabled = false;

	/* bind the channel to the vm */
	err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
	if (err) {
		nvgpu_warn(g, "cde: could not bind vm");
		goto err_commit_va;
	}

	err = gk20a_tsg_bind_channel(tsg, ch);
	if (err) {
		nvgpu_err(g, "cde: unable to bind to tsg");
		goto err_alloc_gpfifo;
	}

	gpfifo_args.num_entries = 1024;
	gpfifo_args.num_inflight_jobs = 0;
	gpfifo_args.flags = 0;
	/* allocate gpfifo (1024 should be more than enough) */
	err = gk20a_channel_alloc_gpfifo(ch, &gpfifo_args);
	if (err) {
		nvgpu_warn(g, "cde: unable to allocate gpfifo");
		goto err_alloc_gpfifo;
	}

	/* map backing store to gpu virtual space */
	vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
			       g->gr.compbit_store.mem.size,
			       NVGPU_VM_MAP_CACHEABLE,
			       gk20a_mem_flag_read_only,
			       false,
			       gr->compbit_store.mem.aperture);

	if (!vaddr) {
		nvgpu_warn(g, "cde: cannot map compression bit backing store");
		err = -ENOMEM;
		goto err_map_backingstore;
	}

	/* store initialisation data */
	cde_ctx->ch = ch;
	cde_ctx->tsg = tsg;
	cde_ctx->vm = ch->vm;
	cde_ctx->backing_store_vaddr = vaddr;

	/* initialise the firmware */
	err = gk20a_init_cde_img(cde_ctx, img);
	if (err) {
		nvgpu_warn(g, "cde: image initialisation failed");
		goto err_init_cde_img;
	}

	/* initialisation done */
	nvgpu_release_firmware(g, img);

	return 0;

err_init_cde_img:
	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
err_map_backingstore:
err_alloc_gpfifo:
	nvgpu_vm_put(ch->vm);
err_commit_va:
err_get_gk20a_channel:
	nvgpu_release_firmware(g, img);
	nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
	return err;
}
1380 | |||
1381 | int gk20a_cde_reload(struct nvgpu_os_linux *l) | ||
1382 | __acquires(&l->cde_app->mutex) | ||
1383 | __releases(&l->cde_app->mutex) | ||
1384 | { | ||
1385 | struct gk20a *g = &l->g; | ||
1386 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
1387 | int err; | ||
1388 | |||
1389 | if (!cde_app->initialised) | ||
1390 | return -ENOSYS; | ||
1391 | |||
1392 | err = gk20a_busy(g); | ||
1393 | if (err) | ||
1394 | return err; | ||
1395 | |||
1396 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1397 | |||
1398 | gk20a_cde_stop(l); | ||
1399 | |||
1400 | err = gk20a_cde_create_contexts(l); | ||
1401 | if (!err) | ||
1402 | cde_app->initialised = true; | ||
1403 | |||
1404 | nvgpu_mutex_release(&cde_app->mutex); | ||
1405 | |||
1406 | gk20a_idle(g); | ||
1407 | return err; | ||
1408 | } | ||
1409 | |||
/*
 * One-time initialisation of CDE support for this GPU instance.
 *
 * Idempotent: returns 0 immediately when already initialised. Sets up the
 * app mutex, the free/used context lists and the context counters, then
 * creates the initial context pool. On failure the mutex is destroyed
 * again so a later retry starts from a clean state.
 */
int gk20a_init_cde_support(struct nvgpu_os_linux *l)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a *g = &l->g;
	int err;

	if (cde_app->initialised)
		return 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");

	err = nvgpu_mutex_init(&cde_app->mutex);
	if (err)
		return err;

	nvgpu_mutex_acquire(&cde_app->mutex);

	nvgpu_init_list_node(&cde_app->free_contexts);
	nvgpu_init_list_node(&cde_app->used_contexts);
	cde_app->ctx_count = 0;
	cde_app->ctx_count_top = 0;
	cde_app->ctx_usecount = 0;

	err = gk20a_cde_create_contexts(l);
	if (!err)
		cde_app->initialised = true;

	nvgpu_mutex_release(&cde_app->mutex);
	nvgpu_log(g, gpu_dbg_cde_ctx, "cde: init finished: %d", err);

	/* undo the mutex init so a retry can re-run this function cleanly */
	if (err)
		nvgpu_mutex_destroy(&cde_app->mutex);

	return err;
}
1447 | |||
/*
 * Identifiers for the values patched into the CDE firmware's launch
 * descriptors by gk20a_buffer_convert_gpu_to_cde_v1() (via WRITE_PATCH).
 * H_/V_ prefixes select the horizontal vs. vertical conversion pass;
 * entries marked "for firmware v0 only" are kept for the legacy format.
 */
enum cde_launch_patch_id {
	PATCH_H_QMD_CTA_RASTER_WIDTH_ID     = 1024,
	PATCH_H_QMD_CTA_RASTER_HEIGHT_ID    = 1025,
	PATCH_QMD_CTA_RASTER_DEPTH_ID       = 1026, /* for firmware v0 only */
	PATCH_QMD_CTA_THREAD_DIMENSION0_ID  = 1027,
	PATCH_QMD_CTA_THREAD_DIMENSION1_ID  = 1028,
	PATCH_QMD_CTA_THREAD_DIMENSION2_ID  = 1029, /* for firmware v0 only */
	PATCH_USER_CONST_XTILES_ID          = 1030, /* for firmware v0 only */
	PATCH_USER_CONST_YTILES_ID          = 1031, /* for firmware v0 only */
	PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
	PATCH_USER_CONST_DSTPITCH_ID        = 1033, /* for firmware v0 only */
	PATCH_H_USER_CONST_FLAGS_ID         = 1034, /* for firmware v0 only */
	PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID  = 1035,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID  = 1036,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID  = 1037,
	PATCH_VPC_CURRENT_GROUP_SIZE_X_ID   = 1038,
	PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID   = 1039,
	PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID   = 1040,
	PATCH_USER_CONST_XBLOCKS_ID         = 1041,
	PATCH_H_USER_CONST_DSTOFFSET_ID     = 1042,
	PATCH_V_QMD_CTA_RASTER_WIDTH_ID     = 1043,
	PATCH_V_QMD_CTA_RASTER_HEIGHT_ID    = 1044,
	PATCH_V_USER_CONST_DSTOFFSET_ID     = 1045,
	PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID  = 1046,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID  = 1047,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID  = 1048,
	PATCH_H_LAUNCH_WORD1_ID             = 1049,
	PATCH_H_LAUNCH_WORD2_ID             = 1050,
	PATCH_V_LAUNCH_WORD1_ID             = 1051,
	PATCH_V_LAUNCH_WORD2_ID             = 1052,
	PATCH_H_QMD_PROGRAM_OFFSET_ID       = 1053,
	PATCH_H_QMD_REGISTER_COUNT_ID       = 1054,
	PATCH_V_QMD_PROGRAM_OFFSET_ID       = 1055,
	PATCH_V_QMD_REGISTER_COUNT_ID       = 1056,
};
1483 | |||
/* maximum number of WRITE_PATCHes in the below function */
#define MAX_CDE_LAUNCH_PATCHES		32

/*
 * Build the v1-firmware parameter set for a conversion and submit it.
 *
 * Computes the H/V launch grids from the surface dimensions, resolves the
 * shader program numbers via the per-chip get_program_numbers hook, fills
 * a gk20a_cde_param array with WRITE_PATCH and hands everything to
 * gk20a_cde_convert(). On success the new post-fence replaces
 * state->fence and the requested compbit kinds are marked valid.
 *
 * NOTE(review): voffset is declared int but computed from two u64
 * offsets; this silently truncates if the offsets ever differ by more
 * than INT_MAX -- confirm callers keep the compbit regions close enough.
 */
static int gk20a_buffer_convert_gpu_to_cde_v1(
		struct nvgpu_os_linux *l,
		struct dma_buf *dmabuf, u32 consumer,
		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_channel_fence *fence_in,
		struct gk20a_buffer_state *state)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
	int param = 0;
	int err = 0;
	struct gk20a_fence *new_fence = NULL;
	const int wgx = 8;
	const int wgy = 8;
	const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
	const int xalign = compbits_per_byte * wgx;
	const int yalign = wgy;

	/* Compute per launch parameters */
	const int xtiles = (width + 7) >> 3;
	const int ytiles = (height + 7) >> 3;
	const int gridw_h = roundup(xtiles, xalign) / xalign;
	const int gridh_h = roundup(ytiles, yalign) / yalign;
	const int gridw_v = roundup(ytiles, xalign) / xalign;
	const int gridh_v = roundup(xtiles, yalign) / yalign;
	const int xblocks = (xtiles + 1) >> 1;
	const int voffset = compbits_voffset - compbits_hoffset;

	int hprog = -1;
	int vprog = -1;

	if (l->ops.cde.get_program_numbers)
		l->ops.cde.get_program_numbers(g, block_height_log2,
					       l->cde_app.shader_parameter,
					       &hprog, &vprog);
	else {
		nvgpu_warn(g, "cde: chip not supported");
		return -ENOSYS;
	}

	if (hprog < 0 || vprog < 0) {
		nvgpu_warn(g, "cde: could not determine programs");
		return -ENOSYS;
	}

	if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
		nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
			   xtiles, ytiles);

	nvgpu_log(g, gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
		  width, height, block_height_log2,
		  compbits_hoffset, compbits_voffset, scatterbuffer_offset);
	nvgpu_log(g, gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
		  width, height, xtiles, ytiles);
	nvgpu_log(g, gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
		  wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
	nvgpu_log(g, gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
		  hprog,
		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
		  vprog,
		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);

	/* Write parameters */
#define WRITE_PATCH(NAME, VALUE) \
	params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
	WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
	WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
		block_height_log2);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
	WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
	WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
	WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
	WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
	WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);

	/* launch words 0/1 run the pass, words 2/3 are the skip variant */
	if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}

	if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}
#undef WRITE_PATCH

	err = gk20a_cde_convert(l, dmabuf,
				compbits_hoffset,
				scatterbuffer_offset,
				fence_in, submit_flags,
				params, param, &new_fence);
	if (err)
		goto out;

	/* compbits generated, update state & fence */
	gk20a_fence_put(state->fence);
	state->fence = new_fence;
	state->valid_compbits |= consumer &
		(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
out:
	return err;
}
1629 | |||
1630 | static int gk20a_buffer_convert_gpu_to_cde( | ||
1631 | struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, | ||
1632 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1633 | u64 scatterbuffer_offset, | ||
1634 | u32 width, u32 height, u32 block_height_log2, | ||
1635 | u32 submit_flags, struct nvgpu_channel_fence *fence_in, | ||
1636 | struct gk20a_buffer_state *state) | ||
1637 | { | ||
1638 | struct gk20a *g = &l->g; | ||
1639 | int err = 0; | ||
1640 | |||
1641 | if (!l->cde_app.initialised) | ||
1642 | return -ENOSYS; | ||
1643 | |||
1644 | nvgpu_log(g, gpu_dbg_cde, "firmware version = %d\n", | ||
1645 | l->cde_app.firmware_version); | ||
1646 | |||
1647 | if (l->cde_app.firmware_version == 1) { | ||
1648 | err = gk20a_buffer_convert_gpu_to_cde_v1( | ||
1649 | l, dmabuf, consumer, offset, compbits_hoffset, | ||
1650 | compbits_voffset, scatterbuffer_offset, | ||
1651 | width, height, block_height_log2, | ||
1652 | submit_flags, fence_in, state); | ||
1653 | } else { | ||
1654 | nvgpu_err(g, "unsupported CDE firmware version %d", | ||
1655 | l->cde_app.firmware_version); | ||
1656 | err = -EINVAL; | ||
1657 | } | ||
1658 | |||
1659 | return err; | ||
1660 | } | ||
1661 | |||
1662 | int gk20a_prepare_compressible_read( | ||
1663 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
1664 | u64 compbits_hoffset, u64 compbits_voffset, | ||
1665 | u64 scatterbuffer_offset, | ||
1666 | u32 width, u32 height, u32 block_height_log2, | ||
1667 | u32 submit_flags, struct nvgpu_channel_fence *fence, | ||
1668 | u32 *valid_compbits, u32 *zbc_color, | ||
1669 | struct gk20a_fence **fence_out) | ||
1670 | { | ||
1671 | struct gk20a *g = &l->g; | ||
1672 | int err = 0; | ||
1673 | struct gk20a_buffer_state *state; | ||
1674 | struct dma_buf *dmabuf; | ||
1675 | u32 missing_bits; | ||
1676 | |||
1677 | dmabuf = dma_buf_get(buffer_fd); | ||
1678 | if (IS_ERR(dmabuf)) | ||
1679 | return -EINVAL; | ||
1680 | |||
1681 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1682 | if (err) { | ||
1683 | dma_buf_put(dmabuf); | ||
1684 | return err; | ||
1685 | } | ||
1686 | |||
1687 | missing_bits = (state->valid_compbits ^ request) & request; | ||
1688 | |||
1689 | nvgpu_mutex_acquire(&state->lock); | ||
1690 | |||
1691 | if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { | ||
1692 | |||
1693 | gk20a_fence_put(state->fence); | ||
1694 | state->fence = NULL; | ||
1695 | /* state->fence = decompress(); | ||
1696 | state->valid_compbits = 0; */ | ||
1697 | err = -EINVAL; | ||
1698 | goto out; | ||
1699 | } else if (missing_bits) { | ||
1700 | u32 missing_cde_bits = missing_bits & | ||
1701 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
1702 | if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && | ||
1703 | missing_cde_bits) { | ||
1704 | err = gk20a_buffer_convert_gpu_to_cde( | ||
1705 | l, dmabuf, | ||
1706 | missing_cde_bits, | ||
1707 | offset, compbits_hoffset, | ||
1708 | compbits_voffset, scatterbuffer_offset, | ||
1709 | width, height, block_height_log2, | ||
1710 | submit_flags, fence, | ||
1711 | state); | ||
1712 | if (err) | ||
1713 | goto out; | ||
1714 | } | ||
1715 | } | ||
1716 | |||
1717 | if (state->fence && fence_out) | ||
1718 | *fence_out = gk20a_fence_get(state->fence); | ||
1719 | |||
1720 | if (valid_compbits) | ||
1721 | *valid_compbits = state->valid_compbits; | ||
1722 | |||
1723 | if (zbc_color) | ||
1724 | *zbc_color = state->zbc_color; | ||
1725 | |||
1726 | out: | ||
1727 | nvgpu_mutex_release(&state->lock); | ||
1728 | dma_buf_put(dmabuf); | ||
1729 | return err; | ||
1730 | } | ||
1731 | |||
1732 | int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, | ||
1733 | u32 valid_compbits, u64 offset, u32 zbc_color) | ||
1734 | { | ||
1735 | int err; | ||
1736 | struct gk20a_buffer_state *state; | ||
1737 | struct dma_buf *dmabuf; | ||
1738 | |||
1739 | dmabuf = dma_buf_get(buffer_fd); | ||
1740 | if (IS_ERR(dmabuf)) { | ||
1741 | nvgpu_err(g, "invalid dmabuf"); | ||
1742 | return -EINVAL; | ||
1743 | } | ||
1744 | |||
1745 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1746 | if (err) { | ||
1747 | nvgpu_err(g, "could not get state from dmabuf"); | ||
1748 | dma_buf_put(dmabuf); | ||
1749 | return err; | ||
1750 | } | ||
1751 | |||
1752 | nvgpu_mutex_acquire(&state->lock); | ||
1753 | |||
1754 | /* Update the compbits state. */ | ||
1755 | state->valid_compbits = valid_compbits; | ||
1756 | state->zbc_color = zbc_color; | ||
1757 | |||
1758 | /* Discard previous compbit job fence. */ | ||
1759 | gk20a_fence_put(state->fence); | ||
1760 | state->fence = NULL; | ||
1761 | |||
1762 | nvgpu_mutex_release(&state->lock); | ||
1763 | dma_buf_put(dmabuf); | ||
1764 | return 0; | ||
1765 | } | ||
1766 | |||
1767 | int nvgpu_cde_init_ops(struct nvgpu_os_linux *l) | ||
1768 | { | ||
1769 | struct gk20a *g = &l->g; | ||
1770 | u32 ver = g->params.gpu_arch + g->params.gpu_impl; | ||
1771 | |||
1772 | switch (ver) { | ||
1773 | case GK20A_GPUID_GM20B: | ||
1774 | case GK20A_GPUID_GM20B_B: | ||
1775 | l->ops.cde = gm20b_cde_ops.cde; | ||
1776 | break; | ||
1777 | case NVGPU_GPUID_GP10B: | ||
1778 | l->ops.cde = gp10b_cde_ops.cde; | ||
1779 | break; | ||
1780 | default: | ||
1781 | /* CDE is optional, so today ignoring unknown chip is fine */ | ||
1782 | break; | ||
1783 | } | ||
1784 | |||
1785 | return 0; | ||
1786 | } | ||