Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/cde.c')
-rw-r--r--	drivers/gpu/nvgpu/common/linux/cde.c	1710
1 file changed, 1710 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
new file mode 100644
index 00000000..143e5b75
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -0,0 +1,1710 @@
/*
 * Color decompression engine support
 *
 * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/dma-buf.h>
#include <uapi/linux/nvgpu.h>

#include <trace/events/gk20a.h>

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/timers.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/firmware.h>

#include <nvgpu/linux/vm.h>

#include "gk20a/gk20a.h"
#include "gk20a/channel_gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "gk20a/fence_gk20a.h"
#include "gk20a/gr_gk20a.h"

#include "cde.h"
#include "os_linux.h"
#include "dmabuf.h"
#include "channel.h"

#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);

#define CTX_DELETE_TIME 1000

#define MAX_CTX_USE_COUNT 42
#define MAX_CTX_RETRY_TIME 2000
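
/*
 * Resolve a GPU virtual address to the base IOVA of the buffer backing it.
 * Returns 0 if no mapped buffer covers the given address.
 */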
static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
{
        struct nvgpu_mapped_buf *buffer;
        dma_addr_t addr = 0;
        struct gk20a *g = gk20a_from_vm(vm);

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
        if (buffer)
                addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
        nvgpu_mutex_release(&vm->update_gmmu_lock);

        return addr;
}

static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
{
        unsigned int i;

        for (i = 0; i < cde_ctx->num_bufs; i++) {
                struct nvgpu_mem *mem = cde_ctx->mem + i;
                nvgpu_dma_unmap_free(cde_ctx->vm, mem);
        }

        nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);

        cde_ctx->convert_cmd = NULL;
        cde_ctx->init_convert_cmd = NULL;
        cde_ctx->num_bufs = 0;
        cde_ctx->num_params = 0;
        cde_ctx->init_cmd_num_entries = 0;
        cde_ctx->convert_cmd_num_entries = 0;
        cde_ctx->init_cmd_executed = false;
}

static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
__must_hold(&cde_app->mutex)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        struct channel_gk20a *ch = cde_ctx->ch;
        struct vm_gk20a *vm = ch->vm;

        trace_gk20a_cde_remove_ctx(cde_ctx);

        /* release mapped memory */
        gk20a_deinit_cde_img(cde_ctx);
        nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
                         cde_ctx->backing_store_vaddr);

        /* free the channel */
        gk20a_channel_close(ch);

        /* housekeeping on app */
        nvgpu_list_del(&cde_ctx->list);
        l->cde_app.ctx_count--;
        nvgpu_kfree(g, cde_ctx);
}

static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
                                     bool wait_finish)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;

        /* permanent contexts do not have a deleter work */
        if (!cde_ctx->is_temporary)
                return;

        if (wait_finish) {
                nvgpu_mutex_release(&cde_app->mutex);
                cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
                nvgpu_mutex_acquire(&cde_app->mutex);
        } else {
                cancel_delayed_work(&cde_ctx->ctx_deleter_work);
        }
}

static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &l->cde_app;
        struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

        /* it is safe to drop the mutex in cancel_deleter since the app is
         * deinitialised and no new jobs are started; a deleter work can only
         * be waiting for the mutex, or about to abort before taking it */

        nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
                        &cde_app->free_contexts, gk20a_cde_ctx, list) {
                gk20a_cde_cancel_deleter(cde_ctx, true);
                gk20a_cde_remove_ctx(cde_ctx);
        }

        nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
                        &cde_app->used_contexts, gk20a_cde_ctx, list) {
                gk20a_cde_cancel_deleter(cde_ctx, true);
                gk20a_cde_remove_ctx(cde_ctx);
        }
}

static void gk20a_cde_stop(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &l->cde_app;

        /* prevent further conversions and delayed works from working */
        cde_app->initialised = false;
        /* free all data, empty the list */
        gk20a_cde_remove_contexts(l);
}

void gk20a_cde_destroy(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &l->cde_app;

        if (!cde_app->initialised)
                return;

        nvgpu_mutex_acquire(&cde_app->mutex);
        gk20a_cde_stop(l);
        nvgpu_mutex_release(&cde_app->mutex);

        nvgpu_mutex_destroy(&cde_app->mutex);
}

void gk20a_cde_suspend(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &l->cde_app;
        struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

        if (!cde_app->initialised)
                return;

        nvgpu_mutex_acquire(&cde_app->mutex);

        nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
                        &cde_app->free_contexts, gk20a_cde_ctx, list) {
                gk20a_cde_cancel_deleter(cde_ctx, false);
        }

        nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
                        &cde_app->used_contexts, gk20a_cde_ctx, list) {
                gk20a_cde_cancel_deleter(cde_ctx, false);
        }

        nvgpu_mutex_release(&cde_app->mutex);
}

static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &l->cde_app;
        struct gk20a_cde_ctx *cde_ctx;

        cde_ctx = gk20a_cde_allocate_context(l);
        if (IS_ERR(cde_ctx))
                return PTR_ERR(cde_ctx);

        nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts);
        cde_app->ctx_count++;
        if (cde_app->ctx_count > cde_app->ctx_count_top)
                cde_app->ctx_count_top = cde_app->ctx_count;

        return 0;
}

static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
        int err;
        int i;

        for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
                err = gk20a_cde_create_context(l);
                if (err)
                        goto out;
        }

        return 0;
out:
        gk20a_cde_remove_contexts(l);
        return err;
}

static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
                              struct nvgpu_firmware *img,
                              struct gk20a_cde_hdr_buf *buf)
{
        struct nvgpu_mem *mem;
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        int err;

        /* check that the file can hold the buf */
        if (buf->data_byte_offset != 0 &&
            buf->data_byte_offset + buf->num_bytes > img->size) {
                nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
                           cde_ctx->num_bufs);
                return -EINVAL;
        }

        /* check that we have enough buf elems available */
        if (cde_ctx->num_bufs >= MAX_CDE_BUFS) {
                nvgpu_warn(g, "cde: invalid data section. buffer idx = %d",
                           cde_ctx->num_bufs);
                return -ENOMEM;
        }

        /* allocate buf */
        mem = cde_ctx->mem + cde_ctx->num_bufs;
        err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem);
        if (err) {
                nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d",
                           cde_ctx->num_bufs);
                return -ENOMEM;
        }

        /* copy the content */
        if (buf->data_byte_offset != 0)
                memcpy(mem->cpu_va, img->data + buf->data_byte_offset,
                       buf->num_bytes);

        cde_ctx->num_bufs++;

        return 0;
}
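
/*
 * Patch one value into a target buffer. The value is shifted (left for a
 * positive shift, right for a negative one) and masked, then merged with
 * the bits already stored at the target location. For 64-bit big-endian
 * targets the stored data is word-swapped around the read-modify-write.
 */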
static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
                              int type, s32 shift, u64 mask, u64 value)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        u32 *target_mem_ptr = target;
        u64 *target_mem_ptr_u64 = target;
        u64 current_value, new_value;

        value = (shift >= 0) ? value << shift : value >> -shift;
        value &= mask;

        /* read current data from the location */
        current_value = 0;
        if (type == TYPE_PARAM_TYPE_U32) {
                if (mask != 0xfffffffful)
                        current_value = *target_mem_ptr;
        } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) {
                if (mask != ~0ul)
                        current_value = *target_mem_ptr_u64;
        } else if (type == TYPE_PARAM_TYPE_U64_BIG) {
                current_value = *target_mem_ptr_u64;
                current_value = (u64)(current_value >> 32) |
                        (u64)(current_value << 32);
        } else {
                nvgpu_warn(g, "cde: unknown type. type=%d", type);
                return -EINVAL;
        }

        current_value &= ~mask;
        new_value = current_value | value;

        /* store the element data back */
        if (type == TYPE_PARAM_TYPE_U32)
                *target_mem_ptr = (u32)new_value;
        else if (type == TYPE_PARAM_TYPE_U64_LITTLE)
                *target_mem_ptr_u64 = new_value;
        else {
                new_value = (u64)(new_value >> 32) |
                        (u64)(new_value << 32);
                *target_mem_ptr_u64 = new_value;
        }

        return 0;
}

static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
                                  struct nvgpu_firmware *img,
                                  struct gk20a_cde_hdr_replace *replace)
{
        struct nvgpu_mem *source_mem;
        struct nvgpu_mem *target_mem;
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        u32 *target_mem_ptr;
        u64 vaddr;
        int err;

        if (replace->target_buf >= cde_ctx->num_bufs ||
            replace->source_buf >= cde_ctx->num_bufs) {
                nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
                           replace->target_buf, replace->source_buf,
                           cde_ctx->num_bufs);
                return -EINVAL;
        }

        source_mem = cde_ctx->mem + replace->source_buf;
        target_mem = cde_ctx->mem + replace->target_buf;
        target_mem_ptr = target_mem->cpu_va;

        if (source_mem->size < (replace->source_byte_offset + 3) ||
            target_mem->size < (replace->target_byte_offset + 3)) {
                nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
                           replace->target_byte_offset,
                           replace->source_byte_offset,
                           source_mem->size,
                           target_mem->size);
                return -EINVAL;
        }

        /* calculate the target pointer */
        target_mem_ptr += (replace->target_byte_offset / sizeof(u32));

        /* determine patch value */
        vaddr = source_mem->gpu_va + replace->source_byte_offset;
        err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
                                 replace->shift, replace->mask,
                                 vaddr);
        if (err) {
                nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
                           err, replace->target_buf,
                           replace->target_byte_offset,
                           replace->source_buf,
                           replace->source_byte_offset);
        }

        return err;
}

static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        struct nvgpu_mem *target_mem;
        u32 *target_mem_ptr;
        u64 new_data;
        int user_id = 0, err;
        unsigned int i;

        for (i = 0; i < cde_ctx->num_params; i++) {
                struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
                target_mem = cde_ctx->mem + param->target_buf;
                target_mem_ptr = target_mem->cpu_va;
                target_mem_ptr += (param->target_byte_offset / sizeof(u32));

                switch (param->id) {
                case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
                        new_data = g->gr.comptags_per_cacheline;
                        break;
                case TYPE_PARAM_GPU_CONFIGURATION:
                        new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
                                g->gr.cacheline_size;
                        break;
                case TYPE_PARAM_FIRSTPAGEOFFSET:
                        new_data = cde_ctx->surf_param_offset;
                        break;
                case TYPE_PARAM_NUMPAGES:
                        new_data = cde_ctx->surf_param_lines;
                        break;
                case TYPE_PARAM_BACKINGSTORE:
                        new_data = cde_ctx->backing_store_vaddr;
                        break;
                case TYPE_PARAM_DESTINATION:
                        new_data = cde_ctx->compbit_vaddr;
                        break;
                case TYPE_PARAM_DESTINATION_SIZE:
                        new_data = cde_ctx->compbit_size;
                        break;
                case TYPE_PARAM_BACKINGSTORE_SIZE:
                        new_data = g->gr.compbit_store.mem.size;
                        break;
                case TYPE_PARAM_SOURCE_SMMU_ADDR:
                        new_data = gpuva_to_iova_base(cde_ctx->vm,
                                                      cde_ctx->surf_vaddr);
                        if (new_data == 0)
                                return -EINVAL;
                        break;
                case TYPE_PARAM_BACKINGSTORE_BASE_HW:
                        new_data = g->gr.compbit_store.base_hw;
                        break;
                case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
                        new_data = g->gr.gobs_per_comptagline_per_slice;
                        break;
                case TYPE_PARAM_SCATTERBUFFER:
                        new_data = cde_ctx->scatterbuffer_vaddr;
                        break;
                case TYPE_PARAM_SCATTERBUFFER_SIZE:
                        new_data = cde_ctx->scatterbuffer_size;
                        break;
                default:
                        user_id = param->id - NUM_RESERVED_PARAMS;
                        if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
                                continue;
                        new_data = cde_ctx->user_param_values[user_id];
                }

                gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
                          i, param->id, param->target_buf,
                          param->target_byte_offset, new_data,
                          param->data_offset, param->type, param->shift,
                          param->mask);

                new_data += param->data_offset;

                err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
                                         param->shift, param->mask, new_data);

                if (err) {
                        nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
                                   err, i, param->id, param->target_buf,
                                   param->target_byte_offset, new_data);
                        return err;
                }
        }

        return 0;
}

static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
                                struct nvgpu_firmware *img,
                                struct gk20a_cde_hdr_param *param)
{
        struct nvgpu_mem *target_mem;
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;

        if (param->target_buf >= cde_ctx->num_bufs) {
                nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
                           cde_ctx->num_params, param->target_buf,
                           cde_ctx->num_bufs);
                return -EINVAL;
        }

        target_mem = cde_ctx->mem + param->target_buf;
        if (target_mem->size < (param->target_byte_offset + 3)) {
                nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu",
                           cde_ctx->num_params, param->target_byte_offset,
                           target_mem->size);
                return -EINVAL;
        }

        /* does this parameter fit into our parameter structure */
        if (cde_ctx->num_params >= MAX_CDE_PARAMS) {
                nvgpu_warn(g, "cde: no room for new parameters param idx = %d",
                           cde_ctx->num_params);
                return -ENOMEM;
        }

        /* is the given id valid? */
        if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) {
                nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u",
                           cde_ctx->num_params, param->id,
                           NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS);
                return -EINVAL;
        }

        cde_ctx->params[cde_ctx->num_params] = *param;
        cde_ctx->num_params++;

        return 0;
}

static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
                                         struct nvgpu_firmware *img,
                                         u32 required_class)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        int err;

        /* CDE enabled */
        cde_ctx->ch->cde = true;

        err = gk20a_alloc_obj_ctx(cde_ctx->ch, required_class, 0);
        if (err) {
                nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", err);
                return err;
        }

        return 0;
}
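
/*
 * Translate a command element array from the firmware image into gpfifo
 * entries pointing at the previously allocated target buffers. INIT and
 * CONVERT commands are collected into separate lists.
 */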
static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
                                  struct nvgpu_firmware *img,
                                  u32 op,
                                  struct gk20a_cde_cmd_elem *cmd_elem,
                                  u32 num_elems)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
        u32 *num_entries;
        unsigned int i;

        /* check command type */
        if (op == TYPE_BUF_COMMAND_INIT) {
                gpfifo = &cde_ctx->init_convert_cmd;
                num_entries = &cde_ctx->init_cmd_num_entries;
        } else if (op == TYPE_BUF_COMMAND_CONVERT) {
                gpfifo = &cde_ctx->convert_cmd;
                num_entries = &cde_ctx->convert_cmd_num_entries;
        } else {
                nvgpu_warn(g, "cde: unknown command. op=%u", op);
                return -EINVAL;
        }

        /* allocate gpfifo entries to be pushed */
        *gpfifo = nvgpu_kzalloc(g,
                                sizeof(struct nvgpu_gpfifo) * num_elems);
        if (!*gpfifo) {
                nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
                return -ENOMEM;
        }

        gpfifo_elem = *gpfifo;
        for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) {
                struct nvgpu_mem *target_mem;

                /* validate the current entry */
                if (cmd_elem->target_buf >= cde_ctx->num_bufs) {
                        nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)",
                                   cmd_elem->target_buf, cde_ctx->num_bufs);
                        return -EINVAL;
                }

                target_mem = cde_ctx->mem + cmd_elem->target_buf;
                if (target_mem->size <
                    cmd_elem->target_byte_offset + cmd_elem->num_bytes) {
                        nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)",
                                   target_mem->size,
                                   cmd_elem->target_byte_offset,
                                   cmd_elem->num_bytes);
                        return -EINVAL;
                }

                /* store the element into gpfifo */
                gpfifo_elem->entry0 =
                        u64_lo32(target_mem->gpu_va +
                                 cmd_elem->target_byte_offset);
                gpfifo_elem->entry1 =
                        u64_hi32(target_mem->gpu_va +
                                 cmd_elem->target_byte_offset) |
                        pbdma_gp_entry1_length_f(cmd_elem->num_bytes /
                                                 sizeof(u32));
        }

        *num_entries = num_elems;
        return 0;
}

static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
                sizeof(struct nvgpu_gpfifo);
        unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
                sizeof(struct nvgpu_gpfifo);
        unsigned long total_bytes = init_bytes + conv_bytes;
        struct nvgpu_gpfifo *combined_cmd;

        /* allocate buffer that has space for both */
        combined_cmd = nvgpu_kzalloc(g, total_bytes);
        if (!combined_cmd) {
                nvgpu_warn(g,
                           "cde: could not allocate memory for gpfifo entries");
                return -ENOMEM;
        }

        /* move the original init here and append convert */
        memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes);
        memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
               cde_ctx->convert_cmd, conv_bytes);

        nvgpu_kfree(g, cde_ctx->init_convert_cmd);
        nvgpu_kfree(g, cde_ctx->convert_cmd);

        cde_ctx->init_convert_cmd = combined_cmd;
        cde_ctx->convert_cmd = combined_cmd
                + cde_ctx->init_cmd_num_entries;

        return 0;
}
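
/*
 * Walk the header elements of the firmware image and dispatch each one to
 * the matching init helper. Both an init and a convert command must be
 * present; on success the two command lists are packed into one buffer.
 */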
static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
                              struct nvgpu_firmware *img)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        struct gk20a_cde_app *cde_app = &l->cde_app;
        u32 *data = (u32 *)img->data;
        u32 num_of_elems;
        struct gk20a_cde_hdr_elem *elem;
        u32 min_size = 0;
        int err = 0;
        unsigned int i;

        min_size += 2 * sizeof(u32);
        if (img->size < min_size) {
                nvgpu_warn(g, "cde: invalid image header");
                return -EINVAL;
        }

        cde_app->firmware_version = data[0];
        num_of_elems = data[1];

        min_size += num_of_elems * sizeof(*elem);
        if (img->size < min_size) {
                nvgpu_warn(g, "cde: bad image");
                return -EINVAL;
        }

        elem = (struct gk20a_cde_hdr_elem *)&data[2];
        for (i = 0; i < num_of_elems; i++) {
                int err = 0;
                switch (elem->type) {
                case TYPE_BUF:
                        err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf);
                        break;
                case TYPE_REPLACE:
                        err = gk20a_init_cde_replace(cde_ctx, img,
                                                     &elem->replace);
                        break;
                case TYPE_PARAM:
                        err = gk20a_init_cde_param(cde_ctx, img, &elem->param);
                        break;
                case TYPE_REQUIRED_CLASS:
                        err = gk20a_init_cde_required_class(cde_ctx, img,
                                                            elem->required_class);
                        break;
                case TYPE_COMMAND:
                {
                        struct gk20a_cde_cmd_elem *cmd = (void *)
                                &img->data[elem->command.data_byte_offset];
                        err = gk20a_init_cde_command(cde_ctx, img,
                                                     elem->command.op, cmd,
                                                     elem->command.num_entries);
                        break;
                }
                case TYPE_ARRAY:
                        memcpy(&cde_app->arrays[elem->array.id][0],
                               elem->array.data,
                               MAX_CDE_ARRAY_ENTRIES * sizeof(u32));
                        break;
                default:
                        nvgpu_warn(g, "cde: unknown header element");
                        err = -EINVAL;
                }

                if (err)
                        goto deinit_image;

                elem++;
        }

        if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) {
                nvgpu_warn(g, "cde: init command not defined");
                err = -EINVAL;
                goto deinit_image;
        }

        if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) {
                nvgpu_warn(g, "cde: convert command not defined");
                err = -EINVAL;
                goto deinit_image;
        }

        err = gk20a_cde_pack_cmdbufs(cde_ctx);
        if (err)
                goto deinit_image;

        return 0;

deinit_image:
        gk20a_deinit_cde_img(cde_ctx);
        return err;
}

static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
                                    u32 op, struct nvgpu_fence *fence,
                                    u32 flags, struct gk20a_fence **fence_out)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        struct nvgpu_gpfifo *gpfifo = NULL;
        int num_entries = 0;

        /* check command type */
        if (op == TYPE_BUF_COMMAND_INIT) {
                /* both init and convert combined */
                gpfifo = cde_ctx->init_convert_cmd;
                num_entries = cde_ctx->init_cmd_num_entries
                        + cde_ctx->convert_cmd_num_entries;
        } else if (op == TYPE_BUF_COMMAND_CONVERT) {
                gpfifo = cde_ctx->convert_cmd;
                num_entries = cde_ctx->convert_cmd_num_entries;
        } else {
                nvgpu_warn(g, "cde: unknown buffer");
                return -EINVAL;
        }

        if (gpfifo == NULL || num_entries == 0) {
                nvgpu_warn(g, "cde: buffer not available");
                return -ENOSYS;
        }

        return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
                                           num_entries, flags, fence,
                                           fence_out, true, NULL);
}

static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;

        gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
        trace_gk20a_cde_release(cde_ctx);

        nvgpu_mutex_acquire(&cde_app->mutex);

        if (cde_ctx->in_use) {
                cde_ctx->in_use = false;
                nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts);
                cde_app->ctx_usecount--;
        } else {
                gk20a_dbg_info("double release cde context %p", cde_ctx);
        }

        nvgpu_mutex_release(&cde_app->mutex);
}
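
/*
 * Delayed work that tears down an idle temporary context. Deletion is
 * skipped if the context has been taken into use again, the app is being
 * deinitialised, or the GPU cannot be powered on right now.
 */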
static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
        struct delayed_work *delay_work = to_delayed_work(work);
        struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
                        struct gk20a_cde_ctx, ctx_deleter_work);
        struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        int err;

        /* someone has just taken it? engine deletion started? */
        if (cde_ctx->in_use || !cde_app->initialised)
                return;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
                  "cde: attempting to delete temporary %p", cde_ctx);

        err = gk20a_busy(g);
        if (err) {
                /* this context would find new use anyway later, so not freeing
                 * here does not leak anything */
                nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
                           " temp ctx deletion");
                return;
        }

        nvgpu_mutex_acquire(&cde_app->mutex);
        if (cde_ctx->in_use || !cde_app->initialised) {
                gk20a_dbg(gpu_dbg_cde_ctx,
                          "cde: context use raced, not deleting %p",
                          cde_ctx);
                goto out;
        }

        WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
             "double pending %p", cde_ctx);

        gk20a_cde_remove_ctx(cde_ctx);
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
                  "cde: destroyed %p count=%d use=%d max=%d",
                  cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
                  cde_app->ctx_count_top);

out:
        nvgpu_mutex_release(&cde_app->mutex);
        gk20a_idle(g);
}
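
/*
 * Pick a context for a conversion job: prefer an idle preallocated context,
 * otherwise allocate a temporary one. Returns -EAGAIN when the use count
 * limit has been reached so that the caller may retry.
 */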
static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
__must_hold(&cde_app->mutex)
{
        struct gk20a *g = &l->g;
        struct gk20a_cde_app *cde_app = &l->cde_app;
        struct gk20a_cde_ctx *cde_ctx;

        /* exhausted? */

        if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
                return ERR_PTR(-EAGAIN);

        /* idle context available? */

        if (!nvgpu_list_empty(&cde_app->free_contexts)) {
                cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
                                gk20a_cde_ctx, list);
                gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
                          "cde: got free %p count=%d use=%d max=%d",
                          cde_ctx, cde_app->ctx_count,
                          cde_app->ctx_usecount,
                          cde_app->ctx_count_top);
                trace_gk20a_cde_get_context(cde_ctx);

                /* deleter work may be scheduled, but in_use prevents it */
                cde_ctx->in_use = true;
                nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
                cde_app->ctx_usecount++;

                /* cancel any deletions now that ctx is in use */
                gk20a_cde_cancel_deleter(cde_ctx, true);
                return cde_ctx;
        }

        /* no free contexts, get a temporary one */

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
                  "cde: no free contexts, count=%d",
                  cde_app->ctx_count);

        cde_ctx = gk20a_cde_allocate_context(l);
        if (IS_ERR(cde_ctx)) {
                nvgpu_warn(g, "cde: cannot allocate context: %ld",
                           PTR_ERR(cde_ctx));
                return cde_ctx;
        }

        trace_gk20a_cde_get_context(cde_ctx);
        cde_ctx->in_use = true;
        cde_ctx->is_temporary = true;
        cde_app->ctx_usecount++;
        cde_app->ctx_count++;
        if (cde_app->ctx_count > cde_app->ctx_count_top)
                cde_app->ctx_count_top = cde_app->ctx_count;
        nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);

        return cde_ctx;
}

static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
        struct gk20a *g = &l->g;
        struct gk20a_cde_app *cde_app = &l->cde_app;
        struct gk20a_cde_ctx *cde_ctx = NULL;
        struct nvgpu_timeout timeout;

        nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME,
                           NVGPU_TIMER_CPU_TIMER);

        do {
                cde_ctx = gk20a_cde_do_get_context(l);
                if (PTR_ERR(cde_ctx) != -EAGAIN)
                        break;

                /* exhausted, retry */
                nvgpu_mutex_release(&cde_app->mutex);
                cond_resched();
                nvgpu_mutex_acquire(&cde_app->mutex);
        } while (!nvgpu_timeout_expired(&timeout));

        return cde_ctx;
}

static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
{
        struct gk20a *g = &l->g;
        struct gk20a_cde_ctx *cde_ctx;
        int ret;

        cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx));
        if (!cde_ctx)
                return ERR_PTR(-ENOMEM);

        cde_ctx->l = l;
        cde_ctx->dev = dev_from_gk20a(g);

        ret = gk20a_cde_load(cde_ctx);
        if (ret) {
                nvgpu_kfree(g, cde_ctx);
                return ERR_PTR(ret);
        }

        nvgpu_init_list_node(&cde_ctx->list);
        cde_ctx->is_temporary = false;
        cde_ctx->in_use = false;
        INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work,
                          gk20a_cde_ctx_deleter_fn);

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx);
        trace_gk20a_cde_allocate_context(cde_ctx);
        return cde_ctx;
}
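
/*
 * Entry point for a conversion: map the destination dma-buf into the CDE
 * VM, optionally populate the scatter buffer, patch the firmware parameters,
 * and submit the (init +) convert command buffer on the context's channel.
 */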
int gk20a_cde_convert(struct nvgpu_os_linux *l,
                      struct dma_buf *compbits_scatter_buf,
                      u64 compbits_byte_offset,
                      u64 scatterbuffer_byte_offset,
                      struct nvgpu_fence *fence,
                      u32 __flags, struct gk20a_cde_param *params,
                      int num_params, struct gk20a_fence **fence_out)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
        struct gk20a *g = &l->g;
        struct gk20a_cde_ctx *cde_ctx = NULL;
        struct gk20a_comptags comptags;
        struct nvgpu_os_buffer os_buf = {
                compbits_scatter_buf,
                dev_from_gk20a(g)
        };
        u64 mapped_compbits_offset = 0;
        u64 compbits_size = 0;
        u64 mapped_scatterbuffer_offset = 0;
        u64 scatterbuffer_size = 0;
        u64 map_vaddr = 0;
        u64 map_offset = 0;
        u64 map_size = 0;
        u8 *surface = NULL;
        u64 big_page_mask = 0;
        u32 flags;
        int err, i;
        const s16 compbits_kind = 0;

        gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu",
                  compbits_byte_offset, scatterbuffer_byte_offset);

        /* scatter buffer must be after compbits buffer */
        if (scatterbuffer_byte_offset &&
            scatterbuffer_byte_offset < compbits_byte_offset)
                return -EINVAL;

        err = gk20a_busy(g);
        if (err)
                return err;

        nvgpu_mutex_acquire(&l->cde_app.mutex);
        cde_ctx = gk20a_cde_get_context(l);
        nvgpu_mutex_release(&l->cde_app.mutex);
        if (IS_ERR(cde_ctx)) {
                err = PTR_ERR(cde_ctx);
                goto exit_idle;
        }

        /* First, map the buffer to local va */

        /* ensure that the compbits buffer has drvdata */
        err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf,
                                         dev_from_gk20a(g));
        if (err)
                goto exit_idle;

        /* compbits don't start at a page-aligned offset, so we need to align
           the region to be mapped */
        big_page_mask = cde_ctx->vm->big_page_size - 1;
        map_offset = compbits_byte_offset & ~big_page_mask;
        map_size = compbits_scatter_buf->size - map_offset;

        /* compute compbit start offset from the beginning of the mapped
           area */
        mapped_compbits_offset = compbits_byte_offset - map_offset;
        if (scatterbuffer_byte_offset) {
                compbits_size = scatterbuffer_byte_offset -
                        compbits_byte_offset;
                mapped_scatterbuffer_offset = scatterbuffer_byte_offset -
                        map_offset;
                scatterbuffer_size = compbits_scatter_buf->size -
                        scatterbuffer_byte_offset;
        } else {
                compbits_size = compbits_scatter_buf->size -
                        compbits_byte_offset;
        }

        gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu",
                  map_offset, map_size);
        gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu",
                  mapped_compbits_offset, compbits_size);
        gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu",
                  mapped_scatterbuffer_offset, scatterbuffer_size);

        /* map the destination buffer */
        get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map_linux */
        err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
                                 NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE |
                                 NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL,
                                 NV_KIND_INVALID,
                                 compbits_kind, /* incompressible kind */
                                 gk20a_mem_flag_none,
                                 map_offset, map_size,
                                 NULL,
                                 &map_vaddr);
        if (err) {
                dma_buf_put(compbits_scatter_buf);
                err = -EINVAL;
                goto exit_idle;
        }

        if (scatterbuffer_byte_offset &&
            l->ops.cde.need_scatter_buffer &&
            l->ops.cde.need_scatter_buffer(g)) {
                struct sg_table *sgt;
                void *scatter_buffer;

                surface = dma_buf_vmap(compbits_scatter_buf);
                if (IS_ERR(surface)) {
                        nvgpu_warn(g,
                                   "dma_buf_vmap failed");
                        err = -EINVAL;
                        goto exit_unmap_vaddr;
                }

                scatter_buffer = surface + scatterbuffer_byte_offset;

                gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p",
                          surface, scatter_buffer);
                sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf);
                if (IS_ERR(sgt)) {
                        nvgpu_warn(g,
                                   "mm_pin failed");
                        err = -EINVAL;
                        goto exit_unmap_surface;
                } else {
                        err = l->ops.cde.populate_scatter_buffer(g, sgt,
                                        compbits_byte_offset, scatter_buffer,
                                        scatterbuffer_size);
                        WARN_ON(err);

                        gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf,
                                       sgt);
                        if (err)
                                goto exit_unmap_surface;
                }

                __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size);
                dma_buf_vunmap(compbits_scatter_buf, surface);
                surface = NULL;
        }

        /* store source buffer compression tags */
        gk20a_get_comptags(&os_buf, &comptags);
        cde_ctx->surf_param_offset = comptags.offset;
        cde_ctx->surf_param_lines = comptags.lines;

        /* store surface vaddr. This is actually compbit vaddr, but since
           compbits live in the same surface, and we can get the alloc base
           address by using gpuva_to_iova_base, this will do */
        cde_ctx->surf_vaddr = map_vaddr;

        /* store information about destination */
        cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset;
        cde_ctx->compbit_size = compbits_size;

        cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset;
        cde_ctx->scatterbuffer_size = scatterbuffer_size;

        /* remove existing argument data */
        memset(cde_ctx->user_param_values, 0,
               sizeof(cde_ctx->user_param_values));

        /* read user space arguments for the conversion */
        for (i = 0; i < num_params; i++) {
                struct gk20a_cde_param *param = params + i;
                int id = param->id - NUM_RESERVED_PARAMS;

                if (id < 0 || id >= MAX_CDE_USER_PARAMS) {
                        nvgpu_warn(g, "cde: unknown user parameter");
                        err = -EINVAL;
                        goto exit_unmap_surface;
                }
                cde_ctx->user_param_values[id] = param->value;
        }

        /* patch data */
        err = gk20a_cde_patch_params(cde_ctx);
        if (err) {
                nvgpu_warn(g, "cde: failed to patch parameters");
                goto exit_unmap_surface;
        }

        gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n",
                  g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr);
        gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n",
                  cde_ctx->compbit_size, cde_ctx->compbit_vaddr);
        gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n",
                  cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr);

        /* always take the postfence as it is needed for protecting the
         * cde context */
        flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;

        /* gk20a_cde_execute_buffer() will grab a power reference of its own */
        gk20a_idle(g);

        /* execute the conversion buffer, combined with init first if it's the
         * first time */
        err = gk20a_cde_execute_buffer(cde_ctx,
                        cde_ctx->init_cmd_executed
                                ? TYPE_BUF_COMMAND_CONVERT
                                : TYPE_BUF_COMMAND_INIT,
                        fence, flags, fence_out);

        cde_ctx->init_cmd_executed = true;

        /* unmap the buffers - channel holds references to them now */
        nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);

        return err;

exit_unmap_surface:
        if (surface)
                dma_buf_vunmap(compbits_scatter_buf, surface);
exit_unmap_vaddr:
        nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
exit_idle:
        gk20a_idle(g);
        return err;
}
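
/*
 * Channel update callback: once the channel is idle, release the context
 * back to the free list. On channel timeout the context is marked temporary
 * and a replacement context is created in its place.
 */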
static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
        struct gk20a_cde_ctx *cde_ctx = data;
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        struct gk20a_cde_app *cde_app = &l->cde_app;
        bool channel_idle;

        channel_gk20a_joblist_lock(ch);
        channel_idle = channel_gk20a_joblist_is_empty(ch);
        channel_gk20a_joblist_unlock(ch);

        if (!channel_idle)
                return;

        trace_gk20a_cde_finished_ctx_cb(cde_ctx);
        gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
        if (!cde_ctx->in_use)
                gk20a_dbg_info("double finish cde context %p on channel %p",
                               cde_ctx, ch);

        if (ch->has_timedout) {
                if (cde_ctx->is_temporary) {
                        nvgpu_warn(g,
                                   "cde: channel had timed out"
                                   " (temporary channel)");
                        /* going to be deleted anyway */
                } else {
                        nvgpu_warn(g,
                                   "cde: channel had timed out"
                                   ", reloading");
                        /* mark it to be deleted, replace with a new one */
                        nvgpu_mutex_acquire(&cde_app->mutex);
                        cde_ctx->is_temporary = true;
                        if (gk20a_cde_create_context(l)) {
                                nvgpu_err(g, "cde: can't replace context");
                        }
                        nvgpu_mutex_release(&cde_app->mutex);
                }
        }

        /* delete temporary contexts later (watch for doubles) */
        if (cde_ctx->is_temporary && cde_ctx->in_use) {
                WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
                schedule_delayed_work(&cde_ctx->ctx_deleter_work,
                                      msecs_to_jiffies(CTX_DELETE_TIME));
        }

        if (!ch->has_timedout)
                gk20a_cde_ctx_release(cde_ctx);
}
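
/*
 * Set up one CDE context: fetch the gpu2cde firmware, open a dedicated
 * channel bound to the CDE VM, map the compbit backing store read-only,
 * and parse the firmware image into buffers, parameters and commands.
 */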
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
{
        struct nvgpu_os_linux *l = cde_ctx->l;
        struct gk20a *g = &l->g;
        struct nvgpu_firmware *img;
        struct channel_gk20a *ch;
        struct gr_gk20a *gr = &g->gr;
        int err = 0;
        u64 vaddr;

        img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
        if (!img) {
                nvgpu_err(g, "cde: could not fetch the firmware");
                return -ENOSYS;
        }

        ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
                                            cde_ctx,
                                            -1,
                                            false);
        if (!ch) {
                nvgpu_warn(g, "cde: gk20a channel not available");
                err = -ENOMEM;
                goto err_get_gk20a_channel;
        }

        /* bind the channel to the vm */
        err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
        if (err) {
                nvgpu_warn(g, "cde: could not bind vm");
                goto err_commit_va;
        }

        /* allocate gpfifo (1024 should be more than enough) */
        err = gk20a_channel_alloc_gpfifo(ch, 1024, 0, 0);
        if (err) {
                nvgpu_warn(g, "cde: unable to allocate gpfifo");
                goto err_alloc_gpfifo;
        }

        /* map backing store to gpu virtual space */
        vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
                               g->gr.compbit_store.mem.size,
                               NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE,
                               gk20a_mem_flag_read_only,
                               false,
                               gr->compbit_store.mem.aperture);

        if (!vaddr) {
                nvgpu_warn(g, "cde: cannot map compression bit backing store");
                err = -ENOMEM;
                goto err_map_backingstore;
        }

        /* store initialisation data */
        cde_ctx->ch = ch;
        cde_ctx->vm = ch->vm;
        cde_ctx->backing_store_vaddr = vaddr;

        /* initialise the firmware */
        err = gk20a_init_cde_img(cde_ctx, img);
        if (err) {
                nvgpu_warn(g, "cde: image initialisation failed");
                goto err_init_cde_img;
        }

        /* initialisation done */
        nvgpu_release_firmware(g, img);

        return 0;

err_init_cde_img:
        nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
err_map_backingstore:
err_alloc_gpfifo:
        nvgpu_vm_put(ch->vm);
err_commit_va:
err_get_gk20a_channel:
        nvgpu_release_firmware(g, img);
        nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
        return err;
}

int gk20a_cde_reload(struct nvgpu_os_linux *l)
__acquires(&l->cde_app->mutex)
__releases(&l->cde_app->mutex)
{
        struct gk20a *g = &l->g;
        struct gk20a_cde_app *cde_app = &l->cde_app;
        int err;

        if (!cde_app->initialised)
                return -ENOSYS;

        err = gk20a_busy(g);
        if (err)
                return err;

        nvgpu_mutex_acquire(&cde_app->mutex);

        gk20a_cde_stop(l);

        err = gk20a_cde_create_contexts(l);
        if (!err)
                cde_app->initialised = true;

        nvgpu_mutex_release(&cde_app->mutex);

        gk20a_idle(g);
        return err;
}

int gk20a_init_cde_support(struct nvgpu_os_linux *l)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
        struct gk20a_cde_app *cde_app = &l->cde_app;
        int err;

        if (cde_app->initialised)
                return 0;

        gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init");

        err = nvgpu_mutex_init(&cde_app->mutex);
        if (err)
                return err;

        nvgpu_mutex_acquire(&cde_app->mutex);

        nvgpu_init_list_node(&cde_app->free_contexts);
        nvgpu_init_list_node(&cde_app->used_contexts);
        cde_app->ctx_count = 0;
        cde_app->ctx_count_top = 0;
        cde_app->ctx_usecount = 0;

        err = gk20a_cde_create_contexts(l);
        if (!err)
                cde_app->initialised = true;

        nvgpu_mutex_release(&cde_app->mutex);
        gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err);

        if (err)
                nvgpu_mutex_destroy(&cde_app->mutex);

        return err;
}

enum cde_launch_patch_id {
        PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024,
        PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025,
        PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */
        PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027,
        PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028,
        PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */
        PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */
        PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */
        PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
        PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */
        PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */
        PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035,
        PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036,
        PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037,
        PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038,
        PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039,
        PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040,
        PATCH_USER_CONST_XBLOCKS_ID = 1041,
        PATCH_H_USER_CONST_DSTOFFSET_ID = 1042,
        PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043,
        PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044,
        PATCH_V_USER_CONST_DSTOFFSET_ID = 1045,
        PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046,
        PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047,
        PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048,
        PATCH_H_LAUNCH_WORD1_ID = 1049,
        PATCH_H_LAUNCH_WORD2_ID = 1050,
        PATCH_V_LAUNCH_WORD1_ID = 1051,
        PATCH_V_LAUNCH_WORD2_ID = 1052,
        PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053,
        PATCH_H_QMD_REGISTER_COUNT_ID = 1054,
        PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055,
        PATCH_V_QMD_REGISTER_COUNT_ID = 1056,
};

/* maximum number of WRITE_PATCHes in the below function */
#define MAX_CDE_LAUNCH_PATCHES 32
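
/*
 * Build the WRITE_PATCH parameter list for a firmware v1 launch. Grid and
 * group dimensions are derived from the surface size in 8x8-pixel tiles,
 * aligned to the workgroup size; the horizontal/vertical shader programs
 * are selected through the chip-specific get_program_numbers hook.
 */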
1432 | static int gk20a_buffer_convert_gpu_to_cde_v1( | ||
1433 | struct nvgpu_os_linux *l, | ||
1434 | struct dma_buf *dmabuf, u32 consumer, | ||
1435 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1436 | u64 scatterbuffer_offset, | ||
1437 | u32 width, u32 height, u32 block_height_log2, | ||
1438 | u32 submit_flags, struct nvgpu_fence *fence_in, | ||
1439 | struct gk20a_buffer_state *state) | ||
1440 | { | ||
1441 | struct gk20a *g = &l->g; | ||
1442 | struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; | ||
1443 | int param = 0; | ||
1444 | int err = 0; | ||
1445 | struct gk20a_fence *new_fence = NULL; | ||
1446 | const int wgx = 8; | ||
1447 | const int wgy = 8; | ||
1448 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ | ||
1449 | const int xalign = compbits_per_byte * wgx; | ||
1450 | const int yalign = wgy; | ||
1451 | |||
1452 | /* Compute per launch parameters */ | ||
1453 | const int xtiles = (width + 7) >> 3; | ||
1454 | const int ytiles = (height + 7) >> 3; | ||
1455 | const int gridw_h = roundup(xtiles, xalign) / xalign; | ||
1456 | const int gridh_h = roundup(ytiles, yalign) / yalign; | ||
1457 | const int gridw_v = roundup(ytiles, xalign) / xalign; | ||
1458 | const int gridh_v = roundup(xtiles, yalign) / yalign; | ||
1459 | const int xblocks = (xtiles + 1) >> 1; | ||
1460 | const int voffset = compbits_voffset - compbits_hoffset; | ||
1461 | |||
1462 | int hprog = -1; | ||
1463 | int vprog = -1; | ||
1464 | |||
1465 | if (l->ops.cde.get_program_numbers) | ||
1466 | l->ops.cde.get_program_numbers(g, block_height_log2, | ||
1467 | l->cde_app.shader_parameter, | ||
1468 | &hprog, &vprog); | ||
1469 | else { | ||
1470 | nvgpu_warn(g, "cde: chip not supported"); | ||
1471 | return -ENOSYS; | ||
1472 | } | ||
1473 | |||
1474 | if (hprog < 0 || vprog < 0) { | ||
1475 | nvgpu_warn(g, "cde: could not determine programs"); | ||
1476 | return -ENOSYS; | ||
1477 | } | ||
1478 | |||
1479 | if (xtiles > 8192 / 8 || ytiles > 8192 / 8) | ||
1480 | nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | ||
1481 | xtiles, ytiles); | ||
1482 | |||
1483 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", | ||
1484 | width, height, block_height_log2, | ||
1485 | compbits_hoffset, compbits_voffset, scatterbuffer_offset); | ||
1486 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", | ||
1487 | width, height, xtiles, ytiles); | ||
1488 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", | ||
1489 | wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); | ||
1490 | gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", | ||
1491 | hprog, | ||
1492 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], | ||
1493 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], | ||
1494 | vprog, | ||
1495 | l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], | ||
1496 | l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); | ||
1497 | |||
1498 | /* Write parameters */ | ||
1499 | #define WRITE_PATCH(NAME, VALUE) \ | ||
1500 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} | ||
	WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
	WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
		block_height_log2);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
	WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
	WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
	WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
	WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
	WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);

	if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}

	if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}
#undef WRITE_PATCH
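	/*
	 * Note: only the passes requested via `consumer` receive the
	 * active launch words (ARRAY_LAUNCH_COMMAND[0..1]); the other
	 * pass is patched with ARRAY_LAUNCH_COMMAND[2..3], presumably
	 * an inert launch sequence supplied by the CDE firmware so
	 * that pass performs no work.
	 */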

	err = gk20a_cde_convert(l, dmabuf,
				compbits_hoffset,
				scatterbuffer_offset,
				fence_in, submit_flags,
				params, param, &new_fence);
	if (err)
		goto out;

	/* compbits generated, update state & fence */
	gk20a_fence_put(state->fence);
	state->fence = new_fence;
	state->valid_compbits |= consumer &
		(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
out:
	return err;
}

static int gk20a_buffer_convert_gpu_to_cde(
		struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_fence *fence_in,
		struct gk20a_buffer_state *state)
{
	struct gk20a *g = &l->g;
	int err = 0;

	if (!l->cde_app.initialised)
		return -ENOSYS;
	gk20a_dbg(gpu_dbg_cde, "firmware version = %d",
		  l->cde_app.firmware_version);

	if (l->cde_app.firmware_version == 1) {
		err = gk20a_buffer_convert_gpu_to_cde_v1(
			l, dmabuf, consumer, offset, compbits_hoffset,
			compbits_voffset, scatterbuffer_offset,
			width, height, block_height_log2,
			submit_flags, fence_in, state);
	} else {
		nvgpu_err(g, "unsupported CDE firmware version %d",
			  l->cde_app.firmware_version);
		err = -EINVAL;
	}

	return err;
}

int gk20a_prepare_compressible_read(
		struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
		u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_fence *fence,
		u32 *valid_compbits, u32 *zbc_color,
		struct gk20a_fence **fence_out)
{
	struct gk20a *g = &l->g;
	int err = 0;
	struct gk20a_buffer_state *state;
	struct dma_buf *dmabuf;
	u32 missing_bits;

	dmabuf = dma_buf_get(buffer_fd);
	if (IS_ERR(dmabuf))
		return -EINVAL;

	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
	if (err) {
		dma_buf_put(dmabuf);
		return err;
	}
	nvgpu_mutex_acquire(&state->lock);

	/* Snapshot valid_compbits under the state lock so a concurrent
	 * update cannot race with this check. */
	missing_bits = (state->valid_compbits ^ request) & request;
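	/*
	 * Example (illustrative values): with request = CDEH | CDEV and
	 * state->valid_compbits = GPU | CDEH, the expression above
	 * yields missing_bits = CDEV, so only the vertical pass needs
	 * to be produced.
	 */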
1635 | |||
	if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
		gk20a_fence_put(state->fence);
		state->fence = NULL;
		/*
		 * TODO: decompression is not implemented yet:
		 * state->fence = decompress();
		 * state->valid_compbits = 0;
		 */
		err = -EINVAL;
		goto out;
	} else if (missing_bits) {
		u32 missing_cde_bits = missing_bits &
			(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
		if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
		    missing_cde_bits) {
			err = gk20a_buffer_convert_gpu_to_cde(
					l, dmabuf,
					missing_cde_bits,
					offset, compbits_hoffset,
					compbits_voffset, scatterbuffer_offset,
					width, height, block_height_log2,
					submit_flags, fence,
					state);
			if (err)
				goto out;
		}
	}

	if (state->fence && fence_out)
		*fence_out = gk20a_fence_get(state->fence);

	if (valid_compbits)
		*valid_compbits = state->valid_compbits;

	if (zbc_color)
		*zbc_color = state->zbc_color;

out:
	nvgpu_mutex_release(&state->lock);
	dma_buf_put(dmabuf);
	return err;
}

int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
				  u32 valid_compbits, u64 offset, u32 zbc_color)
{
	int err;
	struct gk20a_buffer_state *state;
	struct dma_buf *dmabuf;

	dmabuf = dma_buf_get(buffer_fd);
	if (IS_ERR(dmabuf)) {
		nvgpu_err(g, "invalid dmabuf");
		return -EINVAL;
	}

	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
	if (err) {
		nvgpu_err(g, "could not get state from dmabuf");
		dma_buf_put(dmabuf);
		return err;
	}

	nvgpu_mutex_acquire(&state->lock);

	/* Update the compbits state. */
	state->valid_compbits = valid_compbits;
	state->zbc_color = zbc_color;

	/* Discard previous compbit job fence. */
	gk20a_fence_put(state->fence);
	state->fence = NULL;

	nvgpu_mutex_release(&state->lock);
	dma_buf_put(dmabuf);
	return 0;
}
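
/*
 * Typical flow (a sketch based on the two entry points above, not a
 * verbatim caller): after the GPU renders into a compressible buffer,
 * userspace calls gk20a_mark_compressible_write() to record which
 * compbits (typically NVGPU_GPU_COMPBITS_GPU) are valid. A later
 * gk20a_prepare_compressible_read() requesting CDEH/CDEV bits finds
 * them missing and runs the CDE swizzler via
 * gk20a_buffer_convert_gpu_to_cde(), returning a fence the consumer
 * can wait on before sampling the buffer.
 */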