diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common')
 drivers/gpu/nvgpu/common/linux/cde.c        | 1693 ++++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/cde.h        |  309 ++++
 drivers/gpu/nvgpu/common/linux/debug_cde.c  |   14 +-
 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c |    3 +-
 drivers/gpu/nvgpu/common/linux/module.c     |   14 +-
 drivers/gpu/nvgpu/common/linux/os_linux.h   |    2 +
 6 files changed, 2025 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
new file mode 100644
index 00000000..5b0fb910
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -0,0 +1,1693 @@
1 | /* | ||
2 | * Color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/dma-mapping.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/dma-buf.h> | ||
22 | |||
23 | #include <trace/events/gk20a.h> | ||
24 | |||
25 | #include <nvgpu/dma.h> | ||
26 | #include <nvgpu/gmmu.h> | ||
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/nvgpu_common.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/log.h> | ||
31 | #include <nvgpu/bug.h> | ||
32 | #include <nvgpu/firmware.h> | ||
33 | |||
34 | #include "gk20a/gk20a.h" | ||
35 | #include "gk20a/channel_gk20a.h" | ||
36 | #include "gk20a/mm_gk20a.h" | ||
37 | #include "gk20a/fence_gk20a.h" | ||
38 | #include "gk20a/gr_gk20a.h" | ||
39 | |||
40 | #include "cde.h" | ||
41 | #include "os_linux.h" | ||
42 | |||
43 | #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> | ||
44 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
45 | |||
46 | /* | ||
47 | * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the | ||
48 | * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem. | ||
49 | */ | ||
50 | #include "common/linux/vm_priv.h" | ||
51 | |||
52 | static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); | ||
53 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l); | ||
54 | |||
55 | #define CTX_DELETE_TIME 1000 | ||
56 | |||
57 | #define MAX_CTX_USE_COUNT 42 | ||
58 | #define MAX_CTX_RETRY_TIME 2000 | ||
59 | |||
60 | static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) | ||
61 | { | ||
62 | unsigned int i; | ||
63 | |||
64 | for (i = 0; i < cde_ctx->num_bufs; i++) { | ||
65 | struct nvgpu_mem *mem = cde_ctx->mem + i; | ||
66 | nvgpu_dma_unmap_free(cde_ctx->vm, mem); | ||
67 | } | ||
68 | |||
69 | nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd); | ||
70 | |||
71 | cde_ctx->convert_cmd = NULL; | ||
72 | cde_ctx->init_convert_cmd = NULL; | ||
73 | cde_ctx->num_bufs = 0; | ||
74 | cde_ctx->num_params = 0; | ||
75 | cde_ctx->init_cmd_num_entries = 0; | ||
76 | cde_ctx->convert_cmd_num_entries = 0; | ||
77 | cde_ctx->init_cmd_executed = false; | ||
78 | } | ||
79 | |||
/*
 * Fully tear down one CDE context: free its firmware buffers, unmap the
 * compbit backing store from the channel's VM, close the channel, then
 * unlink the context from the app list and free it.
 *
 * Caller must hold cde_app->mutex (the list/ctx_count updates assume it).
 */
static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
__must_hold(&cde_app->mutex)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct channel_gk20a *ch = cde_ctx->ch;
	struct vm_gk20a *vm = ch->vm;

	trace_gk20a_cde_remove_ctx(cde_ctx);

	/* release mapped memory */
	gk20a_deinit_cde_img(cde_ctx);
	nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem,
			cde_ctx->backing_store_vaddr);

	/* free the channel */
	gk20a_channel_close(ch);

	/* housekeeping on app */
	nvgpu_list_del(&cde_ctx->list);
	l->cde_app.ctx_count--;
	nvgpu_kfree(g, cde_ctx);
}
103 | |||
/*
 * Cancel a temporary context's delayed self-deletion work.
 *
 * Permanent contexts never schedule deleter work, so this is a no-op for
 * them. With wait_finish the app mutex is dropped while waiting for an
 * already-running deleter (the deleter itself takes the mutex, so waiting
 * with it held would deadlock) and re-acquired afterwards; callers must
 * not rely on app state staying unchanged across that window.
 */
static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
		bool wait_finish)
__releases(&cde_app->mutex)
__acquires(&cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;

	/* permanent contexts do not have deleter works */
	if (!cde_ctx->is_temporary)
		return;

	if (wait_finish) {
		nvgpu_mutex_release(&cde_app->mutex);
		cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work);
		nvgpu_mutex_acquire(&cde_app->mutex);
	} else {
		cancel_delayed_work(&cde_ctx->ctx_deleter_work);
	}
}
123 | |||
/*
 * Remove every context, free and used, from the app. Used during app
 * shutdown with cde_app->mutex held. The safe list iterator is required
 * because gk20a_cde_remove_ctx() unlinks each entry.
 */
static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
__must_hold(&l->cde_app->mutex)
{
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;

	/* safe to go off the mutex in cancel_deleter since app is
	 * deinitialised; no new jobs are started. deleter works may be only at
	 * waiting for the mutex or before, going to abort */

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->free_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}

	nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save,
			&cde_app->used_contexts, gk20a_cde_ctx, list) {
		gk20a_cde_cancel_deleter(cde_ctx, true);
		gk20a_cde_remove_ctx(cde_ctx);
	}
}
146 | |||
147 | static void gk20a_cde_stop(struct nvgpu_os_linux *l) | ||
148 | __must_hold(&l->cde_app->mutex) | ||
149 | { | ||
150 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
151 | |||
152 | /* prevent further conversions and delayed works from working */ | ||
153 | cde_app->initialised = false; | ||
154 | /* free all data, empty the list */ | ||
155 | gk20a_cde_remove_contexts(l); | ||
156 | } | ||
157 | |||
158 | void gk20a_cde_destroy(struct nvgpu_os_linux *l) | ||
159 | __acquires(&l->cde_app->mutex) | ||
160 | __releases(&l->cde_app->mutex) | ||
161 | { | ||
162 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
163 | |||
164 | if (!cde_app->initialised) | ||
165 | return; | ||
166 | |||
167 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
168 | gk20a_cde_stop(l); | ||
169 | nvgpu_mutex_release(&cde_app->mutex); | ||
170 | |||
171 | nvgpu_mutex_destroy(&cde_app->mutex); | ||
172 | } | ||
173 | |||
174 | void gk20a_cde_suspend(struct nvgpu_os_linux *l) | ||
175 | __acquires(&l->cde_app->mutex) | ||
176 | __releases(&l->cde_app->mutex) | ||
177 | { | ||
178 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
179 | struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; | ||
180 | |||
181 | if (!cde_app->initialised) | ||
182 | return; | ||
183 | |||
184 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
185 | |||
186 | nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, | ||
187 | &cde_app->free_contexts, gk20a_cde_ctx, list) { | ||
188 | gk20a_cde_cancel_deleter(cde_ctx, false); | ||
189 | } | ||
190 | |||
191 | nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, | ||
192 | &cde_app->used_contexts, gk20a_cde_ctx, list) { | ||
193 | gk20a_cde_cancel_deleter(cde_ctx, false); | ||
194 | } | ||
195 | |||
196 | nvgpu_mutex_release(&cde_app->mutex); | ||
197 | |||
198 | } | ||
199 | |||
200 | static int gk20a_cde_create_context(struct nvgpu_os_linux *l) | ||
201 | __must_hold(&l->cde_app->mutex) | ||
202 | { | ||
203 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
204 | struct gk20a_cde_ctx *cde_ctx; | ||
205 | |||
206 | cde_ctx = gk20a_cde_allocate_context(l); | ||
207 | if (IS_ERR(cde_ctx)) | ||
208 | return PTR_ERR(cde_ctx); | ||
209 | |||
210 | nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); | ||
211 | cde_app->ctx_count++; | ||
212 | if (cde_app->ctx_count > cde_app->ctx_count_top) | ||
213 | cde_app->ctx_count_top = cde_app->ctx_count; | ||
214 | |||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l) | ||
219 | __must_hold(&l->cde_app->mutex) | ||
220 | { | ||
221 | int err; | ||
222 | int i; | ||
223 | |||
224 | for (i = 0; i < NUM_CDE_CONTEXTS; i++) { | ||
225 | err = gk20a_cde_create_context(l); | ||
226 | if (err) | ||
227 | goto out; | ||
228 | } | ||
229 | |||
230 | return 0; | ||
231 | out: | ||
232 | gk20a_cde_remove_contexts(l); | ||
233 | return err; | ||
234 | } | ||
235 | |||
236 | static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, | ||
237 | struct nvgpu_firmware *img, | ||
238 | struct gk20a_cde_hdr_buf *buf) | ||
239 | { | ||
240 | struct nvgpu_mem *mem; | ||
241 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
242 | struct gk20a *g = &l->g; | ||
243 | int err; | ||
244 | |||
245 | /* check that the file can hold the buf */ | ||
246 | if (buf->data_byte_offset != 0 && | ||
247 | buf->data_byte_offset + buf->num_bytes > img->size) { | ||
248 | nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", | ||
249 | cde_ctx->num_bufs); | ||
250 | return -EINVAL; | ||
251 | } | ||
252 | |||
253 | /* check that we have enough buf elems available */ | ||
254 | if (cde_ctx->num_bufs >= MAX_CDE_BUFS) { | ||
255 | nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", | ||
256 | cde_ctx->num_bufs); | ||
257 | return -ENOMEM; | ||
258 | } | ||
259 | |||
260 | /* allocate buf */ | ||
261 | mem = cde_ctx->mem + cde_ctx->num_bufs; | ||
262 | err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem); | ||
263 | if (err) { | ||
264 | nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d", | ||
265 | cde_ctx->num_bufs); | ||
266 | return -ENOMEM; | ||
267 | } | ||
268 | |||
269 | /* copy the content */ | ||
270 | if (buf->data_byte_offset != 0) | ||
271 | memcpy(mem->cpu_va, img->data + buf->data_byte_offset, | ||
272 | buf->num_bytes); | ||
273 | |||
274 | cde_ctx->num_bufs++; | ||
275 | |||
276 | return 0; | ||
277 | } | ||
278 | |||
279 | static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, | ||
280 | int type, s32 shift, u64 mask, u64 value) | ||
281 | { | ||
282 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
283 | struct gk20a *g = &l->g; | ||
284 | u32 *target_mem_ptr = target; | ||
285 | u64 *target_mem_ptr_u64 = target; | ||
286 | u64 current_value, new_value; | ||
287 | |||
288 | value = (shift >= 0) ? value << shift : value >> -shift; | ||
289 | value &= mask; | ||
290 | |||
291 | /* read current data from the location */ | ||
292 | current_value = 0; | ||
293 | if (type == TYPE_PARAM_TYPE_U32) { | ||
294 | if (mask != 0xfffffffful) | ||
295 | current_value = *target_mem_ptr; | ||
296 | } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) { | ||
297 | if (mask != ~0ul) | ||
298 | current_value = *target_mem_ptr_u64; | ||
299 | } else if (type == TYPE_PARAM_TYPE_U64_BIG) { | ||
300 | current_value = *target_mem_ptr_u64; | ||
301 | current_value = (u64)(current_value >> 32) | | ||
302 | (u64)(current_value << 32); | ||
303 | } else { | ||
304 | nvgpu_warn(g, "cde: unknown type. type=%d", | ||
305 | type); | ||
306 | return -EINVAL; | ||
307 | } | ||
308 | |||
309 | current_value &= ~mask; | ||
310 | new_value = current_value | value; | ||
311 | |||
312 | /* store the element data back */ | ||
313 | if (type == TYPE_PARAM_TYPE_U32) | ||
314 | *target_mem_ptr = (u32)new_value; | ||
315 | else if (type == TYPE_PARAM_TYPE_U64_LITTLE) | ||
316 | *target_mem_ptr_u64 = new_value; | ||
317 | else { | ||
318 | new_value = (u64)(new_value >> 32) | | ||
319 | (u64)(new_value << 32); | ||
320 | *target_mem_ptr_u64 = new_value; | ||
321 | } | ||
322 | |||
323 | return 0; | ||
324 | } | ||
325 | |||
/*
 * Apply one TYPE_REPLACE element from the firmware image: patch a word
 * in the target buffer with the GPU virtual address of a location inside
 * the source buffer, using the element's type/shift/mask encoding.
 *
 * Returns 0 on success, -EINVAL for out-of-range buffer indices/offsets
 * or a failed data replace.
 */
static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
				  struct nvgpu_firmware *img,
				  struct gk20a_cde_hdr_replace *replace)
{
	struct nvgpu_mem *source_mem;
	struct nvgpu_mem *target_mem;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	u32 *target_mem_ptr;
	u64 vaddr;
	int err;

	if (replace->target_buf >= cde_ctx->num_bufs ||
	    replace->source_buf >= cde_ctx->num_bufs) {
		nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d",
			   replace->target_buf, replace->source_buf,
			   cde_ctx->num_bufs);
		return -EINVAL;
	}

	source_mem = cde_ctx->mem + replace->source_buf;
	target_mem = cde_ctx->mem + replace->target_buf;
	target_mem_ptr = target_mem->cpu_va;

	/* the patched location is a 32-bit word: offset..offset+3 must lie
	 * inside the respective buffer */
	if (source_mem->size < (replace->source_byte_offset + 3) ||
	    target_mem->size < (replace->target_byte_offset + 3)) {
		nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu",
			   replace->target_byte_offset,
			   replace->source_byte_offset,
			   source_mem->size,
			   target_mem->size);
		return -EINVAL;
	}

	/* calculate the target pointer */
	target_mem_ptr += (replace->target_byte_offset / sizeof(u32));

	/* determine patch value */
	vaddr = source_mem->gpu_va + replace->source_byte_offset;
	err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type,
				 replace->shift, replace->mask,
				 vaddr);
	if (err) {
		nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld",
			   err, replace->target_buf,
			   replace->target_byte_offset,
			   replace->source_buf,
			   replace->source_byte_offset);
	}

	return err;
}
378 | |||
/*
 * Patch all registered parameters of the context into their target
 * buffers before a launch.
 *
 * Reserved parameter ids are filled from GPU compression state and the
 * per-launch context fields (surface/compbit/scatterbuffer addresses and
 * sizes); ids at or above NUM_RESERVED_PARAMS come from userspace via
 * user_param_values. Out-of-range user ids are silently skipped.
 *
 * Returns 0 on success, -EINVAL when the SMMU address lookup or a data
 * replace fails.
 */
static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_mem *target_mem;
	u32 *target_mem_ptr;
	u64 new_data;
	int user_id = 0, err;
	unsigned int i;

	for (i = 0; i < cde_ctx->num_params; i++) {
		struct gk20a_cde_hdr_param *param = cde_ctx->params + i;
		target_mem = cde_ctx->mem + param->target_buf;
		target_mem_ptr = target_mem->cpu_va;
		target_mem_ptr += (param->target_byte_offset / sizeof(u32));

		switch (param->id) {
		case TYPE_PARAM_COMPTAGS_PER_CACHELINE:
			new_data = g->gr.comptags_per_cacheline;
			break;
		case TYPE_PARAM_GPU_CONFIGURATION:
			new_data = (u64)g->ltc_count * g->gr.slices_per_ltc *
				g->gr.cacheline_size;
			break;
		case TYPE_PARAM_FIRSTPAGEOFFSET:
			new_data = cde_ctx->surf_param_offset;
			break;
		case TYPE_PARAM_NUMPAGES:
			new_data = cde_ctx->surf_param_lines;
			break;
		case TYPE_PARAM_BACKINGSTORE:
			new_data = cde_ctx->backing_store_vaddr;
			break;
		case TYPE_PARAM_DESTINATION:
			new_data = cde_ctx->compbit_vaddr;
			break;
		case TYPE_PARAM_DESTINATION_SIZE:
			new_data = cde_ctx->compbit_size;
			break;
		case TYPE_PARAM_BACKINGSTORE_SIZE:
			new_data = g->gr.compbit_store.mem.size;
			break;
		case TYPE_PARAM_SOURCE_SMMU_ADDR:
			/* a zero IOVA means the translation failed */
			new_data = gk20a_mm_gpuva_to_iova_base(cde_ctx->vm,
							       cde_ctx->surf_vaddr);
			if (new_data == 0)
				return -EINVAL;
			break;
		case TYPE_PARAM_BACKINGSTORE_BASE_HW:
			new_data = g->gr.compbit_store.base_hw;
			break;
		case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE:
			new_data = g->gr.gobs_per_comptagline_per_slice;
			break;
		case TYPE_PARAM_SCATTERBUFFER:
			new_data = cde_ctx->scatterbuffer_vaddr;
			break;
		case TYPE_PARAM_SCATTERBUFFER_SIZE:
			new_data = cde_ctx->scatterbuffer_size;
			break;
		default:
			/* user-supplied parameter; ignore ids outside the
			 * user parameter array */
			user_id = param->id - NUM_RESERVED_PARAMS;
			if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS)
				continue;
			new_data = cde_ctx->user_param_values[user_id];
		}

		gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx",
			  i, param->id, param->target_buf,
			  param->target_byte_offset, new_data,
			  param->data_offset, param->type, param->shift,
			  param->mask);

		new_data += param->data_offset;

		err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type,
					 param->shift, param->mask, new_data);

		if (err) {
			nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu",
				   err, i, param->id, param->target_buf,
				   param->target_byte_offset, new_data);
			return err;
		}
	}

	return 0;
}
467 | |||
468 | static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, | ||
469 | struct nvgpu_firmware *img, | ||
470 | struct gk20a_cde_hdr_param *param) | ||
471 | { | ||
472 | struct nvgpu_mem *target_mem; | ||
473 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
474 | struct gk20a *g = &l->g; | ||
475 | |||
476 | if (param->target_buf >= cde_ctx->num_bufs) { | ||
477 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", | ||
478 | cde_ctx->num_params, param->target_buf, | ||
479 | cde_ctx->num_bufs); | ||
480 | return -EINVAL; | ||
481 | } | ||
482 | |||
483 | target_mem = cde_ctx->mem + param->target_buf; | ||
484 | if (target_mem->size < (param->target_byte_offset + 3)) { | ||
485 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", | ||
486 | cde_ctx->num_params, param->target_byte_offset, | ||
487 | target_mem->size); | ||
488 | return -EINVAL; | ||
489 | } | ||
490 | |||
491 | /* does this parameter fit into our parameter structure */ | ||
492 | if (cde_ctx->num_params >= MAX_CDE_PARAMS) { | ||
493 | nvgpu_warn(g, "cde: no room for new parameters param idx = %d", | ||
494 | cde_ctx->num_params); | ||
495 | return -ENOMEM; | ||
496 | } | ||
497 | |||
498 | /* is the given id valid? */ | ||
499 | if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { | ||
500 | nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", | ||
501 | param->id, cde_ctx->num_params, | ||
502 | NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); | ||
503 | return -EINVAL; | ||
504 | } | ||
505 | |||
506 | cde_ctx->params[cde_ctx->num_params] = *param; | ||
507 | cde_ctx->num_params++; | ||
508 | |||
509 | return 0; | ||
510 | } | ||
511 | |||
512 | static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, | ||
513 | struct nvgpu_firmware *img, | ||
514 | u32 required_class) | ||
515 | { | ||
516 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
517 | struct gk20a *g = &l->g; | ||
518 | struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx; | ||
519 | int err; | ||
520 | |||
521 | alloc_obj_ctx.class_num = required_class; | ||
522 | alloc_obj_ctx.flags = 0; | ||
523 | |||
524 | /* CDE enabled */ | ||
525 | cde_ctx->ch->cde = true; | ||
526 | |||
527 | err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); | ||
528 | if (err) { | ||
529 | nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", | ||
530 | err); | ||
531 | return err; | ||
532 | } | ||
533 | |||
534 | return 0; | ||
535 | } | ||
536 | |||
537 | static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, | ||
538 | struct nvgpu_firmware *img, | ||
539 | u32 op, | ||
540 | struct gk20a_cde_cmd_elem *cmd_elem, | ||
541 | u32 num_elems) | ||
542 | { | ||
543 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
544 | struct gk20a *g = &l->g; | ||
545 | struct nvgpu_gpfifo **gpfifo, *gpfifo_elem; | ||
546 | u32 *num_entries; | ||
547 | unsigned int i; | ||
548 | |||
549 | /* check command type */ | ||
550 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
551 | gpfifo = &cde_ctx->init_convert_cmd; | ||
552 | num_entries = &cde_ctx->init_cmd_num_entries; | ||
553 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
554 | gpfifo = &cde_ctx->convert_cmd; | ||
555 | num_entries = &cde_ctx->convert_cmd_num_entries; | ||
556 | } else { | ||
557 | nvgpu_warn(g, "cde: unknown command. op=%u", | ||
558 | op); | ||
559 | return -EINVAL; | ||
560 | } | ||
561 | |||
562 | /* allocate gpfifo entries to be pushed */ | ||
563 | *gpfifo = nvgpu_kzalloc(g, | ||
564 | sizeof(struct nvgpu_gpfifo) * num_elems); | ||
565 | if (!*gpfifo) { | ||
566 | nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); | ||
567 | return -ENOMEM; | ||
568 | } | ||
569 | |||
570 | gpfifo_elem = *gpfifo; | ||
571 | for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { | ||
572 | struct nvgpu_mem *target_mem; | ||
573 | |||
574 | /* validate the current entry */ | ||
575 | if (cmd_elem->target_buf >= cde_ctx->num_bufs) { | ||
576 | nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", | ||
577 | cmd_elem->target_buf, cde_ctx->num_bufs); | ||
578 | return -EINVAL; | ||
579 | } | ||
580 | |||
581 | target_mem = cde_ctx->mem + cmd_elem->target_buf; | ||
582 | if (target_mem->size< | ||
583 | cmd_elem->target_byte_offset + cmd_elem->num_bytes) { | ||
584 | nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", | ||
585 | target_mem->size, | ||
586 | cmd_elem->target_byte_offset, | ||
587 | cmd_elem->num_bytes); | ||
588 | return -EINVAL; | ||
589 | } | ||
590 | |||
591 | /* store the element into gpfifo */ | ||
592 | gpfifo_elem->entry0 = | ||
593 | u64_lo32(target_mem->gpu_va + | ||
594 | cmd_elem->target_byte_offset); | ||
595 | gpfifo_elem->entry1 = | ||
596 | u64_hi32(target_mem->gpu_va + | ||
597 | cmd_elem->target_byte_offset) | | ||
598 | pbdma_gp_entry1_length_f(cmd_elem->num_bytes / | ||
599 | sizeof(u32)); | ||
600 | } | ||
601 | |||
602 | *num_entries = num_elems; | ||
603 | return 0; | ||
604 | } | ||
605 | |||
606 | static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) | ||
607 | { | ||
608 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
609 | struct gk20a *g = &l->g; | ||
610 | unsigned long init_bytes = cde_ctx->init_cmd_num_entries * | ||
611 | sizeof(struct nvgpu_gpfifo); | ||
612 | unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * | ||
613 | sizeof(struct nvgpu_gpfifo); | ||
614 | unsigned long total_bytes = init_bytes + conv_bytes; | ||
615 | struct nvgpu_gpfifo *combined_cmd; | ||
616 | |||
617 | /* allocate buffer that has space for both */ | ||
618 | combined_cmd = nvgpu_kzalloc(g, total_bytes); | ||
619 | if (!combined_cmd) { | ||
620 | nvgpu_warn(g, | ||
621 | "cde: could not allocate memory for gpfifo entries"); | ||
622 | return -ENOMEM; | ||
623 | } | ||
624 | |||
625 | /* move the original init here and append convert */ | ||
626 | memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes); | ||
627 | memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, | ||
628 | cde_ctx->convert_cmd, conv_bytes); | ||
629 | |||
630 | nvgpu_kfree(g, cde_ctx->init_convert_cmd); | ||
631 | nvgpu_kfree(g, cde_ctx->convert_cmd); | ||
632 | |||
633 | cde_ctx->init_convert_cmd = combined_cmd; | ||
634 | cde_ctx->convert_cmd = combined_cmd | ||
635 | + cde_ctx->init_cmd_num_entries; | ||
636 | |||
637 | return 0; | ||
638 | } | ||
639 | |||
640 | static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, | ||
641 | struct nvgpu_firmware *img) | ||
642 | { | ||
643 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
644 | struct gk20a *g = &l->g; | ||
645 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
646 | u32 *data = (u32 *)img->data; | ||
647 | u32 num_of_elems; | ||
648 | struct gk20a_cde_hdr_elem *elem; | ||
649 | u32 min_size = 0; | ||
650 | int err = 0; | ||
651 | unsigned int i; | ||
652 | |||
653 | min_size += 2 * sizeof(u32); | ||
654 | if (img->size < min_size) { | ||
655 | nvgpu_warn(g, "cde: invalid image header"); | ||
656 | return -EINVAL; | ||
657 | } | ||
658 | |||
659 | cde_app->firmware_version = data[0]; | ||
660 | num_of_elems = data[1]; | ||
661 | |||
662 | min_size += num_of_elems * sizeof(*elem); | ||
663 | if (img->size < min_size) { | ||
664 | nvgpu_warn(g, "cde: bad image"); | ||
665 | return -EINVAL; | ||
666 | } | ||
667 | |||
668 | elem = (struct gk20a_cde_hdr_elem *)&data[2]; | ||
669 | for (i = 0; i < num_of_elems; i++) { | ||
670 | int err = 0; | ||
671 | switch (elem->type) { | ||
672 | case TYPE_BUF: | ||
673 | err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); | ||
674 | break; | ||
675 | case TYPE_REPLACE: | ||
676 | err = gk20a_init_cde_replace(cde_ctx, img, | ||
677 | &elem->replace); | ||
678 | break; | ||
679 | case TYPE_PARAM: | ||
680 | err = gk20a_init_cde_param(cde_ctx, img, &elem->param); | ||
681 | break; | ||
682 | case TYPE_REQUIRED_CLASS: | ||
683 | err = gk20a_init_cde_required_class(cde_ctx, img, | ||
684 | elem->required_class); | ||
685 | break; | ||
686 | case TYPE_COMMAND: | ||
687 | { | ||
688 | struct gk20a_cde_cmd_elem *cmd = (void *) | ||
689 | &img->data[elem->command.data_byte_offset]; | ||
690 | err = gk20a_init_cde_command(cde_ctx, img, | ||
691 | elem->command.op, cmd, | ||
692 | elem->command.num_entries); | ||
693 | break; | ||
694 | } | ||
695 | case TYPE_ARRAY: | ||
696 | memcpy(&cde_app->arrays[elem->array.id][0], | ||
697 | elem->array.data, | ||
698 | MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); | ||
699 | break; | ||
700 | default: | ||
701 | nvgpu_warn(g, "cde: unknown header element"); | ||
702 | err = -EINVAL; | ||
703 | } | ||
704 | |||
705 | if (err) | ||
706 | goto deinit_image; | ||
707 | |||
708 | elem++; | ||
709 | } | ||
710 | |||
711 | if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { | ||
712 | nvgpu_warn(g, "cde: convert command not defined"); | ||
713 | err = -EINVAL; | ||
714 | goto deinit_image; | ||
715 | } | ||
716 | |||
717 | if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { | ||
718 | nvgpu_warn(g, "cde: convert command not defined"); | ||
719 | err = -EINVAL; | ||
720 | goto deinit_image; | ||
721 | } | ||
722 | |||
723 | err = gk20a_cde_pack_cmdbufs(cde_ctx); | ||
724 | if (err) | ||
725 | goto deinit_image; | ||
726 | |||
727 | return 0; | ||
728 | |||
729 | deinit_image: | ||
730 | gk20a_deinit_cde_img(cde_ctx); | ||
731 | return err; | ||
732 | } | ||
733 | |||
734 | static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | ||
735 | u32 op, struct nvgpu_fence *fence, | ||
736 | u32 flags, struct gk20a_fence **fence_out) | ||
737 | { | ||
738 | struct nvgpu_os_linux *l = cde_ctx->l; | ||
739 | struct gk20a *g = &l->g; | ||
740 | struct nvgpu_gpfifo *gpfifo = NULL; | ||
741 | int num_entries = 0; | ||
742 | |||
743 | /* check command type */ | ||
744 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
745 | /* both init and convert combined */ | ||
746 | gpfifo = cde_ctx->init_convert_cmd; | ||
747 | num_entries = cde_ctx->init_cmd_num_entries | ||
748 | + cde_ctx->convert_cmd_num_entries; | ||
749 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
750 | gpfifo = cde_ctx->convert_cmd; | ||
751 | num_entries = cde_ctx->convert_cmd_num_entries; | ||
752 | } else { | ||
753 | nvgpu_warn(g, "cde: unknown buffer"); | ||
754 | return -EINVAL; | ||
755 | } | ||
756 | |||
757 | if (gpfifo == NULL || num_entries == 0) { | ||
758 | nvgpu_warn(g, "cde: buffer not available"); | ||
759 | return -ENOSYS; | ||
760 | } | ||
761 | |||
762 | return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, | ||
763 | num_entries, flags, fence, fence_out, true, | ||
764 | NULL); | ||
765 | } | ||
766 | |||
767 | static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) | ||
768 | __acquires(&cde_app->mutex) | ||
769 | __releases(&cde_app->mutex) | ||
770 | { | ||
771 | struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app; | ||
772 | |||
773 | gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); | ||
774 | trace_gk20a_cde_release(cde_ctx); | ||
775 | |||
776 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
777 | |||
778 | if (cde_ctx->in_use) { | ||
779 | cde_ctx->in_use = false; | ||
780 | nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts); | ||
781 | cde_app->ctx_usecount--; | ||
782 | } else { | ||
783 | gk20a_dbg_info("double release cde context %p", cde_ctx); | ||
784 | } | ||
785 | |||
786 | nvgpu_mutex_release(&cde_app->mutex); | ||
787 | } | ||
788 | |||
/*
 * Delayed work that deletes an idle temporary context.
 *
 * The work races with a new user taking the context: in_use (and app
 * shutdown) is checked once without the lock as a cheap early-out, and
 * again under cde_app->mutex before the context is actually removed.
 * If the GPU cannot be powered on, deletion is simply postponed — the
 * context remains usable later, so nothing is leaked.
 */
static void gk20a_cde_ctx_deleter_fn(struct work_struct *work)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct delayed_work *delay_work = to_delayed_work(work);
	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
			struct gk20a_cde_ctx, ctx_deleter_work);
	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	int err;

	/* someone has just taken it? engine deletion started? */
	if (cde_ctx->in_use || !cde_app->initialised)
		return;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: attempting to delete temporary %p", cde_ctx);

	err = gk20a_busy(g);
	if (err) {
		/* this context would find new use anyway later, so not freeing
		 * here does not leak anything */
		nvgpu_warn(g, "cde: cannot set gk20a on, postponing"
				" temp ctx deletion");
		return;
	}

	nvgpu_mutex_acquire(&cde_app->mutex);
	/* recheck under the lock: the unlocked check above may have raced */
	if (cde_ctx->in_use || !cde_app->initialised) {
		gk20a_dbg(gpu_dbg_cde_ctx,
				"cde: context use raced, not deleting %p",
				cde_ctx);
		goto out;
	}

	WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work),
			"double pending %p", cde_ctx);

	gk20a_cde_remove_ctx(cde_ctx);
	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: destroyed %p count=%d use=%d max=%d",
			cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount,
			cde_app->ctx_count_top);

out:
	nvgpu_mutex_release(&cde_app->mutex);
	gk20a_idle(g);
}
838 | |||
/*
 * Hand out a context for one conversion, with cde_app->mutex held.
 *
 * Preference order: fail with -EAGAIN when the use count is saturated,
 * otherwise take an idle context from the free list, otherwise allocate
 * a fresh temporary context. The returned context is moved to the used
 * list with in_use set, so a scheduled deleter work cannot free it.
 *
 * Returns a context pointer or an ERR_PTR on failure.
 */
static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
__must_hold(&cde_app->mutex)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	struct gk20a_cde_ctx *cde_ctx;

	/* exhausted? */

	if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT)
		return ERR_PTR(-EAGAIN);

	/* idle context available? */

	if (!nvgpu_list_empty(&cde_app->free_contexts)) {
		cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts,
				gk20a_cde_ctx, list);
		gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
				"cde: got free %p count=%d use=%d max=%d",
				cde_ctx, cde_app->ctx_count,
				cde_app->ctx_usecount,
				cde_app->ctx_count_top);
		trace_gk20a_cde_get_context(cde_ctx);

		/* deleter work may be scheduled, but in_use prevents it */
		cde_ctx->in_use = true;
		nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts);
		cde_app->ctx_usecount++;

		/* cancel any deletions now that ctx is in use */
		gk20a_cde_cancel_deleter(cde_ctx, true);
		return cde_ctx;
	}

	/* no free contexts, get a temporary one */

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx,
			"cde: no free contexts, count=%d",
			cde_app->ctx_count);

	cde_ctx = gk20a_cde_allocate_context(l);
	if (IS_ERR(cde_ctx)) {
		nvgpu_warn(g, "cde: cannot allocate context: %ld",
				PTR_ERR(cde_ctx));
		return cde_ctx;
	}

	trace_gk20a_cde_get_context(cde_ctx);
	cde_ctx->in_use = true;
	cde_ctx->is_temporary = true;
	cde_app->ctx_usecount++;
	cde_app->ctx_count++;
	if (cde_app->ctx_count > cde_app->ctx_count_top)
		cde_app->ctx_count_top = cde_app->ctx_count;
	nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts);

	return cde_ctx;
}
897 | |||
898 | static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l) | ||
899 | __releases(&cde_app->mutex) | ||
900 | __acquires(&cde_app->mutex) | ||
901 | { | ||
902 | struct gk20a *g = &l->g; | ||
903 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
904 | struct gk20a_cde_ctx *cde_ctx = NULL; | ||
905 | struct nvgpu_timeout timeout; | ||
906 | |||
907 | nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME, | ||
908 | NVGPU_TIMER_CPU_TIMER); | ||
909 | |||
910 | do { | ||
911 | cde_ctx = gk20a_cde_do_get_context(l); | ||
912 | if (PTR_ERR(cde_ctx) != -EAGAIN) | ||
913 | break; | ||
914 | |||
915 | /* exhausted, retry */ | ||
916 | nvgpu_mutex_release(&cde_app->mutex); | ||
917 | cond_resched(); | ||
918 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
919 | } while (!nvgpu_timeout_expired(&timeout)); | ||
920 | |||
921 | return cde_ctx; | ||
922 | } | ||
923 | |||
924 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l) | ||
925 | { | ||
926 | struct gk20a *g = &l->g; | ||
927 | struct gk20a_cde_ctx *cde_ctx; | ||
928 | int ret; | ||
929 | |||
930 | cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); | ||
931 | if (!cde_ctx) | ||
932 | return ERR_PTR(-ENOMEM); | ||
933 | |||
934 | cde_ctx->l = l; | ||
935 | cde_ctx->dev = dev_from_gk20a(g); | ||
936 | |||
937 | ret = gk20a_cde_load(cde_ctx); | ||
938 | if (ret) { | ||
939 | nvgpu_kfree(g, cde_ctx); | ||
940 | return ERR_PTR(ret); | ||
941 | } | ||
942 | |||
943 | nvgpu_init_list_node(&cde_ctx->list); | ||
944 | cde_ctx->is_temporary = false; | ||
945 | cde_ctx->in_use = false; | ||
946 | INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, | ||
947 | gk20a_cde_ctx_deleter_fn); | ||
948 | |||
949 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); | ||
950 | trace_gk20a_cde_allocate_context(cde_ctx); | ||
951 | return cde_ctx; | ||
952 | } | ||
953 | |||
954 | int gk20a_cde_convert(struct nvgpu_os_linux *l, | ||
955 | struct dma_buf *compbits_scatter_buf, | ||
956 | u64 compbits_byte_offset, | ||
957 | u64 scatterbuffer_byte_offset, | ||
958 | struct nvgpu_fence *fence, | ||
959 | u32 __flags, struct gk20a_cde_param *params, | ||
960 | int num_params, struct gk20a_fence **fence_out) | ||
961 | __acquires(&l->cde_app->mutex) | ||
962 | __releases(&l->cde_app->mutex) | ||
963 | { | ||
964 | struct gk20a *g = &l->g; | ||
965 | struct gk20a_cde_ctx *cde_ctx = NULL; | ||
966 | struct gk20a_comptags comptags; | ||
967 | u64 mapped_compbits_offset = 0; | ||
968 | u64 compbits_size = 0; | ||
969 | u64 mapped_scatterbuffer_offset = 0; | ||
970 | u64 scatterbuffer_size = 0; | ||
971 | u64 map_vaddr = 0; | ||
972 | u64 map_offset = 0; | ||
973 | u64 map_size = 0; | ||
974 | u8 *surface = NULL; | ||
975 | u64 big_page_mask = 0; | ||
976 | u32 flags; | ||
977 | int err, i; | ||
978 | const s32 compbits_kind = 0; | ||
979 | |||
980 | gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", | ||
981 | compbits_byte_offset, scatterbuffer_byte_offset); | ||
982 | |||
983 | /* scatter buffer must be after compbits buffer */ | ||
984 | if (scatterbuffer_byte_offset && | ||
985 | scatterbuffer_byte_offset < compbits_byte_offset) | ||
986 | return -EINVAL; | ||
987 | |||
988 | err = gk20a_busy(g); | ||
989 | if (err) | ||
990 | return err; | ||
991 | |||
992 | nvgpu_mutex_acquire(&l->cde_app.mutex); | ||
993 | cde_ctx = gk20a_cde_get_context(l); | ||
994 | nvgpu_mutex_release(&l->cde_app.mutex); | ||
995 | if (IS_ERR(cde_ctx)) { | ||
996 | err = PTR_ERR(cde_ctx); | ||
997 | goto exit_idle; | ||
998 | } | ||
999 | |||
1000 | /* First, map the buffer to local va */ | ||
1001 | |||
1002 | /* ensure that the compbits buffer has drvdata */ | ||
1003 | err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, | ||
1004 | dev_from_gk20a(g)); | ||
1005 | if (err) | ||
1006 | goto exit_idle; | ||
1007 | |||
1008 | /* compbits don't start at page aligned offset, so we need to align | ||
1009 | the region to be mapped */ | ||
1010 | big_page_mask = cde_ctx->vm->big_page_size - 1; | ||
1011 | map_offset = compbits_byte_offset & ~big_page_mask; | ||
1012 | map_size = compbits_scatter_buf->size - map_offset; | ||
1013 | |||
1014 | |||
1015 | /* compute compbit start offset from the beginning of the mapped | ||
1016 | area */ | ||
1017 | mapped_compbits_offset = compbits_byte_offset - map_offset; | ||
1018 | if (scatterbuffer_byte_offset) { | ||
1019 | compbits_size = scatterbuffer_byte_offset - | ||
1020 | compbits_byte_offset; | ||
1021 | mapped_scatterbuffer_offset = scatterbuffer_byte_offset - | ||
1022 | map_offset; | ||
1023 | scatterbuffer_size = compbits_scatter_buf->size - | ||
1024 | scatterbuffer_byte_offset; | ||
1025 | } else { | ||
1026 | compbits_size = compbits_scatter_buf->size - | ||
1027 | compbits_byte_offset; | ||
1028 | } | ||
1029 | |||
1030 | gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu", | ||
1031 | map_offset, map_size); | ||
1032 | gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", | ||
1033 | mapped_compbits_offset, compbits_size); | ||
1034 | gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", | ||
1035 | mapped_scatterbuffer_offset, scatterbuffer_size); | ||
1036 | |||
1037 | |||
1038 | /* map the destination buffer */ | ||
1039 | get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ | ||
1040 | map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, | ||
1041 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1042 | compbits_kind, true, | ||
1043 | gk20a_mem_flag_none, | ||
1044 | map_offset, map_size, | ||
1045 | NULL); | ||
1046 | if (!map_vaddr) { | ||
1047 | dma_buf_put(compbits_scatter_buf); | ||
1048 | err = -EINVAL; | ||
1049 | goto exit_idle; | ||
1050 | } | ||
1051 | |||
1052 | if (scatterbuffer_byte_offset && | ||
1053 | g->ops.cde.need_scatter_buffer && | ||
1054 | g->ops.cde.need_scatter_buffer(g)) { | ||
1055 | struct sg_table *sgt; | ||
1056 | void *scatter_buffer; | ||
1057 | |||
1058 | surface = dma_buf_vmap(compbits_scatter_buf); | ||
1059 | if (IS_ERR(surface)) { | ||
1060 | nvgpu_warn(g, | ||
1061 | "dma_buf_vmap failed"); | ||
1062 | err = -EINVAL; | ||
1063 | goto exit_unmap_vaddr; | ||
1064 | } | ||
1065 | |||
1066 | scatter_buffer = surface + scatterbuffer_byte_offset; | ||
1067 | |||
1068 | gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", | ||
1069 | surface, scatter_buffer); | ||
1070 | sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf); | ||
1071 | if (IS_ERR(sgt)) { | ||
1072 | nvgpu_warn(g, | ||
1073 | "mm_pin failed"); | ||
1074 | err = -EINVAL; | ||
1075 | goto exit_unmap_surface; | ||
1076 | } else { | ||
1077 | err = g->ops.cde.populate_scatter_buffer(g, sgt, | ||
1078 | compbits_byte_offset, scatter_buffer, | ||
1079 | scatterbuffer_size); | ||
1080 | WARN_ON(err); | ||
1081 | |||
1082 | gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf, | ||
1083 | sgt); | ||
1084 | if (err) | ||
1085 | goto exit_unmap_surface; | ||
1086 | } | ||
1087 | |||
1088 | __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); | ||
1089 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1090 | surface = NULL; | ||
1091 | } | ||
1092 | |||
1093 | /* store source buffer compression tags */ | ||
1094 | gk20a_get_comptags(dev_from_gk20a(g), compbits_scatter_buf, &comptags); | ||
1095 | cde_ctx->surf_param_offset = comptags.offset; | ||
1096 | cde_ctx->surf_param_lines = comptags.lines; | ||
1097 | |||
1098 | /* store surface vaddr. This is actually compbit vaddr, but since | ||
1099 | compbits live in the same surface, and we can get the alloc base | ||
1100 | address by using gk20a_mm_gpuva_to_iova_base, this will do */ | ||
1101 | cde_ctx->surf_vaddr = map_vaddr; | ||
1102 | |||
1103 | /* store information about destination */ | ||
1104 | cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; | ||
1105 | cde_ctx->compbit_size = compbits_size; | ||
1106 | |||
1107 | cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; | ||
1108 | cde_ctx->scatterbuffer_size = scatterbuffer_size; | ||
1109 | |||
1110 | /* remove existing argument data */ | ||
1111 | memset(cde_ctx->user_param_values, 0, | ||
1112 | sizeof(cde_ctx->user_param_values)); | ||
1113 | |||
1114 | /* read user space arguments for the conversion */ | ||
1115 | for (i = 0; i < num_params; i++) { | ||
1116 | struct gk20a_cde_param *param = params + i; | ||
1117 | int id = param->id - NUM_RESERVED_PARAMS; | ||
1118 | |||
1119 | if (id < 0 || id >= MAX_CDE_USER_PARAMS) { | ||
1120 | nvgpu_warn(g, "cde: unknown user parameter"); | ||
1121 | err = -EINVAL; | ||
1122 | goto exit_unmap_surface; | ||
1123 | } | ||
1124 | cde_ctx->user_param_values[id] = param->value; | ||
1125 | } | ||
1126 | |||
1127 | /* patch data */ | ||
1128 | err = gk20a_cde_patch_params(cde_ctx); | ||
1129 | if (err) { | ||
1130 | nvgpu_warn(g, "cde: failed to patch parameters"); | ||
1131 | goto exit_unmap_surface; | ||
1132 | } | ||
1133 | |||
1134 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", | ||
1135 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); | ||
1136 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", | ||
1137 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); | ||
1138 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", | ||
1139 | cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); | ||
1140 | |||
1141 | /* take always the postfence as it is needed for protecting the | ||
1142 | * cde context */ | ||
1143 | flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | ||
1144 | |||
1145 | /* gk20a_cde_execute_buffer() will grab a power reference of it's own */ | ||
1146 | gk20a_idle(g); | ||
1147 | |||
1148 | /* execute the conversion buffer, combined with init first if it's the | ||
1149 | * first time */ | ||
1150 | err = gk20a_cde_execute_buffer(cde_ctx, | ||
1151 | cde_ctx->init_cmd_executed | ||
1152 | ? TYPE_BUF_COMMAND_CONVERT | ||
1153 | : TYPE_BUF_COMMAND_INIT, | ||
1154 | fence, flags, fence_out); | ||
1155 | |||
1156 | cde_ctx->init_cmd_executed = true; | ||
1157 | |||
1158 | /* unmap the buffers - channel holds references to them now */ | ||
1159 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); | ||
1160 | |||
1161 | return err; | ||
1162 | |||
1163 | exit_unmap_surface: | ||
1164 | if (surface) | ||
1165 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1166 | exit_unmap_vaddr: | ||
1167 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); | ||
1168 | exit_idle: | ||
1169 | gk20a_idle(g); | ||
1170 | return err; | ||
1171 | } | ||
1172 | |||
/*
 * Channel job-completion callback for cde contexts.
 *
 * Invoked for the channel backing @data (a struct gk20a_cde_ctx). Does
 * nothing while the channel still has queued jobs. Once idle:
 *  - on channel timeout, a non-temporary context is flagged temporary (so it
 *    gets deleted) and a replacement context is created;
 *  - temporary contexts get their deferred deletion work scheduled;
 *  - non-timed-out contexts are released back to the free list.
 */
static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data)
__acquires(&cde_app->mutex)
__releases(&cde_app->mutex)
{
	struct gk20a_cde_ctx *cde_ctx = data;
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct gk20a_cde_app *cde_app = &l->cde_app;
	bool channel_idle;

	channel_gk20a_joblist_lock(ch);
	channel_idle = channel_gk20a_joblist_is_empty(ch);
	channel_gk20a_joblist_unlock(ch);

	if (!channel_idle)
		return;

	trace_gk20a_cde_finished_ctx_cb(cde_ctx);
	gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx);
	if (!cde_ctx->in_use)
		gk20a_dbg_info("double finish cde context %p on channel %p",
			       cde_ctx, ch);

	if (ch->has_timedout) {
		if (cde_ctx->is_temporary) {
			nvgpu_warn(g,
					"cde: channel had timed out"
					" (temporary channel)");
			/* going to be deleted anyway */
		} else {
			nvgpu_warn(g,
					"cde: channel had timed out"
					", reloading");
			/* mark it to be deleted, replace with a new one */
			nvgpu_mutex_acquire(&cde_app->mutex);
			cde_ctx->is_temporary = true;
			if (gk20a_cde_create_context(l)) {
				nvgpu_err(g, "cde: can't replace context");
			}
			nvgpu_mutex_release(&cde_app->mutex);
		}
	}

	/* delete temporary contexts later (watch for doubles) */
	if (cde_ctx->is_temporary && cde_ctx->in_use) {
		WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work));
		schedule_delayed_work(&cde_ctx->ctx_deleter_work,
			msecs_to_jiffies(CTX_DELETE_TIME));
	}

	/* a timed-out context is intentionally not released here; it stays
	 * marked in_use until the deleter work removes it */
	if (!ch->has_timedout)
		gk20a_cde_ctx_release(cde_ctx);
}
1226 | |||
/*
 * Load the cde firmware ("gpu2cde.bin") and set up the channel used by
 * @cde_ctx: open a channel with gk20a_cde_finished_ctx_cb() as completion
 * callback, bind it to the cde vm, allocate its gpfifo, map the compbit
 * backing store read-only into the vm, and run the firmware image
 * initialisation.
 *
 * On success, cde_ctx->ch, cde_ctx->vm and cde_ctx->backing_store_vaddr
 * are populated and 0 is returned. On failure, partially acquired
 * resources are unwound via the cascading error labels and a negative
 * error code is returned.
 *
 * NOTE(review): the opened channel is not explicitly closed on the error
 * paths here — presumably torn down via the vm reference drop or by the
 * caller; confirm there is no channel leak when e.g. gpfifo allocation
 * fails.
 */
static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
{
	struct nvgpu_os_linux *l = cde_ctx->l;
	struct gk20a *g = &l->g;
	struct nvgpu_firmware *img;
	struct channel_gk20a *ch;
	struct gr_gk20a *gr = &g->gr;
	int err = 0;
	u64 vaddr;

	img = nvgpu_request_firmware(g, "gpu2cde.bin", 0);
	if (!img) {
		nvgpu_err(g, "cde: could not fetch the firmware");
		return -ENOSYS;
	}

	ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb,
			cde_ctx,
			-1,
			false);
	if (!ch) {
		nvgpu_warn(g, "cde: gk20a channel not available");
		err = -ENOMEM;
		goto err_get_gk20a_channel;
	}

	/* bind the channel to the vm */
	err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
	if (err) {
		nvgpu_warn(g, "cde: could not bind vm");
		goto err_commit_va;
	}

	/* allocate gpfifo (1024 should be more than enough) */
	err = gk20a_channel_alloc_gpfifo(ch, 1024, 0, 0);
	if (err) {
		nvgpu_warn(g, "cde: unable to allocate gpfifo");
		goto err_alloc_gpfifo;
	}

	/* map backing store to gpu virtual space */
	vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem,
			       g->gr.compbit_store.mem.size,
			       NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
			       gk20a_mem_flag_read_only,
			       false,
			       gr->compbit_store.mem.aperture);

	if (!vaddr) {
		nvgpu_warn(g, "cde: cannot map compression bit backing store");
		err = -ENOMEM;
		goto err_map_backingstore;
	}

	/* store initialisation data */
	cde_ctx->ch = ch;
	cde_ctx->vm = ch->vm;
	cde_ctx->backing_store_vaddr = vaddr;

	/* initialise the firmware */
	err = gk20a_init_cde_img(cde_ctx, img);
	if (err) {
		nvgpu_warn(g, "cde: image initialisation failed");
		goto err_init_cde_img;
	}

	/* initialisation done */
	nvgpu_release_firmware(g, img);

	return 0;

	/* unwind in reverse order of acquisition; labels fall through */
err_init_cde_img:
	nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr);
err_map_backingstore:
err_alloc_gpfifo:
	nvgpu_vm_put(ch->vm);
err_commit_va:
err_get_gk20a_channel:
	nvgpu_release_firmware(g, img);
	nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err);
	return err;
}
1309 | |||
1310 | int gk20a_cde_reload(struct nvgpu_os_linux *l) | ||
1311 | __acquires(&l->cde_app->mutex) | ||
1312 | __releases(&l->cde_app->mutex) | ||
1313 | { | ||
1314 | struct gk20a *g = &l->g; | ||
1315 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
1316 | int err; | ||
1317 | |||
1318 | if (!cde_app->initialised) | ||
1319 | return -ENOSYS; | ||
1320 | |||
1321 | err = gk20a_busy(g); | ||
1322 | if (err) | ||
1323 | return err; | ||
1324 | |||
1325 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1326 | |||
1327 | gk20a_cde_stop(l); | ||
1328 | |||
1329 | err = gk20a_cde_create_contexts(l); | ||
1330 | if (!err) | ||
1331 | cde_app->initialised = true; | ||
1332 | |||
1333 | nvgpu_mutex_release(&cde_app->mutex); | ||
1334 | |||
1335 | gk20a_idle(g); | ||
1336 | return err; | ||
1337 | } | ||
1338 | |||
1339 | int gk20a_init_cde_support(struct nvgpu_os_linux *l) | ||
1340 | __acquires(&cde_app->mutex) | ||
1341 | __releases(&cde_app->mutex) | ||
1342 | { | ||
1343 | struct gk20a_cde_app *cde_app = &l->cde_app; | ||
1344 | int err; | ||
1345 | |||
1346 | if (cde_app->initialised) | ||
1347 | return 0; | ||
1348 | |||
1349 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); | ||
1350 | |||
1351 | err = nvgpu_mutex_init(&cde_app->mutex); | ||
1352 | if (err) | ||
1353 | return err; | ||
1354 | |||
1355 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1356 | |||
1357 | nvgpu_init_list_node(&cde_app->free_contexts); | ||
1358 | nvgpu_init_list_node(&cde_app->used_contexts); | ||
1359 | cde_app->ctx_count = 0; | ||
1360 | cde_app->ctx_count_top = 0; | ||
1361 | cde_app->ctx_usecount = 0; | ||
1362 | |||
1363 | err = gk20a_cde_create_contexts(l); | ||
1364 | if (!err) | ||
1365 | cde_app->initialised = true; | ||
1366 | |||
1367 | nvgpu_mutex_release(&cde_app->mutex); | ||
1368 | gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err); | ||
1369 | |||
1370 | if (err) | ||
1371 | nvgpu_mutex_destroy(&cde_app->mutex); | ||
1372 | |||
1373 | return err; | ||
1374 | } | ||
1375 | |||
/*
 * Identifiers for the launch parameters patched into the cde firmware
 * command buffers (see gk20a_buffer_convert_gpu_to_cde_v1()). H_/V_
 * prefixes refer to the horizontal and vertical conversion passes.
 *
 * NOTE: these values form the interface with the cde firmware image and
 * must not be renumbered.
 */
enum cde_launch_patch_id {
	PATCH_H_QMD_CTA_RASTER_WIDTH_ID     = 1024,
	PATCH_H_QMD_CTA_RASTER_HEIGHT_ID    = 1025,
	PATCH_QMD_CTA_RASTER_DEPTH_ID       = 1026, /* for firmware v0 only */
	PATCH_QMD_CTA_THREAD_DIMENSION0_ID  = 1027,
	PATCH_QMD_CTA_THREAD_DIMENSION1_ID  = 1028,
	PATCH_QMD_CTA_THREAD_DIMENSION2_ID  = 1029, /* for firmware v0 only */
	PATCH_USER_CONST_XTILES_ID          = 1030, /* for firmware v0 only */
	PATCH_USER_CONST_YTILES_ID          = 1031, /* for firmware v0 only */
	PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032,
	PATCH_USER_CONST_DSTPITCH_ID        = 1033, /* for firmware v0 only */
	PATCH_H_USER_CONST_FLAGS_ID         = 1034, /* for firmware v0 only */
	PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID  = 1035,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID  = 1036,
	PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID  = 1037,
	PATCH_VPC_CURRENT_GROUP_SIZE_X_ID   = 1038,
	PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID   = 1039,
	PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID   = 1040,
	PATCH_USER_CONST_XBLOCKS_ID         = 1041,
	PATCH_H_USER_CONST_DSTOFFSET_ID     = 1042,
	PATCH_V_QMD_CTA_RASTER_WIDTH_ID     = 1043,
	PATCH_V_QMD_CTA_RASTER_HEIGHT_ID    = 1044,
	PATCH_V_USER_CONST_DSTOFFSET_ID     = 1045,
	PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID  = 1046,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID  = 1047,
	PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID  = 1048,
	PATCH_H_LAUNCH_WORD1_ID             = 1049,
	PATCH_H_LAUNCH_WORD2_ID             = 1050,
	PATCH_V_LAUNCH_WORD1_ID             = 1051,
	PATCH_V_LAUNCH_WORD2_ID             = 1052,
	PATCH_H_QMD_PROGRAM_OFFSET_ID       = 1053,
	PATCH_H_QMD_REGISTER_COUNT_ID       = 1054,
	PATCH_V_QMD_PROGRAM_OFFSET_ID       = 1055,
	PATCH_V_QMD_REGISTER_COUNT_ID       = 1056,
};

/* maximum number of WRITE_PATCHes in the below function */
#define MAX_CDE_LAUNCH_PATCHES		32
1414 | |||
/*
 * Build the patch-parameter list for a v1-firmware compbit conversion and
 * submit it via gk20a_cde_convert().
 *
 * Computes the grid dimensions for the horizontal and vertical conversion
 * passes from the surface size (8x8-pixel tiles, 8x8 workgroups, 4 compbit
 * pairs per byte), selects the shader programs for this chip via
 * g->ops.cde.get_program_numbers, and writes the launch words depending on
 * which compbit sets (@consumer) are requested. On success the new fence
 * replaces @state->fence and the produced compbit kinds are added to
 * @state->valid_compbits.
 *
 * Returns 0 on success, -ENOSYS when the chip/programs are unsupported, or
 * the gk20a_cde_convert() error.
 */
static int gk20a_buffer_convert_gpu_to_cde_v1(
		struct nvgpu_os_linux *l,
		struct dma_buf *dmabuf, u32 consumer,
		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_fence *fence_in,
		struct gk20a_buffer_state *state)
{
	struct gk20a *g = &l->g;
	struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
	int param = 0;
	int err = 0;
	struct gk20a_fence *new_fence = NULL;
	const int wgx = 8;
	const int wgy = 8;
	const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
	const int xalign = compbits_per_byte * wgx;
	const int yalign = wgy;

	/* Compute per launch parameters */
	const int xtiles = (width + 7) >> 3;
	const int ytiles = (height + 7) >> 3;
	const int gridw_h = roundup(xtiles, xalign) / xalign;
	const int gridh_h = roundup(ytiles, yalign) / yalign;
	const int gridw_v = roundup(ytiles, xalign) / xalign;
	const int gridh_v = roundup(xtiles, yalign) / yalign;
	const int xblocks = (xtiles + 1) >> 1;
	/* NOTE(review): u64 difference truncated to int — assumes the
	 * voffset/hoffset delta fits in 31 bits; confirm with callers */
	const int voffset = compbits_voffset - compbits_hoffset;

	int hprog = -1;
	int vprog = -1;

	if (g->ops.cde.get_program_numbers)
		g->ops.cde.get_program_numbers(g, block_height_log2,
					       l->cde_app.shader_parameter,
					       &hprog, &vprog);
	else {
		nvgpu_warn(g, "cde: chip not supported");
		return -ENOSYS;
	}

	if (hprog < 0 || vprog < 0) {
		nvgpu_warn(g, "cde: could not determine programs");
		return -ENOSYS;
	}

	if (xtiles > 8192 / 8 || ytiles > 8192 / 8)
		nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)",
			   xtiles, ytiles);

	gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx",
		  width, height, block_height_log2,
		  compbits_hoffset, compbits_voffset, scatterbuffer_offset);
	gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)",
		  width, height, xtiles, ytiles);
	gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)",
		  wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
	gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
		  hprog,
		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
		  vprog,
		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);

	/* Write parameters */
#define WRITE_PATCH(NAME, VALUE) \
	params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE}
	WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks);
	WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2,
		block_height_log2);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
	WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy);
	WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1);

	/* horizontal pass grid */
	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h);
	WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h);
	WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h);
	WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1);

	/* vertical pass grid */
	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v);
	WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v);
	WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v);
	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);

	WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
	WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
	WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
	WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
		l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);

	/* launch words 0/1 run the pass, 2/3 are the no-op variant for the
	 * compbit sets that were not requested */
	if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}

	if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
	} else {
		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
	}
#undef WRITE_PATCH

	err = gk20a_cde_convert(l, dmabuf,
				compbits_hoffset,
				scatterbuffer_offset,
				fence_in, submit_flags,
				params, param, &new_fence);
	if (err)
		goto out;

	/* compbits generated, update state & fence */
	gk20a_fence_put(state->fence);
	state->fence = new_fence;
	state->valid_compbits |= consumer &
		(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
out:
	return err;
}
1557 | |||
1558 | static int gk20a_buffer_convert_gpu_to_cde( | ||
1559 | struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer, | ||
1560 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1561 | u64 scatterbuffer_offset, | ||
1562 | u32 width, u32 height, u32 block_height_log2, | ||
1563 | u32 submit_flags, struct nvgpu_fence *fence_in, | ||
1564 | struct gk20a_buffer_state *state) | ||
1565 | { | ||
1566 | struct gk20a *g = &l->g; | ||
1567 | int err = 0; | ||
1568 | |||
1569 | if (!l->cde_app.initialised) | ||
1570 | return -ENOSYS; | ||
1571 | |||
1572 | gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", | ||
1573 | l->cde_app.firmware_version); | ||
1574 | |||
1575 | if (l->cde_app.firmware_version == 1) { | ||
1576 | err = gk20a_buffer_convert_gpu_to_cde_v1( | ||
1577 | l, dmabuf, consumer, offset, compbits_hoffset, | ||
1578 | compbits_voffset, scatterbuffer_offset, | ||
1579 | width, height, block_height_log2, | ||
1580 | submit_flags, fence_in, state); | ||
1581 | } else { | ||
1582 | nvgpu_err(g, "unsupported CDE firmware version %d", | ||
1583 | l->cde_app.firmware_version); | ||
1584 | err = -EINVAL; | ||
1585 | } | ||
1586 | |||
1587 | return err; | ||
1588 | } | ||
1589 | |||
1590 | int gk20a_prepare_compressible_read( | ||
1591 | struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset, | ||
1592 | u64 compbits_hoffset, u64 compbits_voffset, | ||
1593 | u64 scatterbuffer_offset, | ||
1594 | u32 width, u32 height, u32 block_height_log2, | ||
1595 | u32 submit_flags, struct nvgpu_fence *fence, | ||
1596 | u32 *valid_compbits, u32 *zbc_color, | ||
1597 | struct gk20a_fence **fence_out) | ||
1598 | { | ||
1599 | struct gk20a *g = &l->g; | ||
1600 | int err = 0; | ||
1601 | struct gk20a_buffer_state *state; | ||
1602 | struct dma_buf *dmabuf; | ||
1603 | u32 missing_bits; | ||
1604 | |||
1605 | dmabuf = dma_buf_get(buffer_fd); | ||
1606 | if (IS_ERR(dmabuf)) | ||
1607 | return -EINVAL; | ||
1608 | |||
1609 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1610 | if (err) { | ||
1611 | dma_buf_put(dmabuf); | ||
1612 | return err; | ||
1613 | } | ||
1614 | |||
1615 | missing_bits = (state->valid_compbits ^ request) & request; | ||
1616 | |||
1617 | nvgpu_mutex_acquire(&state->lock); | ||
1618 | |||
1619 | if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { | ||
1620 | |||
1621 | gk20a_fence_put(state->fence); | ||
1622 | state->fence = NULL; | ||
1623 | /* state->fence = decompress(); | ||
1624 | state->valid_compbits = 0; */ | ||
1625 | err = -EINVAL; | ||
1626 | goto out; | ||
1627 | } else if (missing_bits) { | ||
1628 | u32 missing_cde_bits = missing_bits & | ||
1629 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
1630 | if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && | ||
1631 | missing_cde_bits) { | ||
1632 | err = gk20a_buffer_convert_gpu_to_cde( | ||
1633 | l, dmabuf, | ||
1634 | missing_cde_bits, | ||
1635 | offset, compbits_hoffset, | ||
1636 | compbits_voffset, scatterbuffer_offset, | ||
1637 | width, height, block_height_log2, | ||
1638 | submit_flags, fence, | ||
1639 | state); | ||
1640 | if (err) | ||
1641 | goto out; | ||
1642 | } | ||
1643 | } | ||
1644 | |||
1645 | if (state->fence && fence_out) | ||
1646 | *fence_out = gk20a_fence_get(state->fence); | ||
1647 | |||
1648 | if (valid_compbits) | ||
1649 | *valid_compbits = state->valid_compbits; | ||
1650 | |||
1651 | if (zbc_color) | ||
1652 | *zbc_color = state->zbc_color; | ||
1653 | |||
1654 | out: | ||
1655 | nvgpu_mutex_release(&state->lock); | ||
1656 | dma_buf_put(dmabuf); | ||
1657 | return err; | ||
1658 | } | ||
1659 | |||
1660 | int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, | ||
1661 | u32 valid_compbits, u64 offset, u32 zbc_color) | ||
1662 | { | ||
1663 | int err; | ||
1664 | struct gk20a_buffer_state *state; | ||
1665 | struct dma_buf *dmabuf; | ||
1666 | |||
1667 | dmabuf = dma_buf_get(buffer_fd); | ||
1668 | if (IS_ERR(dmabuf)) { | ||
1669 | nvgpu_err(g, "invalid dmabuf"); | ||
1670 | return -EINVAL; | ||
1671 | } | ||
1672 | |||
1673 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1674 | if (err) { | ||
1675 | nvgpu_err(g, "could not get state from dmabuf"); | ||
1676 | dma_buf_put(dmabuf); | ||
1677 | return err; | ||
1678 | } | ||
1679 | |||
1680 | nvgpu_mutex_acquire(&state->lock); | ||
1681 | |||
1682 | /* Update the compbits state. */ | ||
1683 | state->valid_compbits = valid_compbits; | ||
1684 | state->zbc_color = zbc_color; | ||
1685 | |||
1686 | /* Discard previous compbit job fence. */ | ||
1687 | gk20a_fence_put(state->fence); | ||
1688 | state->fence = NULL; | ||
1689 | |||
1690 | nvgpu_mutex_release(&state->lock); | ||
1691 | dma_buf_put(dmabuf); | ||
1692 | return 0; | ||
1693 | } | ||
diff --git a/drivers/gpu/nvgpu/common/linux/cde.h b/drivers/gpu/nvgpu/common/linux/cde.h new file mode 100644 index 00000000..22732a2a --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/cde.h | |||
@@ -0,0 +1,309 @@ | |||
1 | /* | ||
2 | * GK20A color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _CDE_GK20A_H_ | ||
20 | #define _CDE_GK20A_H_ | ||
21 | |||
/*
 * Static limits for the CDE firmware descriptor: how many buffers, patch
 * parameters, user-supplied parameters and array entries a firmware image
 * may declare.
 */
#define MAX_CDE_BUFS 10
#define MAX_CDE_PARAMS 64
#define MAX_CDE_USER_PARAMS 40
#define MAX_CDE_ARRAY_ENTRIES 9

/*
 * The size of the context ring buffer that is dedicated for handling cde
 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
 * wait on the previous job to that channel, so increasing this value
 * reduces the likelihood of stalls.
 */
#define NUM_CDE_CONTEXTS 4

struct dma_buf;
struct gk20a;
37 | |||
/*
 * This element defines a buffer that is allocated and mapped into gpu address
 * space. data_byte_offset defines the beginning of the buffer inside the
 * firmware. num_bytes defines how many bytes the firmware contains.
 *
 * If data_byte_offset is zero, we allocate an empty buffer.
 *
 * NOTE: this layout is parsed straight from the firmware image; do not
 * reorder or resize fields.
 */

struct gk20a_cde_hdr_buf {
	u64 data_byte_offset;
	u64 num_bytes;
};
50 | |||
/*
 * This element defines a constant patching in buffers. It basically
 * computes the physical address of <source_buf>+source_byte_offset. The
 * address is then modified into the patch value as per:
 *    value = (current_value & ~mask) | (address << shift) & mask .
 *
 * The type field defines the register size as:
 *   0=u32,
 *   1=u64 (little endian),
 *   2=u64 (big endian)
 *
 * NOTE: firmware-defined layout; do not reorder or resize fields.
 */

struct gk20a_cde_hdr_replace {
	u32 target_buf;
	u32 source_buf;
	s32 shift;
	u32 type;
	u64 target_byte_offset;
	u64 source_byte_offset;
	u64 mask;
};
72 | |||
/* Register size selector for the 'type' field of replace/param elements. */
enum {
	TYPE_PARAM_TYPE_U32 = 0,
	TYPE_PARAM_TYPE_U64_LITTLE,
	TYPE_PARAM_TYPE_U64_BIG
};
78 | |||
/*
 * This element defines a runtime patching in buffers. Parameters with id from
 * 0 to 1024 are reserved for special usage as follows:
 *   0 = comptags_per_cacheline,
 *   1 = slices_per_fbp,
 *   2 = num_fbps
 *   3 = source buffer first page offset
 *   4 = source buffer block height log2
 *   5 = backing store memory address
 *   6 = destination memory address
 *   7 = destination size (bytes)
 *   8 = backing store size (bytes)
 *   9 = cache line size
 *
 * Parameters above id 1024 are user-specified. I.e. they determine where
 * parameters from user space should be placed in buffers, what their
 * type is, etc.
 *
 * Once the value is available, we add data_offset to the value.
 *
 * The value address is then modified into the patch value as per:
 *    value = (current_value & ~mask) | (address << shift) & mask .
 *
 * The type field defines the register size as:
 *   0=u32,
 *   1=u64 (little endian),
 *   2=u64 (big endian)
 *
 * NOTE: firmware-defined layout; do not reorder or resize fields.
 */

struct gk20a_cde_hdr_param {
	u32 id;
	u32 target_buf;
	s32 shift;
	u32 type;
	s64 data_offset;
	u64 target_byte_offset;
	u64 mask;
};
117 | |||
/*
 * Reserved parameter ids (the 'id' field of gk20a_cde_hdr_param).
 * Ids below NUM_RESERVED_PARAMS are filled in by the driver; user-space
 * parameters start at NUM_RESERVED_PARAMS.
 */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
	TYPE_PARAM_GPU_CONFIGURATION,
	TYPE_PARAM_FIRSTPAGEOFFSET,
	TYPE_PARAM_NUMPAGES,
	TYPE_PARAM_BACKINGSTORE,
	TYPE_PARAM_DESTINATION,
	TYPE_PARAM_DESTINATION_SIZE,
	TYPE_PARAM_BACKINGSTORE_SIZE,
	TYPE_PARAM_SOURCE_SMMU_ADDR,
	TYPE_PARAM_BACKINGSTORE_BASE_HW,
	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
	TYPE_PARAM_SCATTERBUFFER,
	TYPE_PARAM_SCATTERBUFFER_SIZE,
	NUM_RESERVED_PARAMS = 1024,
};
134 | |||
/*
 * This header element defines a command. The op field determines whether the
 * element is defining an init (0) or convert command (1). data_byte_offset
 * denotes the beginning address of command elements in the file.
 *
 * NOTE: firmware-defined layout; do not reorder or resize fields.
 */

struct gk20a_cde_hdr_command {
	u32 op;
	u32 num_entries;
	u64 data_byte_offset;
};
146 | |||
/* Values for gk20a_cde_hdr_command.op. */
enum {
	TYPE_BUF_COMMAND_INIT = 0,
	TYPE_BUF_COMMAND_CONVERT
};
151 | |||
/*
 * This command element defines one entry inside a push buffer. target_buf
 * defines the buffer containing the pushbuffer entries, target_byte_offset
 * the offset inside the buffer and num_bytes the number of words in the
 * buffer.
 *
 * NOTE: firmware-defined layout; do not reorder or resize fields.
 */

struct gk20a_cde_cmd_elem {
	u32 target_buf;
	u32 padding;
	u64 target_byte_offset;
	u64 num_bytes;
};
164 | |||
/*
 * This element is used for storing a small array of data.
 * Ids for gk20a_cde_hdr_array.id below.
 */

enum {
	ARRAY_PROGRAM_OFFSET = 0,
	ARRAY_REGISTER_COUNT,
	ARRAY_LAUNCH_COMMAND,
	NUM_CDE_ARRAYS
};
175 | |||
/* A small firmware-provided data array, keyed by one of the ARRAY_* ids. */
struct gk20a_cde_hdr_array {
	u32 id;
	u32 data[MAX_CDE_ARRAY_ENTRIES];
};
180 | |||
/*
 * The following defines a single header element. Each element has a type
 * (one of the TYPE_* values below) selecting which union member is valid.
 *
 * NOTE: firmware-defined layout; do not reorder or resize fields.
 */

struct gk20a_cde_hdr_elem {
	u32 type;
	u32 padding;
	union {
		struct gk20a_cde_hdr_buf buf;
		struct gk20a_cde_hdr_replace replace;
		struct gk20a_cde_hdr_param param;
		u32 required_class;
		struct gk20a_cde_hdr_command command;
		struct gk20a_cde_hdr_array array;
	};
};
198 | |||
/* Values for gk20a_cde_hdr_elem.type. */
enum {
	TYPE_BUF = 0,
	TYPE_REPLACE,
	TYPE_PARAM,
	TYPE_REQUIRED_CLASS,
	TYPE_COMMAND,
	TYPE_ARRAY
};
207 | |||
/* One user-supplied parameter value, matched by id against the firmware's
 * gk20a_cde_hdr_param entries. */
struct gk20a_cde_param {
	u32 id;
	u32 padding;
	u64 value;
};
213 | |||
/*
 * Runtime state of one CDE conversion context. Each context owns a GPU
 * channel; contexts are kept on the free/used lists of gk20a_cde_app.
 */
struct gk20a_cde_ctx {
	struct nvgpu_os_linux *l;
	struct device *dev;

	/* channel related data */
	struct channel_gk20a *ch;
	struct vm_gk20a *vm;

	/* buf converter configuration */
	struct nvgpu_mem mem[MAX_CDE_BUFS];
	unsigned int num_bufs;

	/* buffer patching params (where should patching be done) */
	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
	unsigned int num_params;

	/* storage for user space parameter values */
	u32 user_param_values[MAX_CDE_USER_PARAMS];

	/* current job's source surface parameters */
	u32 surf_param_offset;
	u32 surf_param_lines;
	u64 surf_vaddr;

	/* GPU VA and size of the compbits destination mapping */
	u64 compbit_vaddr;
	u64 compbit_size;

	/* GPU VA and size of the scatter buffer mapping */
	u64 scatterbuffer_vaddr;
	u64 scatterbuffer_size;

	u64 backing_store_vaddr;

	/* gpfifo entries for the one-time init and per-job convert commands */
	struct nvgpu_gpfifo *init_convert_cmd;
	int init_cmd_num_entries;

	struct nvgpu_gpfifo *convert_cmd;
	int convert_cmd_num_entries;

	struct kobj_attribute attr;

	/* true once the init command has been submitted on this channel */
	bool init_cmd_executed;

	/* linkage on gk20a_cde_app free/used context lists */
	struct nvgpu_list_node list;
	bool is_temporary;
	bool in_use;
	/* deferred teardown of temporary contexts */
	struct delayed_work ctx_deleter_work;
};
260 | |||
261 | static inline struct gk20a_cde_ctx * | ||
262 | gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) | ||
263 | { | ||
264 | return (struct gk20a_cde_ctx *) | ||
265 | ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); | ||
266 | }; | ||
267 | |||
/*
 * Per-device CDE state: the context pools, firmware-derived data arrays and
 * counters. All list/counter accesses are serialized by 'mutex'.
 */
struct gk20a_cde_app {
	bool initialised;
	struct nvgpu_mutex mutex;

	/* idle and in-flight conversion contexts */
	struct nvgpu_list_node free_contexts;
	struct nvgpu_list_node used_contexts;
	unsigned int ctx_count;
	unsigned int ctx_usecount;
	/* high-water mark of ctx_count (exposed via debugfs) */
	unsigned int ctx_count_top;

	u32 firmware_version;

	/* data arrays read from the firmware (ARRAY_* ids) */
	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];

	/* tunable shader parameter (exposed via debugfs) */
	u32 shader_parameter;
};
284 | |||
/* CDE entry points, implemented in cde.c. */
void gk20a_cde_destroy(struct nvgpu_os_linux *l);
void gk20a_cde_suspend(struct nvgpu_os_linux *l);
int gk20a_init_cde_support(struct nvgpu_os_linux *l);
int gk20a_cde_reload(struct nvgpu_os_linux *l);
/* Submit a compbits conversion job; *fence_out (if non-NULL) receives a
 * reference the caller must put. */
int gk20a_cde_convert(struct nvgpu_os_linux *l,
		struct dma_buf *compbits_buf,
		u64 compbits_byte_offset,
		u64 scatterbuffer_byte_offset,
		struct nvgpu_fence *fence,
		u32 __flags, struct gk20a_cde_param *params,
		int num_params, struct gk20a_fence **fence_out);

int gk20a_prepare_compressible_read(
		struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
		u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_fence *fence,
		u32 *valid_compbits, u32 *zbc_color,
		struct gk20a_fence **fence_out);
int gk20a_mark_compressible_write(
		struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
		u32 zbc_color);
309 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c index 40cc64a4..cbea83b9 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_cde.c +++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c | |||
@@ -22,8 +22,8 @@ | |||
22 | static ssize_t gk20a_cde_reload_write(struct file *file, | 22 | static ssize_t gk20a_cde_reload_write(struct file *file, |
23 | const char __user *userbuf, size_t count, loff_t *ppos) | 23 | const char __user *userbuf, size_t count, loff_t *ppos) |
24 | { | 24 | { |
25 | struct gk20a *g = file->private_data; | 25 | struct nvgpu_os_linux *l = file->private_data; |
26 | gk20a_cde_reload(g); | 26 | gk20a_cde_reload(l); |
27 | return count; | 27 | return count; |
28 | } | 28 | } |
29 | 29 | ||
@@ -41,13 +41,13 @@ void gk20a_cde_debugfs_init(struct gk20a *g) | |||
41 | return; | 41 | return; |
42 | 42 | ||
43 | debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, | 43 | debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO, |
44 | l->debugfs, &g->cde_app.shader_parameter); | 44 | l->debugfs, &l->cde_app.shader_parameter); |
45 | debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, | 45 | debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO, |
46 | l->debugfs, &g->cde_app.ctx_count); | 46 | l->debugfs, &l->cde_app.ctx_count); |
47 | debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, | 47 | debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO, |
48 | l->debugfs, &g->cde_app.ctx_usecount); | 48 | l->debugfs, &l->cde_app.ctx_usecount); |
49 | debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, | 49 | debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO, |
50 | l->debugfs, &g->cde_app.ctx_count_top); | 50 | l->debugfs, &l->cde_app.ctx_count_top); |
51 | debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, | 51 | debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs, |
52 | g, &gk20a_cde_reload_fops); | 52 | l, &gk20a_cde_reload_fops); |
53 | } | 53 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c index 0d79b143..0357f098 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | |||
@@ -138,6 +138,7 @@ static int gk20a_ctrl_prepare_compressible_read( | |||
138 | struct gk20a *g, | 138 | struct gk20a *g, |
139 | struct nvgpu_gpu_prepare_compressible_read_args *args) | 139 | struct nvgpu_gpu_prepare_compressible_read_args *args) |
140 | { | 140 | { |
141 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
141 | struct nvgpu_fence fence; | 142 | struct nvgpu_fence fence; |
142 | struct gk20a_fence *fence_out = NULL; | 143 | struct gk20a_fence *fence_out = NULL; |
143 | int ret = 0; | 144 | int ret = 0; |
@@ -146,7 +147,7 @@ static int gk20a_ctrl_prepare_compressible_read( | |||
146 | fence.id = args->fence.syncpt_id; | 147 | fence.id = args->fence.syncpt_id; |
147 | fence.value = args->fence.syncpt_value; | 148 | fence.value = args->fence.syncpt_value; |
148 | 149 | ||
149 | ret = gk20a_prepare_compressible_read(g, args->handle, | 150 | ret = gk20a_prepare_compressible_read(l, args->handle, |
150 | args->request_compbits, args->offset, | 151 | args->request_compbits, args->offset, |
151 | args->compbits_hoffset, args->compbits_voffset, | 152 | args->compbits_hoffset, args->compbits_voffset, |
152 | args->scatterbuffer_offset, | 153 | args->scatterbuffer_offset, |
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 6a590baa..509930c7 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "pci.h" | 39 | #include "pci.h" |
40 | #include "module.h" | 40 | #include "module.h" |
41 | #include "intr.h" | 41 | #include "intr.h" |
42 | #include "cde.h" | ||
42 | #ifdef CONFIG_TEGRA_19x_GPU | 43 | #ifdef CONFIG_TEGRA_19x_GPU |
43 | #include "nvgpu_gpuid_t19x.h" | 44 | #include "nvgpu_gpuid_t19x.h" |
44 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION | 45 | #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION |
@@ -185,7 +186,7 @@ int gk20a_pm_finalize_poweron(struct device *dev) | |||
185 | gk20a_scale_resume(dev_from_gk20a(g)); | 186 | gk20a_scale_resume(dev_from_gk20a(g)); |
186 | 187 | ||
187 | if (platform->has_cde) | 188 | if (platform->has_cde) |
188 | gk20a_init_cde_support(g); | 189 | gk20a_init_cde_support(l); |
189 | 190 | ||
190 | done: | 191 | done: |
191 | if (err) | 192 | if (err) |
@@ -197,6 +198,7 @@ done: | |||
197 | static int gk20a_pm_prepare_poweroff(struct device *dev) | 198 | static int gk20a_pm_prepare_poweroff(struct device *dev) |
198 | { | 199 | { |
199 | struct gk20a *g = get_gk20a(dev); | 200 | struct gk20a *g = get_gk20a(dev); |
201 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
200 | int ret = 0; | 202 | int ret = 0; |
201 | struct gk20a_platform *platform = gk20a_get_platform(dev); | 203 | struct gk20a_platform *platform = gk20a_get_platform(dev); |
202 | 204 | ||
@@ -207,8 +209,15 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
207 | if (!g->power_on) | 209 | if (!g->power_on) |
208 | goto done; | 210 | goto done; |
209 | 211 | ||
212 | if (gk20a_fifo_is_engine_busy(g)) { | ||
213 | ret = -EBUSY; | ||
214 | goto done; | ||
215 | } | ||
216 | |||
210 | gk20a_scale_suspend(dev); | 217 | gk20a_scale_suspend(dev); |
211 | 218 | ||
219 | gk20a_cde_suspend(l); | ||
220 | |||
212 | ret = gk20a_prepare_poweroff(g); | 221 | ret = gk20a_prepare_poweroff(g); |
213 | if (ret) | 222 | if (ret) |
214 | goto error; | 223 | goto error; |
@@ -974,6 +983,7 @@ static int __exit gk20a_remove(struct platform_device *pdev) | |||
974 | { | 983 | { |
975 | struct device *dev = &pdev->dev; | 984 | struct device *dev = &pdev->dev; |
976 | struct gk20a *g = get_gk20a(dev); | 985 | struct gk20a *g = get_gk20a(dev); |
986 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
977 | struct gk20a_platform *platform = gk20a_get_platform(dev); | 987 | struct gk20a_platform *platform = gk20a_get_platform(dev); |
978 | 988 | ||
979 | gk20a_dbg_fn(""); | 989 | gk20a_dbg_fn(""); |
@@ -982,7 +992,7 @@ static int __exit gk20a_remove(struct platform_device *pdev) | |||
982 | return vgpu_remove(pdev); | 992 | return vgpu_remove(pdev); |
983 | 993 | ||
984 | if (platform->has_cde) | 994 | if (platform->has_cde) |
985 | gk20a_cde_destroy(g); | 995 | gk20a_cde_destroy(l); |
986 | 996 | ||
987 | gk20a_ctxsw_trace_cleanup(g); | 997 | gk20a_ctxsw_trace_cleanup(g); |
988 | 998 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h index ed8364a9..160a5738 100644 --- a/drivers/gpu/nvgpu/common/linux/os_linux.h +++ b/drivers/gpu/nvgpu/common/linux/os_linux.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/cdev.h> | 19 | #include <linux/cdev.h> |
20 | 20 | ||
21 | #include "gk20a/gk20a.h" | 21 | #include "gk20a/gk20a.h" |
22 | #include "cde.h" | ||
22 | 23 | ||
23 | struct nvgpu_os_linux { | 24 | struct nvgpu_os_linux { |
24 | struct gk20a g; | 25 | struct gk20a g; |
@@ -108,6 +109,7 @@ struct nvgpu_os_linux { | |||
108 | struct dentry *debugfs_force_preemption_gfxp; | 109 | struct dentry *debugfs_force_preemption_gfxp; |
109 | struct dentry *debugfs_dump_ctxsw_stats; | 110 | struct dentry *debugfs_dump_ctxsw_stats; |
110 | #endif | 111 | #endif |
112 | struct gk20a_cde_app cde_app; | ||
111 | }; | 113 | }; |
112 | 114 | ||
113 | static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) | 115 | static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g) |