Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 1669
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.h |  311
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c     |    3
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h     |    3
4 files changed, 1 insertion, 1985 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
deleted file mode 100644
index 506207f2..00000000
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ /dev/null
@@ -1,1669 +0,0 @@
1 | /* | ||
2 | * Color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/dma-mapping.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/dma-buf.h> | ||
22 | |||
23 | #include <trace/events/gk20a.h> | ||
24 | |||
25 | #include <nvgpu/dma.h> | ||
26 | #include <nvgpu/gmmu.h> | ||
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/nvgpu_common.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/log.h> | ||
31 | #include <nvgpu/bug.h> | ||
32 | #include <nvgpu/firmware.h> | ||
33 | |||
34 | #include "gk20a.h" | ||
35 | #include "channel_gk20a.h" | ||
36 | #include "mm_gk20a.h" | ||
37 | #include "cde_gk20a.h" | ||
38 | #include "fence_gk20a.h" | ||
39 | #include "gr_gk20a.h" | ||
40 | #include "common/linux/os_linux.h" | ||
41 | |||
42 | #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h> | ||
43 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
44 | |||
45 | /* | ||
46 | * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the | ||
47 | * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem. | ||
48 | */ | ||
49 | #include "common/linux/vm_priv.h" | ||
50 | |||
51 | static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); | ||
52 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g); | ||
53 | |||
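/* Tunables for context lifetime management: CTX_DELETE_TIME is the
 * delay in milliseconds before an idle temporary context is deleted
 * (used with msecs_to_jiffies() below), MAX_CTX_USE_COUNT caps how
 * many contexts may be in use at once before context allocation
 * returns -EAGAIN, and MAX_CTX_RETRY_TIME bounds that retry loop in
 * milliseconds. */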
54 | #define CTX_DELETE_TIME 1000 | ||
55 | |||
56 | #define MAX_CTX_USE_COUNT 42 | ||
57 | #define MAX_CTX_RETRY_TIME 2000 | ||
58 | |||
59 | static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) | ||
60 | { | ||
61 | unsigned int i; | ||
62 | |||
63 | for (i = 0; i < cde_ctx->num_bufs; i++) { | ||
64 | struct nvgpu_mem *mem = cde_ctx->mem + i; | ||
65 | nvgpu_dma_unmap_free(cde_ctx->vm, mem); | ||
66 | } | ||
67 | |||
68 | nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd); | ||
69 | |||
70 | cde_ctx->convert_cmd = NULL; | ||
71 | cde_ctx->init_convert_cmd = NULL; | ||
72 | cde_ctx->num_bufs = 0; | ||
73 | cde_ctx->num_params = 0; | ||
74 | cde_ctx->init_cmd_num_entries = 0; | ||
75 | cde_ctx->convert_cmd_num_entries = 0; | ||
76 | cde_ctx->init_cmd_executed = false; | ||
77 | } | ||
78 | |||
79 | static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx) | ||
80 | __must_hold(&cde_app->mutex) | ||
81 | { | ||
82 | struct gk20a *g = cde_ctx->g; | ||
83 | struct channel_gk20a *ch = cde_ctx->ch; | ||
84 | struct vm_gk20a *vm = ch->vm; | ||
85 | |||
86 | trace_gk20a_cde_remove_ctx(cde_ctx); | ||
87 | |||
88 | /* release mapped memory */ | ||
89 | gk20a_deinit_cde_img(cde_ctx); | ||
90 | nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem, | ||
91 | cde_ctx->backing_store_vaddr); | ||
92 | |||
93 | /* free the channel */ | ||
94 | gk20a_channel_close(ch); | ||
95 | |||
96 | /* housekeeping on app */ | ||
97 | nvgpu_list_del(&cde_ctx->list); | ||
98 | cde_ctx->g->cde_app.ctx_count--; | ||
99 | nvgpu_kfree(g, cde_ctx); | ||
100 | } | ||
101 | |||
102 | static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx, | ||
103 | bool wait_finish) | ||
104 | __releases(&cde_app->mutex) | ||
105 | __acquires(&cde_app->mutex) | ||
106 | { | ||
107 | struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; | ||
108 | |||
109 | /* permanent contexts do not have a deleter work */ | ||
110 | if (!cde_ctx->is_temporary) | ||
111 | return; | ||
112 | |||
113 | if (wait_finish) { | ||
114 | nvgpu_mutex_release(&cde_app->mutex); | ||
115 | cancel_delayed_work_sync(&cde_ctx->ctx_deleter_work); | ||
116 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
117 | } else { | ||
118 | cancel_delayed_work(&cde_ctx->ctx_deleter_work); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void gk20a_cde_remove_contexts(struct gk20a *g) | ||
123 | __must_hold(&cde_app->mutex) | ||
124 | { | ||
125 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
126 | struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; | ||
127 | |||
128 | /* safe to release the mutex in cancel_deleter since the app is | ||
129 | * deinitialised and no new jobs are started; deleter works can only | ||
130 | * be waiting for the mutex or about to abort */ | ||
131 | |||
132 | nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, | ||
133 | &cde_app->free_contexts, gk20a_cde_ctx, list) { | ||
134 | gk20a_cde_cancel_deleter(cde_ctx, true); | ||
135 | gk20a_cde_remove_ctx(cde_ctx); | ||
136 | } | ||
137 | |||
138 | nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, | ||
139 | &cde_app->used_contexts, gk20a_cde_ctx, list) { | ||
140 | gk20a_cde_cancel_deleter(cde_ctx, true); | ||
141 | gk20a_cde_remove_ctx(cde_ctx); | ||
142 | } | ||
143 | } | ||
144 | |||
145 | static void gk20a_cde_stop(struct gk20a *g) | ||
146 | __must_hold(&cde_app->mutex) | ||
147 | { | ||
148 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
149 | |||
150 | /* prevent further conversions and delayed works from working */ | ||
151 | cde_app->initialised = false; | ||
152 | /* free all data, empty the list */ | ||
153 | gk20a_cde_remove_contexts(g); | ||
154 | } | ||
155 | |||
156 | void gk20a_cde_destroy(struct gk20a *g) | ||
157 | __acquires(&cde_app->mutex) | ||
158 | __releases(&cde_app->mutex) | ||
159 | { | ||
160 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
161 | |||
162 | if (!cde_app->initialised) | ||
163 | return; | ||
164 | |||
165 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
166 | gk20a_cde_stop(g); | ||
167 | nvgpu_mutex_release(&cde_app->mutex); | ||
168 | |||
169 | nvgpu_mutex_destroy(&cde_app->mutex); | ||
170 | } | ||
171 | |||
172 | void gk20a_cde_suspend(struct gk20a *g) | ||
173 | __acquires(&cde_app->mutex) | ||
174 | __releases(&cde_app->mutex) | ||
175 | { | ||
176 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
177 | struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save; | ||
178 | |||
179 | if (!cde_app->initialised) | ||
180 | return; | ||
181 | |||
182 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
183 | |||
184 | nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, | ||
185 | &cde_app->free_contexts, gk20a_cde_ctx, list) { | ||
186 | gk20a_cde_cancel_deleter(cde_ctx, false); | ||
187 | } | ||
188 | |||
189 | nvgpu_list_for_each_entry_safe(cde_ctx, cde_ctx_save, | ||
190 | &cde_app->used_contexts, gk20a_cde_ctx, list) { | ||
191 | gk20a_cde_cancel_deleter(cde_ctx, false); | ||
192 | } | ||
193 | |||
194 | nvgpu_mutex_release(&cde_app->mutex); | ||
195 | |||
196 | } | ||
197 | |||
198 | static int gk20a_cde_create_context(struct gk20a *g) | ||
199 | __must_hold(&cde_app->mutex) | ||
200 | { | ||
201 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
202 | struct gk20a_cde_ctx *cde_ctx; | ||
203 | |||
204 | cde_ctx = gk20a_cde_allocate_context(g); | ||
205 | if (IS_ERR(cde_ctx)) | ||
206 | return PTR_ERR(cde_ctx); | ||
207 | |||
208 | nvgpu_list_add(&cde_ctx->list, &cde_app->free_contexts); | ||
209 | cde_app->ctx_count++; | ||
210 | if (cde_app->ctx_count > cde_app->ctx_count_top) | ||
211 | cde_app->ctx_count_top = cde_app->ctx_count; | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int gk20a_cde_create_contexts(struct gk20a *g) | ||
217 | __must_hold(&g->cde_app->mutex) | ||
218 | { | ||
219 | int err; | ||
220 | int i; | ||
221 | |||
222 | for (i = 0; i < NUM_CDE_CONTEXTS; i++) { | ||
223 | err = gk20a_cde_create_context(g); | ||
224 | if (err) | ||
225 | goto out; | ||
226 | } | ||
227 | |||
228 | return 0; | ||
229 | out: | ||
230 | gk20a_cde_remove_contexts(g); | ||
231 | return err; | ||
232 | } | ||
233 | |||
234 | static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, | ||
235 | struct nvgpu_firmware *img, | ||
236 | struct gk20a_cde_hdr_buf *buf) | ||
237 | { | ||
238 | struct nvgpu_mem *mem; | ||
239 | struct gk20a *g = cde_ctx->g; | ||
240 | int err; | ||
241 | |||
242 | /* check that the file can hold the buf */ | ||
243 | if (buf->data_byte_offset != 0 && | ||
244 | buf->data_byte_offset + buf->num_bytes > img->size) { | ||
245 | nvgpu_warn(g, "cde: invalid data section. buffer idx = %d", | ||
246 | cde_ctx->num_bufs); | ||
247 | return -EINVAL; | ||
248 | } | ||
249 | |||
250 | /* check that we have enough buf elems available */ | ||
251 | if (cde_ctx->num_bufs >= MAX_CDE_BUFS) { | ||
252 | nvgpu_warn(g, "cde: too many buffers. buffer idx = %d", | ||
253 | cde_ctx->num_bufs); | ||
254 | return -ENOMEM; | ||
255 | } | ||
256 | |||
257 | /* allocate buf */ | ||
258 | mem = cde_ctx->mem + cde_ctx->num_bufs; | ||
259 | err = nvgpu_dma_alloc_map_sys(cde_ctx->vm, buf->num_bytes, mem); | ||
260 | if (err) { | ||
261 | nvgpu_warn(g, "cde: could not allocate device memory. buffer idx = %d", | ||
262 | cde_ctx->num_bufs); | ||
263 | return -ENOMEM; | ||
264 | } | ||
265 | |||
266 | /* copy the content */ | ||
267 | if (buf->data_byte_offset != 0) | ||
268 | memcpy(mem->cpu_va, img->data + buf->data_byte_offset, | ||
269 | buf->num_bytes); | ||
270 | |||
271 | cde_ctx->num_bufs++; | ||
272 | |||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, | ||
277 | int type, s32 shift, u64 mask, u64 value) | ||
278 | { | ||
279 | struct gk20a *g = cde_ctx->g; | ||
280 | u32 *target_mem_ptr = target; | ||
281 | u64 *target_mem_ptr_u64 = target; | ||
282 | u64 current_value, new_value; | ||
283 | |||
284 | value = (shift >= 0) ? value << shift : value >> -shift; | ||
285 | value &= mask; | ||
286 | |||
287 | /* read current data from the location */ | ||
288 | current_value = 0; | ||
289 | if (type == TYPE_PARAM_TYPE_U32) { | ||
290 | if (mask != 0xfffffffful) | ||
291 | current_value = *target_mem_ptr; | ||
292 | } else if (type == TYPE_PARAM_TYPE_U64_LITTLE) { | ||
293 | if (mask != ~0ULL) | ||
294 | current_value = *target_mem_ptr_u64; | ||
295 | } else if (type == TYPE_PARAM_TYPE_U64_BIG) { | ||
296 | current_value = *target_mem_ptr_u64; | ||
297 | current_value = (u64)(current_value >> 32) | | ||
298 | (u64)(current_value << 32); | ||
299 | } else { | ||
300 | nvgpu_warn(g, "cde: unknown type. type=%d", | ||
301 | type); | ||
302 | return -EINVAL; | ||
303 | } | ||
304 | |||
305 | current_value &= ~mask; | ||
306 | new_value = current_value | value; | ||
307 | |||
308 | /* store the element data back */ | ||
309 | if (type == TYPE_PARAM_TYPE_U32) | ||
310 | *target_mem_ptr = (u32)new_value; | ||
311 | else if (type == TYPE_PARAM_TYPE_U64_LITTLE) | ||
312 | *target_mem_ptr_u64 = new_value; | ||
313 | else { | ||
314 | new_value = (u64)(new_value >> 32) | | ||
315 | (u64)(new_value << 32); | ||
316 | *target_mem_ptr_u64 = new_value; | ||
317 | } | ||
318 | |||
319 | return 0; | ||
320 | } | ||
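/* Worked example of the patching above: for TYPE_PARAM_TYPE_U32 with
 * shift=8, mask=0xffffff00 and value=0x1234, the patch value becomes
 * (0x1234 << 8) & 0xffffff00 = 0x00123400; since the mask does not
 * cover the whole word, the current word (say 0xaabbccdd) is read and
 * masked to 0x000000dd, and 0x001234dd is stored back. For
 * TYPE_PARAM_TYPE_U64_BIG the 32-bit halves are swapped before and
 * after patching so that shift and mask apply to the little-endian
 * view of the value. */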
321 | |||
322 | static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, | ||
323 | struct nvgpu_firmware *img, | ||
324 | struct gk20a_cde_hdr_replace *replace) | ||
325 | { | ||
326 | struct nvgpu_mem *source_mem; | ||
327 | struct nvgpu_mem *target_mem; | ||
328 | struct gk20a *g = cde_ctx->g; | ||
329 | u32 *target_mem_ptr; | ||
330 | u64 vaddr; | ||
331 | int err; | ||
332 | |||
333 | if (replace->target_buf >= cde_ctx->num_bufs || | ||
334 | replace->source_buf >= cde_ctx->num_bufs) { | ||
335 | nvgpu_warn(g, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d", | ||
336 | replace->target_buf, replace->source_buf, | ||
337 | cde_ctx->num_bufs); | ||
338 | return -EINVAL; | ||
339 | } | ||
340 | |||
341 | source_mem = cde_ctx->mem + replace->source_buf; | ||
342 | target_mem = cde_ctx->mem + replace->target_buf; | ||
343 | target_mem_ptr = target_mem->cpu_va; | ||
344 | |||
345 | if (source_mem->size < (replace->source_byte_offset + 3) || | ||
346 | target_mem->size < (replace->target_byte_offset + 3)) { | ||
347 | nvgpu_warn(g, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", | ||
348 | replace->target_byte_offset, | ||
349 | replace->source_byte_offset, | ||
350 | source_mem->size, | ||
351 | target_mem->size); | ||
352 | return -EINVAL; | ||
353 | } | ||
354 | |||
355 | /* calculate the target pointer */ | ||
356 | target_mem_ptr += (replace->target_byte_offset / sizeof(u32)); | ||
357 | |||
358 | /* determine patch value */ | ||
359 | vaddr = source_mem->gpu_va + replace->source_byte_offset; | ||
360 | err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type, | ||
361 | replace->shift, replace->mask, | ||
362 | vaddr); | ||
363 | if (err) { | ||
364 | nvgpu_warn(g, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld", | ||
365 | err, replace->target_buf, | ||
366 | replace->target_byte_offset, | ||
367 | replace->source_buf, | ||
368 | replace->source_byte_offset); | ||
369 | } | ||
370 | |||
371 | return err; | ||
372 | } | ||
373 | |||
374 | static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) | ||
375 | { | ||
376 | struct gk20a *g = cde_ctx->g; | ||
377 | struct nvgpu_mem *target_mem; | ||
378 | u32 *target_mem_ptr; | ||
379 | u64 new_data; | ||
380 | int user_id = 0, err; | ||
381 | unsigned int i; | ||
382 | |||
383 | for (i = 0; i < cde_ctx->num_params; i++) { | ||
384 | struct gk20a_cde_hdr_param *param = cde_ctx->params + i; | ||
385 | target_mem = cde_ctx->mem + param->target_buf; | ||
386 | target_mem_ptr = target_mem->cpu_va; | ||
387 | target_mem_ptr += (param->target_byte_offset / sizeof(u32)); | ||
388 | |||
389 | switch (param->id) { | ||
390 | case TYPE_PARAM_COMPTAGS_PER_CACHELINE: | ||
391 | new_data = g->gr.comptags_per_cacheline; | ||
392 | break; | ||
393 | case TYPE_PARAM_GPU_CONFIGURATION: | ||
394 | new_data = (u64)g->ltc_count * g->gr.slices_per_ltc * | ||
395 | g->gr.cacheline_size; | ||
396 | break; | ||
397 | case TYPE_PARAM_FIRSTPAGEOFFSET: | ||
398 | new_data = cde_ctx->surf_param_offset; | ||
399 | break; | ||
400 | case TYPE_PARAM_NUMPAGES: | ||
401 | new_data = cde_ctx->surf_param_lines; | ||
402 | break; | ||
403 | case TYPE_PARAM_BACKINGSTORE: | ||
404 | new_data = cde_ctx->backing_store_vaddr; | ||
405 | break; | ||
406 | case TYPE_PARAM_DESTINATION: | ||
407 | new_data = cde_ctx->compbit_vaddr; | ||
408 | break; | ||
409 | case TYPE_PARAM_DESTINATION_SIZE: | ||
410 | new_data = cde_ctx->compbit_size; | ||
411 | break; | ||
412 | case TYPE_PARAM_BACKINGSTORE_SIZE: | ||
413 | new_data = g->gr.compbit_store.mem.size; | ||
414 | break; | ||
415 | case TYPE_PARAM_SOURCE_SMMU_ADDR: | ||
416 | new_data = gk20a_mm_gpuva_to_iova_base(cde_ctx->vm, | ||
417 | cde_ctx->surf_vaddr); | ||
418 | if (new_data == 0) | ||
419 | return -EINVAL; | ||
420 | break; | ||
421 | case TYPE_PARAM_BACKINGSTORE_BASE_HW: | ||
422 | new_data = g->gr.compbit_store.base_hw; | ||
423 | break; | ||
424 | case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: | ||
425 | new_data = g->gr.gobs_per_comptagline_per_slice; | ||
426 | break; | ||
427 | case TYPE_PARAM_SCATTERBUFFER: | ||
428 | new_data = cde_ctx->scatterbuffer_vaddr; | ||
429 | break; | ||
430 | case TYPE_PARAM_SCATTERBUFFER_SIZE: | ||
431 | new_data = cde_ctx->scatterbuffer_size; | ||
432 | break; | ||
433 | default: | ||
434 | user_id = param->id - NUM_RESERVED_PARAMS; | ||
435 | if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) | ||
436 | continue; | ||
437 | new_data = cde_ctx->user_param_values[user_id]; | ||
438 | } | ||
439 | |||
440 | gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx", | ||
441 | i, param->id, param->target_buf, | ||
442 | param->target_byte_offset, new_data, | ||
443 | param->data_offset, param->type, param->shift, | ||
444 | param->mask); | ||
445 | |||
446 | new_data += param->data_offset; | ||
447 | |||
448 | err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type, | ||
449 | param->shift, param->mask, new_data); | ||
450 | |||
451 | if (err) { | ||
452 | nvgpu_warn(g, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu", | ||
453 | err, i, param->id, param->target_buf, | ||
454 | param->target_byte_offset, new_data); | ||
455 | return err; | ||
456 | } | ||
457 | } | ||
458 | |||
459 | return 0; | ||
460 | } | ||
461 | |||
462 | static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, | ||
463 | struct nvgpu_firmware *img, | ||
464 | struct gk20a_cde_hdr_param *param) | ||
465 | { | ||
466 | struct nvgpu_mem *target_mem; | ||
467 | struct gk20a *g = cde_ctx->g; | ||
468 | |||
469 | if (param->target_buf >= cde_ctx->num_bufs) { | ||
470 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", | ||
471 | cde_ctx->num_params, param->target_buf, | ||
472 | cde_ctx->num_bufs); | ||
473 | return -EINVAL; | ||
474 | } | ||
475 | |||
476 | target_mem = cde_ctx->mem + param->target_buf; | ||
477 | if (target_mem->size < (param->target_byte_offset + 3)) { | ||
478 | nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", | ||
479 | cde_ctx->num_params, param->target_byte_offset, | ||
480 | target_mem->size); | ||
481 | return -EINVAL; | ||
482 | } | ||
483 | |||
484 | /* does this parameter fit into our parameter structure */ | ||
485 | if (cde_ctx->num_params >= MAX_CDE_PARAMS) { | ||
486 | nvgpu_warn(g, "cde: no room for new parameters. param idx = %d", | ||
487 | cde_ctx->num_params); | ||
488 | return -ENOMEM; | ||
489 | } | ||
490 | |||
491 | /* is the given id valid? */ | ||
492 | if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { | ||
493 | nvgpu_warn(g, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", | ||
494 | cde_ctx->num_params, param->id, | ||
495 | NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); | ||
496 | return -EINVAL; | ||
497 | } | ||
498 | |||
499 | cde_ctx->params[cde_ctx->num_params] = *param; | ||
500 | cde_ctx->num_params++; | ||
501 | |||
502 | return 0; | ||
503 | } | ||
504 | |||
505 | static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, | ||
506 | struct nvgpu_firmware *img, | ||
507 | u32 required_class) | ||
508 | { | ||
509 | struct gk20a *g = cde_ctx->g; | ||
510 | struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx; | ||
511 | int err; | ||
512 | |||
513 | alloc_obj_ctx.class_num = required_class; | ||
514 | alloc_obj_ctx.flags = 0; | ||
515 | |||
516 | /* CDE enabled */ | ||
517 | cde_ctx->ch->cde = true; | ||
518 | |||
519 | err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); | ||
520 | if (err) { | ||
521 | nvgpu_warn(g, "cde: failed to allocate ctx. err=%d", | ||
522 | err); | ||
523 | return err; | ||
524 | } | ||
525 | |||
526 | return 0; | ||
527 | } | ||
528 | |||
529 | static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, | ||
530 | struct nvgpu_firmware *img, | ||
531 | u32 op, | ||
532 | struct gk20a_cde_cmd_elem *cmd_elem, | ||
533 | u32 num_elems) | ||
534 | { | ||
535 | struct gk20a *g = cde_ctx->g; | ||
536 | struct nvgpu_gpfifo **gpfifo, *gpfifo_elem; | ||
537 | u32 *num_entries; | ||
538 | unsigned int i; | ||
539 | |||
540 | /* check command type */ | ||
541 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
542 | gpfifo = &cde_ctx->init_convert_cmd; | ||
543 | num_entries = &cde_ctx->init_cmd_num_entries; | ||
544 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
545 | gpfifo = &cde_ctx->convert_cmd; | ||
546 | num_entries = &cde_ctx->convert_cmd_num_entries; | ||
547 | } else { | ||
548 | nvgpu_warn(g, "cde: unknown command. op=%u", | ||
549 | op); | ||
550 | return -EINVAL; | ||
551 | } | ||
552 | |||
553 | /* allocate gpfifo entries to be pushed */ | ||
554 | *gpfifo = nvgpu_kzalloc(cde_ctx->g, | ||
555 | sizeof(struct nvgpu_gpfifo) * num_elems); | ||
556 | if (!*gpfifo) { | ||
557 | nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries"); | ||
558 | return -ENOMEM; | ||
559 | } | ||
560 | |||
561 | gpfifo_elem = *gpfifo; | ||
562 | for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { | ||
563 | struct nvgpu_mem *target_mem; | ||
564 | |||
565 | /* validate the current entry */ | ||
566 | if (cmd_elem->target_buf >= cde_ctx->num_bufs) { | ||
567 | nvgpu_warn(g, "cde: target buffer is not available (target=%u, num_bufs=%u)", | ||
568 | cmd_elem->target_buf, cde_ctx->num_bufs); | ||
569 | return -EINVAL; | ||
570 | } | ||
571 | |||
572 | target_mem = cde_ctx->mem + cmd_elem->target_buf; | ||
573 | if (target_mem->size < | ||
574 | cmd_elem->target_byte_offset + cmd_elem->num_bytes) { | ||
575 | nvgpu_warn(g, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", | ||
576 | target_mem->size, | ||
577 | cmd_elem->target_byte_offset, | ||
578 | cmd_elem->num_bytes); | ||
579 | return -EINVAL; | ||
580 | } | ||
581 | |||
582 | /* store the element into gpfifo */ | ||
583 | gpfifo_elem->entry0 = | ||
584 | u64_lo32(target_mem->gpu_va + | ||
585 | cmd_elem->target_byte_offset); | ||
586 | gpfifo_elem->entry1 = | ||
587 | u64_hi32(target_mem->gpu_va + | ||
588 | cmd_elem->target_byte_offset) | | ||
589 | pbdma_gp_entry1_length_f(cmd_elem->num_bytes / | ||
590 | sizeof(u32)); | ||
591 | } | ||
592 | |||
593 | *num_entries = num_elems; | ||
594 | return 0; | ||
595 | } | ||
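/* Illustration of the gpfifo encoding above: a command segment of 64
 * bytes (16 words) at GPU VA 0x120340000 is stored as
 *   entry0 = u64_lo32(0x120340000) = 0x20340000;
 *   entry1 = u64_hi32(0x120340000) | pbdma_gp_entry1_length_f(16);
 * i.e. the low VA bits go in entry0, and entry1 packs the high VA bits
 * together with the segment length in 32-bit words. */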
596 | |||
597 | static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx) | ||
598 | { | ||
599 | struct gk20a *g = cde_ctx->g; | ||
600 | unsigned long init_bytes = cde_ctx->init_cmd_num_entries * | ||
601 | sizeof(struct nvgpu_gpfifo); | ||
602 | unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries * | ||
603 | sizeof(struct nvgpu_gpfifo); | ||
604 | unsigned long total_bytes = init_bytes + conv_bytes; | ||
605 | struct nvgpu_gpfifo *combined_cmd; | ||
606 | |||
607 | /* allocate buffer that has space for both */ | ||
608 | combined_cmd = nvgpu_kzalloc(cde_ctx->g, total_bytes); | ||
609 | if (!combined_cmd) { | ||
610 | nvgpu_warn(g, | ||
611 | "cde: could not allocate memory for gpfifo entries"); | ||
612 | return -ENOMEM; | ||
613 | } | ||
614 | |||
615 | /* move the original init here and append convert */ | ||
616 | memcpy(combined_cmd, cde_ctx->init_convert_cmd, init_bytes); | ||
617 | memcpy(combined_cmd + cde_ctx->init_cmd_num_entries, | ||
618 | cde_ctx->convert_cmd, conv_bytes); | ||
619 | |||
620 | nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd); | ||
621 | nvgpu_kfree(cde_ctx->g, cde_ctx->convert_cmd); | ||
622 | |||
623 | cde_ctx->init_convert_cmd = combined_cmd; | ||
624 | cde_ctx->convert_cmd = combined_cmd | ||
625 | + cde_ctx->init_cmd_num_entries; | ||
626 | |||
627 | return 0; | ||
628 | } | ||
629 | |||
630 | static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, | ||
631 | struct nvgpu_firmware *img) | ||
632 | { | ||
633 | struct gk20a *g = cde_ctx->g; | ||
634 | struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; | ||
635 | u32 *data = (u32 *)img->data; | ||
636 | u32 num_of_elems; | ||
637 | struct gk20a_cde_hdr_elem *elem; | ||
638 | u32 min_size = 0; | ||
639 | int err = 0; | ||
640 | unsigned int i; | ||
641 | |||
642 | min_size += 2 * sizeof(u32); | ||
643 | if (img->size < min_size) { | ||
644 | nvgpu_warn(g, "cde: invalid image header"); | ||
645 | return -EINVAL; | ||
646 | } | ||
647 | |||
648 | cde_app->firmware_version = data[0]; | ||
649 | num_of_elems = data[1]; | ||
650 | |||
651 | min_size += num_of_elems * sizeof(*elem); | ||
652 | if (img->size < min_size) { | ||
653 | nvgpu_warn(g, "cde: bad image"); | ||
654 | return -EINVAL; | ||
655 | } | ||
656 | |||
657 | elem = (struct gk20a_cde_hdr_elem *)&data[2]; | ||
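/* Image layout as parsed here: data[0] holds the firmware version,
 * data[1] the element count, and num_of_elems fixed-size
 * gk20a_cde_hdr_elem records follow at &data[2]. Variable-sized
 * payloads (TYPE_BUF contents, TYPE_COMMAND entry arrays) live
 * elsewhere in the image and are located via the byte offsets stored
 * in those records. */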
658 | for (i = 0; i < num_of_elems; i++) { | ||
659 | int err = 0; | ||
660 | switch (elem->type) { | ||
661 | case TYPE_BUF: | ||
662 | err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); | ||
663 | break; | ||
664 | case TYPE_REPLACE: | ||
665 | err = gk20a_init_cde_replace(cde_ctx, img, | ||
666 | &elem->replace); | ||
667 | break; | ||
668 | case TYPE_PARAM: | ||
669 | err = gk20a_init_cde_param(cde_ctx, img, &elem->param); | ||
670 | break; | ||
671 | case TYPE_REQUIRED_CLASS: | ||
672 | err = gk20a_init_cde_required_class(cde_ctx, img, | ||
673 | elem->required_class); | ||
674 | break; | ||
675 | case TYPE_COMMAND: | ||
676 | { | ||
677 | struct gk20a_cde_cmd_elem *cmd = (void *) | ||
678 | &img->data[elem->command.data_byte_offset]; | ||
679 | err = gk20a_init_cde_command(cde_ctx, img, | ||
680 | elem->command.op, cmd, | ||
681 | elem->command.num_entries); | ||
682 | break; | ||
683 | } | ||
684 | case TYPE_ARRAY: | ||
685 | memcpy(&cde_app->arrays[elem->array.id][0], | ||
686 | elem->array.data, | ||
687 | MAX_CDE_ARRAY_ENTRIES*sizeof(u32)); | ||
688 | break; | ||
689 | default: | ||
690 | nvgpu_warn(g, "cde: unknown header element"); | ||
691 | err = -EINVAL; | ||
692 | } | ||
693 | |||
694 | if (err) | ||
695 | goto deinit_image; | ||
696 | |||
697 | elem++; | ||
698 | } | ||
699 | |||
700 | if (!cde_ctx->init_convert_cmd || !cde_ctx->init_cmd_num_entries) { | ||
701 | nvgpu_warn(g, "cde: init command not defined"); | ||
702 | err = -EINVAL; | ||
703 | goto deinit_image; | ||
704 | } | ||
705 | |||
706 | if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { | ||
707 | nvgpu_warn(g, "cde: convert command not defined"); | ||
708 | err = -EINVAL; | ||
709 | goto deinit_image; | ||
710 | } | ||
711 | |||
712 | err = gk20a_cde_pack_cmdbufs(cde_ctx); | ||
713 | if (err) | ||
714 | goto deinit_image; | ||
715 | |||
716 | return 0; | ||
717 | |||
718 | deinit_image: | ||
719 | gk20a_deinit_cde_img(cde_ctx); | ||
720 | return err; | ||
721 | } | ||
722 | |||
723 | static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | ||
724 | u32 op, struct nvgpu_fence *fence, | ||
725 | u32 flags, struct gk20a_fence **fence_out) | ||
726 | { | ||
727 | struct gk20a *g = cde_ctx->g; | ||
728 | struct nvgpu_gpfifo *gpfifo = NULL; | ||
729 | int num_entries = 0; | ||
730 | |||
731 | /* check command type */ | ||
732 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
733 | /* both init and convert combined */ | ||
734 | gpfifo = cde_ctx->init_convert_cmd; | ||
735 | num_entries = cde_ctx->init_cmd_num_entries | ||
736 | + cde_ctx->convert_cmd_num_entries; | ||
737 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
738 | gpfifo = cde_ctx->convert_cmd; | ||
739 | num_entries = cde_ctx->convert_cmd_num_entries; | ||
740 | } else { | ||
741 | nvgpu_warn(g, "cde: unknown buffer"); | ||
742 | return -EINVAL; | ||
743 | } | ||
744 | |||
745 | if (gpfifo == NULL || num_entries == 0) { | ||
746 | nvgpu_warn(g, "cde: buffer not available"); | ||
747 | return -ENOSYS; | ||
748 | } | ||
749 | |||
750 | return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL, | ||
751 | num_entries, flags, fence, fence_out, true, | ||
752 | NULL); | ||
753 | } | ||
754 | |||
755 | static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx) | ||
756 | __acquires(&cde_app->mutex) | ||
757 | __releases(&cde_app->mutex) | ||
758 | { | ||
759 | struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; | ||
760 | |||
761 | gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx); | ||
762 | trace_gk20a_cde_release(cde_ctx); | ||
763 | |||
764 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
765 | |||
766 | if (cde_ctx->in_use) { | ||
767 | cde_ctx->in_use = false; | ||
768 | nvgpu_list_move(&cde_ctx->list, &cde_app->free_contexts); | ||
769 | cde_app->ctx_usecount--; | ||
770 | } else { | ||
771 | gk20a_dbg_info("double release cde context %p", cde_ctx); | ||
772 | } | ||
773 | |||
774 | nvgpu_mutex_release(&cde_app->mutex); | ||
775 | } | ||
776 | |||
777 | static void gk20a_cde_ctx_deleter_fn(struct work_struct *work) | ||
778 | __acquires(&cde_app->mutex) | ||
779 | __releases(&cde_app->mutex) | ||
780 | { | ||
781 | struct delayed_work *delay_work = to_delayed_work(work); | ||
782 | struct gk20a_cde_ctx *cde_ctx = container_of(delay_work, | ||
783 | struct gk20a_cde_ctx, ctx_deleter_work); | ||
784 | struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app; | ||
785 | struct gk20a *g = cde_ctx->g; | ||
786 | int err; | ||
787 | |||
788 | /* someone has just taken it? engine deletion started? */ | ||
789 | if (cde_ctx->in_use || !cde_app->initialised) | ||
790 | return; | ||
791 | |||
792 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, | ||
793 | "cde: attempting to delete temporary %p", cde_ctx); | ||
794 | |||
795 | err = gk20a_busy(g); | ||
796 | if (err) { | ||
797 | /* this context would find new use later anyway, so not freeing it | ||
798 | * here does not leak anything */ | ||
799 | nvgpu_warn(g, "cde: cannot set gk20a on, postponing" | ||
800 | " temp ctx deletion"); | ||
801 | return; | ||
802 | } | ||
803 | |||
804 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
805 | if (cde_ctx->in_use || !cde_app->initialised) { | ||
806 | gk20a_dbg(gpu_dbg_cde_ctx, | ||
807 | "cde: context use raced, not deleting %p", | ||
808 | cde_ctx); | ||
809 | goto out; | ||
810 | } | ||
811 | |||
812 | WARN(delayed_work_pending(&cde_ctx->ctx_deleter_work), | ||
813 | "double pending %p", cde_ctx); | ||
814 | |||
815 | gk20a_cde_remove_ctx(cde_ctx); | ||
816 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, | ||
817 | "cde: destroyed %p count=%d use=%d max=%d", | ||
818 | cde_ctx, cde_app->ctx_count, cde_app->ctx_usecount, | ||
819 | cde_app->ctx_count_top); | ||
820 | |||
821 | out: | ||
822 | nvgpu_mutex_release(&cde_app->mutex); | ||
823 | gk20a_idle(g); | ||
824 | } | ||
825 | |||
826 | static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct gk20a *g) | ||
827 | __must_hold(&cde_app->mutex) | ||
828 | { | ||
829 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
830 | struct gk20a_cde_ctx *cde_ctx; | ||
831 | |||
832 | /* exhausted? */ | ||
833 | |||
834 | if (cde_app->ctx_usecount >= MAX_CTX_USE_COUNT) | ||
835 | return ERR_PTR(-EAGAIN); | ||
836 | |||
837 | /* idle context available? */ | ||
838 | |||
839 | if (!nvgpu_list_empty(&cde_app->free_contexts)) { | ||
840 | cde_ctx = nvgpu_list_first_entry(&cde_app->free_contexts, | ||
841 | gk20a_cde_ctx, list); | ||
842 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, | ||
843 | "cde: got free %p count=%d use=%d max=%d", | ||
844 | cde_ctx, cde_app->ctx_count, | ||
845 | cde_app->ctx_usecount, | ||
846 | cde_app->ctx_count_top); | ||
847 | trace_gk20a_cde_get_context(cde_ctx); | ||
848 | |||
849 | /* deleter work may be scheduled, but in_use prevents it */ | ||
850 | cde_ctx->in_use = true; | ||
851 | nvgpu_list_move(&cde_ctx->list, &cde_app->used_contexts); | ||
852 | cde_app->ctx_usecount++; | ||
853 | |||
854 | /* cancel any deletions now that ctx is in use */ | ||
855 | gk20a_cde_cancel_deleter(cde_ctx, true); | ||
856 | return cde_ctx; | ||
857 | } | ||
858 | |||
859 | /* no free contexts, get a temporary one */ | ||
860 | |||
861 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, | ||
862 | "cde: no free contexts, count=%d", | ||
863 | cde_app->ctx_count); | ||
864 | |||
865 | cde_ctx = gk20a_cde_allocate_context(g); | ||
866 | if (IS_ERR(cde_ctx)) { | ||
867 | nvgpu_warn(g, "cde: cannot allocate context: %ld", | ||
868 | PTR_ERR(cde_ctx)); | ||
869 | return cde_ctx; | ||
870 | } | ||
871 | |||
872 | trace_gk20a_cde_get_context(cde_ctx); | ||
873 | cde_ctx->in_use = true; | ||
874 | cde_ctx->is_temporary = true; | ||
875 | cde_app->ctx_usecount++; | ||
876 | cde_app->ctx_count++; | ||
877 | if (cde_app->ctx_count > cde_app->ctx_count_top) | ||
878 | cde_app->ctx_count_top = cde_app->ctx_count; | ||
879 | nvgpu_list_add(&cde_ctx->list, &cde_app->used_contexts); | ||
880 | |||
881 | return cde_ctx; | ||
882 | } | ||
883 | |||
884 | static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g) | ||
885 | __releases(&cde_app->mutex) | ||
886 | __acquires(&cde_app->mutex) | ||
887 | { | ||
888 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
889 | struct gk20a_cde_ctx *cde_ctx = NULL; | ||
890 | struct nvgpu_timeout timeout; | ||
891 | |||
892 | nvgpu_timeout_init(g, &timeout, MAX_CTX_RETRY_TIME, | ||
893 | NVGPU_TIMER_CPU_TIMER); | ||
894 | |||
895 | do { | ||
896 | cde_ctx = gk20a_cde_do_get_context(g); | ||
897 | if (PTR_ERR(cde_ctx) != -EAGAIN) | ||
898 | break; | ||
899 | |||
900 | /* exhausted, retry */ | ||
901 | nvgpu_mutex_release(&cde_app->mutex); | ||
902 | cond_resched(); | ||
903 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
904 | } while (!nvgpu_timeout_expired(&timeout)); | ||
905 | |||
906 | return cde_ctx; | ||
907 | } | ||
908 | |||
909 | static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g) | ||
910 | { | ||
911 | struct gk20a_cde_ctx *cde_ctx; | ||
912 | int ret; | ||
913 | |||
914 | cde_ctx = nvgpu_kzalloc(g, sizeof(*cde_ctx)); | ||
915 | if (!cde_ctx) | ||
916 | return ERR_PTR(-ENOMEM); | ||
917 | |||
918 | cde_ctx->g = g; | ||
919 | cde_ctx->dev = dev_from_gk20a(g); | ||
920 | |||
921 | ret = gk20a_cde_load(cde_ctx); | ||
922 | if (ret) { | ||
923 | nvgpu_kfree(g, cde_ctx); | ||
924 | return ERR_PTR(ret); | ||
925 | } | ||
926 | |||
927 | nvgpu_init_list_node(&cde_ctx->list); | ||
928 | cde_ctx->is_temporary = false; | ||
929 | cde_ctx->in_use = false; | ||
930 | INIT_DELAYED_WORK(&cde_ctx->ctx_deleter_work, | ||
931 | gk20a_cde_ctx_deleter_fn); | ||
932 | |||
933 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: allocated %p", cde_ctx); | ||
934 | trace_gk20a_cde_allocate_context(cde_ctx); | ||
935 | return cde_ctx; | ||
936 | } | ||
937 | |||
938 | int gk20a_cde_convert(struct gk20a *g, | ||
939 | struct dma_buf *compbits_scatter_buf, | ||
940 | u64 compbits_byte_offset, | ||
941 | u64 scatterbuffer_byte_offset, | ||
942 | struct nvgpu_fence *fence, | ||
943 | u32 __flags, struct gk20a_cde_param *params, | ||
944 | int num_params, struct gk20a_fence **fence_out) | ||
945 | __acquires(&cde_app->mutex) | ||
946 | __releases(&cde_app->mutex) | ||
947 | { | ||
948 | struct gk20a_cde_ctx *cde_ctx = NULL; | ||
949 | struct gk20a_comptags comptags; | ||
950 | u64 mapped_compbits_offset = 0; | ||
951 | u64 compbits_size = 0; | ||
952 | u64 mapped_scatterbuffer_offset = 0; | ||
953 | u64 scatterbuffer_size = 0; | ||
954 | u64 map_vaddr = 0; | ||
955 | u64 map_offset = 0; | ||
956 | u64 map_size = 0; | ||
957 | u8 *surface = NULL; | ||
958 | u64 big_page_mask = 0; | ||
959 | u32 flags; | ||
960 | int err, i; | ||
961 | const s32 compbits_kind = 0; | ||
962 | |||
963 | gk20a_dbg(gpu_dbg_cde, "compbits_byte_offset=%llu scatterbuffer_byte_offset=%llu", | ||
964 | compbits_byte_offset, scatterbuffer_byte_offset); | ||
965 | |||
966 | /* scatter buffer must be after compbits buffer */ | ||
967 | if (scatterbuffer_byte_offset && | ||
968 | scatterbuffer_byte_offset < compbits_byte_offset) | ||
969 | return -EINVAL; | ||
970 | |||
971 | err = gk20a_busy(g); | ||
972 | if (err) | ||
973 | return err; | ||
974 | |||
975 | nvgpu_mutex_acquire(&g->cde_app.mutex); | ||
976 | cde_ctx = gk20a_cde_get_context(g); | ||
977 | nvgpu_mutex_release(&g->cde_app.mutex); | ||
978 | if (IS_ERR(cde_ctx)) { | ||
979 | err = PTR_ERR(cde_ctx); | ||
980 | goto exit_idle; | ||
981 | } | ||
982 | |||
983 | /* First, map the buffer to local va */ | ||
984 | |||
985 | /* ensure that the compbits buffer has drvdata */ | ||
986 | err = gk20a_dmabuf_alloc_drvdata(compbits_scatter_buf, | ||
987 | dev_from_gk20a(g)); | ||
988 | if (err) | ||
989 | goto exit_idle; | ||
990 | |||
991 | /* compbits don't start at a page-aligned offset, so we need to align | ||
992 | the region to be mapped */ | ||
993 | big_page_mask = cde_ctx->vm->big_page_size - 1; | ||
994 | map_offset = compbits_byte_offset & ~big_page_mask; | ||
995 | map_size = compbits_scatter_buf->size - map_offset; | ||
996 | |||
997 | |||
998 | /* compute compbit start offset from the beginning of the mapped | ||
999 | area */ | ||
1000 | mapped_compbits_offset = compbits_byte_offset - map_offset; | ||
1001 | if (scatterbuffer_byte_offset) { | ||
1002 | compbits_size = scatterbuffer_byte_offset - | ||
1003 | compbits_byte_offset; | ||
1004 | mapped_scatterbuffer_offset = scatterbuffer_byte_offset - | ||
1005 | map_offset; | ||
1006 | scatterbuffer_size = compbits_scatter_buf->size - | ||
1007 | scatterbuffer_byte_offset; | ||
1008 | } else { | ||
1009 | compbits_size = compbits_scatter_buf->size - | ||
1010 | compbits_byte_offset; | ||
1011 | } | ||
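/* Worked example, assuming a 128 KiB big page (big_page_mask =
 * 0x1ffff): with compbits_byte_offset = 0x2a000, map_offset =
 * 0x2a000 & ~0x1ffff = 0x20000 and mapped_compbits_offset =
 * 0x2a000 - 0x20000 = 0xa000, so the mapping starts at the aligned
 * offset and the compbits are addressed relative to it. */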
1012 | |||
1013 | gk20a_dbg(gpu_dbg_cde, "map_offset=%llu map_size=%llu", | ||
1014 | map_offset, map_size); | ||
1015 | gk20a_dbg(gpu_dbg_cde, "mapped_compbits_offset=%llu compbits_size=%llu", | ||
1016 | mapped_compbits_offset, compbits_size); | ||
1017 | gk20a_dbg(gpu_dbg_cde, "mapped_scatterbuffer_offset=%llu scatterbuffer_size=%llu", | ||
1018 | mapped_scatterbuffer_offset, scatterbuffer_size); | ||
1019 | |||
1020 | |||
1021 | /* map the destination buffer */ | ||
1022 | get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ | ||
1023 | map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, | ||
1024 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1025 | compbits_kind, true, | ||
1026 | gk20a_mem_flag_none, | ||
1027 | map_offset, map_size, | ||
1028 | NULL); | ||
1029 | if (!map_vaddr) { | ||
1030 | dma_buf_put(compbits_scatter_buf); | ||
1031 | err = -EINVAL; | ||
1032 | goto exit_idle; | ||
1033 | } | ||
1034 | |||
1035 | if (scatterbuffer_byte_offset && | ||
1036 | g->ops.cde.need_scatter_buffer && | ||
1037 | g->ops.cde.need_scatter_buffer(g)) { | ||
1038 | struct sg_table *sgt; | ||
1039 | void *scatter_buffer; | ||
1040 | |||
1041 | surface = dma_buf_vmap(compbits_scatter_buf); | ||
1042 | if (!surface) { | ||
1043 | nvgpu_warn(g, | ||
1044 | "dma_buf_vmap failed"); | ||
1045 | err = -EINVAL; | ||
1046 | goto exit_unmap_vaddr; | ||
1047 | } | ||
1048 | |||
1049 | scatter_buffer = surface + scatterbuffer_byte_offset; | ||
1050 | |||
1051 | gk20a_dbg(gpu_dbg_cde, "surface=0x%p scatterBuffer=0x%p", | ||
1052 | surface, scatter_buffer); | ||
1053 | sgt = gk20a_mm_pin(dev_from_gk20a(g), compbits_scatter_buf); | ||
1054 | if (IS_ERR(sgt)) { | ||
1055 | nvgpu_warn(g, | ||
1056 | "mm_pin failed"); | ||
1057 | err = -EINVAL; | ||
1058 | goto exit_unmap_surface; | ||
1059 | } else { | ||
1060 | err = g->ops.cde.populate_scatter_buffer(g, sgt, | ||
1061 | compbits_byte_offset, scatter_buffer, | ||
1062 | scatterbuffer_size); | ||
1063 | WARN_ON(err); | ||
1064 | |||
1065 | gk20a_mm_unpin(dev_from_gk20a(g), compbits_scatter_buf, | ||
1066 | sgt); | ||
1067 | if (err) | ||
1068 | goto exit_unmap_surface; | ||
1069 | } | ||
1070 | |||
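/* The scatter buffer was written through a CPU vmap of the dma-buf,
 * so flush the CPU dcache before the GPU reads the buffer; the GPU
 * accesses it via DMA without snooping the CPU caches. */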
1071 | __cpuc_flush_dcache_area(scatter_buffer, scatterbuffer_size); | ||
1072 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1073 | surface = NULL; | ||
1074 | } | ||
1075 | |||
1076 | /* store source buffer compression tags */ | ||
1077 | gk20a_get_comptags(dev_from_gk20a(g), compbits_scatter_buf, &comptags); | ||
1078 | cde_ctx->surf_param_offset = comptags.offset; | ||
1079 | cde_ctx->surf_param_lines = comptags.lines; | ||
1080 | |||
1081 | /* store surface vaddr. This is actually compbit vaddr, but since | ||
1082 | compbits live in the same surface, and we can get the alloc base | ||
1083 | address by using gk20a_mm_gpuva_to_iova_base, this will do */ | ||
1084 | cde_ctx->surf_vaddr = map_vaddr; | ||
1085 | |||
1086 | /* store information about destination */ | ||
1087 | cde_ctx->compbit_vaddr = map_vaddr + mapped_compbits_offset; | ||
1088 | cde_ctx->compbit_size = compbits_size; | ||
1089 | |||
1090 | cde_ctx->scatterbuffer_vaddr = map_vaddr + mapped_scatterbuffer_offset; | ||
1091 | cde_ctx->scatterbuffer_size = scatterbuffer_size; | ||
1092 | |||
1093 | /* remove existing argument data */ | ||
1094 | memset(cde_ctx->user_param_values, 0, | ||
1095 | sizeof(cde_ctx->user_param_values)); | ||
1096 | |||
1097 | /* read user space arguments for the conversion */ | ||
1098 | for (i = 0; i < num_params; i++) { | ||
1099 | struct gk20a_cde_param *param = params + i; | ||
1100 | int id = param->id - NUM_RESERVED_PARAMS; | ||
1101 | |||
1102 | if (id < 0 || id >= MAX_CDE_USER_PARAMS) { | ||
1103 | nvgpu_warn(g, "cde: unknown user parameter"); | ||
1104 | err = -EINVAL; | ||
1105 | goto exit_unmap_surface; | ||
1106 | } | ||
1107 | cde_ctx->user_param_values[id] = param->value; | ||
1108 | } | ||
1109 | |||
1110 | /* patch data */ | ||
1111 | err = gk20a_cde_patch_params(cde_ctx); | ||
1112 | if (err) { | ||
1113 | nvgpu_warn(g, "cde: failed to patch parameters"); | ||
1114 | goto exit_unmap_surface; | ||
1115 | } | ||
1116 | |||
1117 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", | ||
1118 | g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); | ||
1119 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", | ||
1120 | cde_ctx->compbit_size, cde_ctx->compbit_vaddr); | ||
1121 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=scatterbuffer, size=%llu, gpuva=%llx\n", | ||
1122 | cde_ctx->scatterbuffer_size, cde_ctx->scatterbuffer_vaddr); | ||
1123 | |||
1124 | /* always take the postfence as it is needed for protecting the | ||
1125 | * cde context */ | ||
1126 | flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | ||
1127 | |||
1128 | /* gk20a_cde_execute_buffer() will grab a power reference of its own */ | ||
1129 | gk20a_idle(g); | ||
1130 | |||
1131 | /* execute the conversion buffer, combined with init first if it's the | ||
1132 | * first time */ | ||
1133 | err = gk20a_cde_execute_buffer(cde_ctx, | ||
1134 | cde_ctx->init_cmd_executed | ||
1135 | ? TYPE_BUF_COMMAND_CONVERT | ||
1136 | : TYPE_BUF_COMMAND_INIT, | ||
1137 | fence, flags, fence_out); | ||
1138 | |||
1139 | cde_ctx->init_cmd_executed = true; | ||
1140 | |||
1141 | /* unmap the buffers - channel holds references to them now */ | ||
1142 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); | ||
1143 | |||
1144 | return err; | ||
1145 | |||
1146 | exit_unmap_surface: | ||
1147 | if (surface) | ||
1148 | dma_buf_vunmap(compbits_scatter_buf, surface); | ||
1149 | exit_unmap_vaddr: | ||
1150 | nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); | ||
1151 | exit_idle: | ||
1152 | gk20a_idle(g); | ||
1153 | return err; | ||
1154 | } | ||
1155 | |||
1156 | static void gk20a_cde_finished_ctx_cb(struct channel_gk20a *ch, void *data) | ||
1157 | __acquires(&cde_app->mutex) | ||
1158 | __releases(&cde_app->mutex) | ||
1159 | { | ||
1160 | struct gk20a_cde_ctx *cde_ctx = data; | ||
1161 | struct gk20a *g = cde_ctx->g; | ||
1162 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
1163 | bool channel_idle; | ||
1164 | |||
1165 | channel_gk20a_joblist_lock(ch); | ||
1166 | channel_idle = channel_gk20a_joblist_is_empty(ch); | ||
1167 | channel_gk20a_joblist_unlock(ch); | ||
1168 | |||
1169 | if (!channel_idle) | ||
1170 | return; | ||
1171 | |||
1172 | trace_gk20a_cde_finished_ctx_cb(cde_ctx); | ||
1173 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: finished %p", cde_ctx); | ||
1174 | if (!cde_ctx->in_use) | ||
1175 | gk20a_dbg_info("double finish cde context %p on channel %p", | ||
1176 | cde_ctx, ch); | ||
1177 | |||
1178 | if (ch->has_timedout) { | ||
1179 | if (cde_ctx->is_temporary) { | ||
1180 | nvgpu_warn(g, | ||
1181 | "cde: channel had timed out" | ||
1182 | " (temporary channel)"); | ||
1183 | /* going to be deleted anyway */ | ||
1184 | } else { | ||
1185 | nvgpu_warn(g, | ||
1186 | "cde: channel had timed out" | ||
1187 | ", reloading"); | ||
1188 | /* mark it to be deleted, replace with a new one */ | ||
1189 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1190 | cde_ctx->is_temporary = true; | ||
1191 | if (gk20a_cde_create_context(g)) { | ||
1192 | nvgpu_err(g, "cde: can't replace context"); | ||
1193 | } | ||
1194 | nvgpu_mutex_release(&cde_app->mutex); | ||
1195 | } | ||
1196 | } | ||
1197 | |||
1198 | /* delete temporary contexts later (watch for doubles) */ | ||
1199 | if (cde_ctx->is_temporary && cde_ctx->in_use) { | ||
1200 | WARN_ON(delayed_work_pending(&cde_ctx->ctx_deleter_work)); | ||
1201 | schedule_delayed_work(&cde_ctx->ctx_deleter_work, | ||
1202 | msecs_to_jiffies(CTX_DELETE_TIME)); | ||
1203 | } | ||
1204 | |||
1205 | if (!ch->has_timedout) | ||
1206 | gk20a_cde_ctx_release(cde_ctx); | ||
1207 | } | ||
1208 | |||
1209 | static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) | ||
1210 | { | ||
1211 | struct gk20a *g = cde_ctx->g; | ||
1212 | struct nvgpu_firmware *img; | ||
1213 | struct channel_gk20a *ch; | ||
1214 | struct gr_gk20a *gr = &g->gr; | ||
1215 | int err = 0; | ||
1216 | u64 vaddr; | ||
1217 | |||
1218 | img = nvgpu_request_firmware(g, "gpu2cde.bin", 0); | ||
1219 | if (!img) { | ||
1220 | nvgpu_err(g, "cde: could not fetch the firmware"); | ||
1221 | return -ENOSYS; | ||
1222 | } | ||
1223 | |||
1224 | ch = gk20a_open_new_channel_with_cb(g, gk20a_cde_finished_ctx_cb, | ||
1225 | cde_ctx, | ||
1226 | -1, | ||
1227 | false); | ||
1228 | if (!ch) { | ||
1229 | nvgpu_warn(g, "cde: gk20a channel not available"); | ||
1230 | err = -ENOMEM; | ||
1231 | goto err_get_gk20a_channel; | ||
1232 | } | ||
1233 | |||
1234 | /* bind the channel to the vm */ | ||
1235 | err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch); | ||
1236 | if (err) { | ||
1237 | nvgpu_warn(g, "cde: could not bind vm"); | ||
1238 | goto err_commit_va; | ||
1239 | } | ||
1240 | |||
1241 | /* allocate gpfifo (1024 should be more than enough) */ | ||
1242 | err = gk20a_channel_alloc_gpfifo(ch, 1024, 0, 0); | ||
1243 | if (err) { | ||
1244 | nvgpu_warn(g, "cde: unable to allocate gpfifo"); | ||
1245 | goto err_alloc_gpfifo; | ||
1246 | } | ||
1247 | |||
1248 | /* map backing store to gpu virtual space */ | ||
1249 | vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem, | ||
1250 | g->gr.compbit_store.mem.size, | ||
1251 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1252 | gk20a_mem_flag_read_only, | ||
1253 | false, | ||
1254 | gr->compbit_store.mem.aperture); | ||
1255 | |||
1256 | if (!vaddr) { | ||
1257 | nvgpu_warn(g, "cde: cannot map compression bit backing store"); | ||
1258 | err = -ENOMEM; | ||
1259 | goto err_map_backingstore; | ||
1260 | } | ||
1261 | |||
1262 | /* store initialisation data */ | ||
1263 | cde_ctx->ch = ch; | ||
1264 | cde_ctx->vm = ch->vm; | ||
1265 | cde_ctx->backing_store_vaddr = vaddr; | ||
1266 | |||
1267 | /* initialise the firmware */ | ||
1268 | err = gk20a_init_cde_img(cde_ctx, img); | ||
1269 | if (err) { | ||
1270 | nvgpu_warn(g, "cde: image initialisation failed"); | ||
1271 | goto err_init_cde_img; | ||
1272 | } | ||
1273 | |||
1274 | /* initialisation done */ | ||
1275 | nvgpu_release_firmware(g, img); | ||
1276 | |||
1277 | return 0; | ||
1278 | |||
1279 | err_init_cde_img: | ||
1280 | nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); | ||
1281 | err_map_backingstore: | ||
1282 | err_alloc_gpfifo: | ||
1283 | nvgpu_vm_put(ch->vm); | ||
1284 | err_commit_va: | ||
1285 | err_get_gk20a_channel: | ||
1286 | nvgpu_release_firmware(g, img); | ||
1287 | nvgpu_err(g, "cde: couldn't initialise buffer converter: %d", err); | ||
1288 | return err; | ||
1289 | } | ||
1290 | |||
1291 | int gk20a_cde_reload(struct gk20a *g) | ||
1292 | __acquires(&cde_app->mutex) | ||
1293 | __releases(&cde_app->mutex) | ||
1294 | { | ||
1295 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
1296 | int err; | ||
1297 | |||
1298 | if (!cde_app->initialised) | ||
1299 | return -ENOSYS; | ||
1300 | |||
1301 | err = gk20a_busy(g); | ||
1302 | if (err) | ||
1303 | return err; | ||
1304 | |||
1305 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1306 | |||
1307 | gk20a_cde_stop(g); | ||
1308 | |||
1309 | err = gk20a_cde_create_contexts(g); | ||
1310 | if (!err) | ||
1311 | cde_app->initialised = true; | ||
1312 | |||
1313 | nvgpu_mutex_release(&cde_app->mutex); | ||
1314 | |||
1315 | gk20a_idle(g); | ||
1316 | return err; | ||
1317 | } | ||
1318 | |||
1319 | int gk20a_init_cde_support(struct gk20a *g) | ||
1320 | __acquires(&cde_app->mutex) | ||
1321 | __releases(&cde_app->mutex) | ||
1322 | { | ||
1323 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
1324 | int err; | ||
1325 | |||
1326 | if (cde_app->initialised) | ||
1327 | return 0; | ||
1328 | |||
1329 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_cde_ctx, "cde: init"); | ||
1330 | |||
1331 | err = nvgpu_mutex_init(&cde_app->mutex); | ||
1332 | if (err) | ||
1333 | return err; | ||
1334 | |||
1335 | nvgpu_mutex_acquire(&cde_app->mutex); | ||
1336 | |||
1337 | nvgpu_init_list_node(&cde_app->free_contexts); | ||
1338 | nvgpu_init_list_node(&cde_app->used_contexts); | ||
1339 | cde_app->ctx_count = 0; | ||
1340 | cde_app->ctx_count_top = 0; | ||
1341 | cde_app->ctx_usecount = 0; | ||
1342 | |||
1343 | err = gk20a_cde_create_contexts(g); | ||
1344 | if (!err) | ||
1345 | cde_app->initialised = true; | ||
1346 | |||
1347 | nvgpu_mutex_release(&cde_app->mutex); | ||
1348 | gk20a_dbg(gpu_dbg_cde_ctx, "cde: init finished: %d", err); | ||
1349 | |||
1350 | if (err) | ||
1351 | nvgpu_mutex_destroy(&cde_app->mutex); | ||
1352 | |||
1353 | return err; | ||
1354 | } | ||
1355 | |||
1356 | enum cde_launch_patch_id { | ||
1357 | PATCH_H_QMD_CTA_RASTER_WIDTH_ID = 1024, | ||
1358 | PATCH_H_QMD_CTA_RASTER_HEIGHT_ID = 1025, | ||
1359 | PATCH_QMD_CTA_RASTER_DEPTH_ID = 1026, /* for firmware v0 only */ | ||
1360 | PATCH_QMD_CTA_THREAD_DIMENSION0_ID = 1027, | ||
1361 | PATCH_QMD_CTA_THREAD_DIMENSION1_ID = 1028, | ||
1362 | PATCH_QMD_CTA_THREAD_DIMENSION2_ID = 1029, /* for firmware v0 only */ | ||
1363 | PATCH_USER_CONST_XTILES_ID = 1030, /* for firmware v0 only */ | ||
1364 | PATCH_USER_CONST_YTILES_ID = 1031, /* for firmware v0 only */ | ||
1365 | PATCH_USER_CONST_BLOCKHEIGHTLOG2_ID = 1032, | ||
1366 | PATCH_USER_CONST_DSTPITCH_ID = 1033, /* for firmware v0 only */ | ||
1367 | PATCH_H_USER_CONST_FLAGS_ID = 1034, /* for firmware v0 only */ | ||
1368 | PATCH_H_VPC_CURRENT_GRID_SIZE_X_ID = 1035, | ||
1369 | PATCH_H_VPC_CURRENT_GRID_SIZE_Y_ID = 1036, | ||
1370 | PATCH_H_VPC_CURRENT_GRID_SIZE_Z_ID = 1037, | ||
1371 | PATCH_VPC_CURRENT_GROUP_SIZE_X_ID = 1038, | ||
1372 | PATCH_VPC_CURRENT_GROUP_SIZE_Y_ID = 1039, | ||
1373 | PATCH_VPC_CURRENT_GROUP_SIZE_Z_ID = 1040, | ||
1374 | PATCH_USER_CONST_XBLOCKS_ID = 1041, | ||
1375 | PATCH_H_USER_CONST_DSTOFFSET_ID = 1042, | ||
1376 | PATCH_V_QMD_CTA_RASTER_WIDTH_ID = 1043, | ||
1377 | PATCH_V_QMD_CTA_RASTER_HEIGHT_ID = 1044, | ||
1378 | PATCH_V_USER_CONST_DSTOFFSET_ID = 1045, | ||
1379 | PATCH_V_VPC_CURRENT_GRID_SIZE_X_ID = 1046, | ||
1380 | PATCH_V_VPC_CURRENT_GRID_SIZE_Y_ID = 1047, | ||
1381 | PATCH_V_VPC_CURRENT_GRID_SIZE_Z_ID = 1048, | ||
1382 | PATCH_H_LAUNCH_WORD1_ID = 1049, | ||
1383 | PATCH_H_LAUNCH_WORD2_ID = 1050, | ||
1384 | PATCH_V_LAUNCH_WORD1_ID = 1051, | ||
1385 | PATCH_V_LAUNCH_WORD2_ID = 1052, | ||
1386 | PATCH_H_QMD_PROGRAM_OFFSET_ID = 1053, | ||
1387 | PATCH_H_QMD_REGISTER_COUNT_ID = 1054, | ||
1388 | PATCH_V_QMD_PROGRAM_OFFSET_ID = 1055, | ||
1389 | PATCH_V_QMD_REGISTER_COUNT_ID = 1056, | ||
1390 | }; | ||
1391 | |||
1392 | /* maximum number of WRITE_PATCHes in the below function */ | ||
1393 | #define MAX_CDE_LAUNCH_PATCHES 32 | ||
1394 | |||
1395 | static int gk20a_buffer_convert_gpu_to_cde_v1( | ||
1396 | struct gk20a *g, | ||
1397 | struct dma_buf *dmabuf, u32 consumer, | ||
1398 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1399 | u64 scatterbuffer_offset, | ||
1400 | u32 width, u32 height, u32 block_height_log2, | ||
1401 | u32 submit_flags, struct nvgpu_fence *fence_in, | ||
1402 | struct gk20a_buffer_state *state) | ||
1403 | { | ||
1404 | struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES]; | ||
1405 | int param = 0; | ||
1406 | int err = 0; | ||
1407 | struct gk20a_fence *new_fence = NULL; | ||
1408 | const int wgx = 8; | ||
1409 | const int wgy = 8; | ||
1410 | const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */ | ||
1411 | const int xalign = compbits_per_byte * wgx; | ||
1412 | const int yalign = wgy; | ||
1413 | |||
1414 | /* Compute per launch parameters */ | ||
1415 | const int xtiles = (width + 7) >> 3; | ||
1416 | const int ytiles = (height + 7) >> 3; | ||
1417 | const int gridw_h = roundup(xtiles, xalign) / xalign; | ||
1418 | const int gridh_h = roundup(ytiles, yalign) / yalign; | ||
1419 | const int gridw_v = roundup(ytiles, xalign) / xalign; | ||
1420 | const int gridh_v = roundup(xtiles, yalign) / yalign; | ||
1421 | const int xblocks = (xtiles + 1) >> 1; | ||
1422 | const int voffset = compbits_voffset - compbits_hoffset; | ||
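/* Example: a 1024x768 surface gives xtiles = 128 and ytiles = 96;
 * with xalign = 32 and yalign = 8 this yields gridw_h = 4,
 * gridh_h = 12 for the horizontal pass, gridw_v = 3, gridh_v = 16
 * for the vertical pass, and xblocks = (128 + 1) >> 1 = 64. */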
1423 | |||
1424 | int hprog = -1; | ||
1425 | int vprog = -1; | ||
1426 | |||
1427 | if (g->ops.cde.get_program_numbers) | ||
1428 | g->ops.cde.get_program_numbers(g, block_height_log2, | ||
1429 | &hprog, &vprog); | ||
1430 | else { | ||
1431 | nvgpu_warn(g, "cde: chip not supported"); | ||
1432 | return -ENOSYS; | ||
1433 | } | ||
1434 | |||
1435 | if (hprog < 0 || vprog < 0) { | ||
1436 | nvgpu_warn(g, "cde: could not determine programs"); | ||
1437 | return -ENOSYS; | ||
1438 | } | ||
1439 | |||
1440 | if (xtiles > 8192 / 8 || ytiles > 8192 / 8) | ||
1441 | nvgpu_warn(g, "cde: surface is exceptionally large (xtiles=%d, ytiles=%d)", | ||
1442 | xtiles, ytiles); | ||
1443 | |||
1444 | gk20a_dbg(gpu_dbg_cde, "w=%d, h=%d, bh_log2=%d, compbits_hoffset=0x%llx, compbits_voffset=0x%llx, scatterbuffer_offset=0x%llx", | ||
1445 | width, height, block_height_log2, | ||
1446 | compbits_hoffset, compbits_voffset, scatterbuffer_offset); | ||
1447 | gk20a_dbg(gpu_dbg_cde, "resolution (%d, %d) tiles (%d, %d)", | ||
1448 | width, height, xtiles, ytiles); | ||
1449 | gk20a_dbg(gpu_dbg_cde, "group (%d, %d) gridH (%d, %d) gridV (%d, %d)", | ||
1450 | wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v); | ||
1451 | gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d", | ||
1452 | hprog, | ||
1453 | g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog], | ||
1454 | g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog], | ||
1455 | vprog, | ||
1456 | g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog], | ||
1457 | g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); | ||
1458 | |||
1459 | /* Write parameters */ | ||
1460 | #define WRITE_PATCH(NAME, VALUE) \ | ||
1461 | params[param++] = (struct gk20a_cde_param){NAME##_ID, 0, VALUE} | ||
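/* For instance, WRITE_PATCH(PATCH_H_LAUNCH_WORD1, w) expands to
 * params[param++] = (struct gk20a_cde_param){PATCH_H_LAUNCH_WORD1_ID, 0, w};
 * appending one patch descriptor per launch parameter. */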
1462 | WRITE_PATCH(PATCH_USER_CONST_XBLOCKS, xblocks); | ||
1463 | WRITE_PATCH(PATCH_USER_CONST_BLOCKHEIGHTLOG2, | ||
1464 | block_height_log2); | ||
1465 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx); | ||
1466 | WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION1, wgy); | ||
1467 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_X, wgx); | ||
1468 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Y, wgy); | ||
1469 | WRITE_PATCH(PATCH_VPC_CURRENT_GROUP_SIZE_Z, 1); | ||
1470 | |||
1471 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_WIDTH, gridw_h); | ||
1472 | WRITE_PATCH(PATCH_H_QMD_CTA_RASTER_HEIGHT, gridh_h); | ||
1473 | WRITE_PATCH(PATCH_H_USER_CONST_DSTOFFSET, 0); | ||
1474 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_X, gridw_h); | ||
1475 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Y, gridh_h); | ||
1476 | WRITE_PATCH(PATCH_H_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
1477 | |||
1478 | WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_WIDTH, gridw_v); | ||
1479 | WRITE_PATCH(PATCH_V_QMD_CTA_RASTER_HEIGHT, gridh_v); | ||
1480 | WRITE_PATCH(PATCH_V_USER_CONST_DSTOFFSET, voffset); | ||
1481 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_X, gridw_v); | ||
1482 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Y, gridh_v); | ||
1483 | WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1); | ||
1484 | |||
1485 | WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET, | ||
1486 | g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]); | ||
1487 | WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT, | ||
1488 | g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]); | ||
1489 | WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET, | ||
1490 | g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]); | ||
1491 | WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT, | ||
1492 | g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]); | ||
1493 | |||
1494 | if (consumer & NVGPU_GPU_COMPBITS_CDEH) { | ||
1495 | WRITE_PATCH(PATCH_H_LAUNCH_WORD1, | ||
1496 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); | ||
1497 | WRITE_PATCH(PATCH_H_LAUNCH_WORD2, | ||
1498 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); | ||
1499 | } else { | ||
1500 | WRITE_PATCH(PATCH_H_LAUNCH_WORD1, | ||
1501 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); | ||
1502 | WRITE_PATCH(PATCH_H_LAUNCH_WORD2, | ||
1503 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); | ||
1504 | } | ||
1505 | |||
1506 | if (consumer & NVGPU_GPU_COMPBITS_CDEV) { | ||
1507 | WRITE_PATCH(PATCH_V_LAUNCH_WORD1, | ||
1508 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]); | ||
1509 | WRITE_PATCH(PATCH_V_LAUNCH_WORD2, | ||
1510 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]); | ||
1511 | } else { | ||
1512 | WRITE_PATCH(PATCH_V_LAUNCH_WORD1, | ||
1513 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]); | ||
1514 | WRITE_PATCH(PATCH_V_LAUNCH_WORD2, | ||
1515 | g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]); | ||
1516 | } | ||
1517 | #undef WRITE_PATCH | ||
1518 | |||
1519 | err = gk20a_cde_convert(g, dmabuf, | ||
1520 | compbits_hoffset, | ||
1521 | scatterbuffer_offset, | ||
1522 | fence_in, submit_flags, | ||
1523 | params, param, &new_fence); | ||
1524 | if (err) | ||
1525 | goto out; | ||
1526 | |||
1527 | /* compbits generated, update state & fence */ | ||
1528 | gk20a_fence_put(state->fence); | ||
1529 | state->fence = new_fence; | ||
1530 | state->valid_compbits |= consumer & | ||
1531 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
1532 | out: | ||
1533 | return err; | ||
1534 | } | ||
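For reference, each WRITE_PATCH line above appends one struct gk20a_cde_param (defined in cde_gk20a.h below) to the params array. A single expansion looks like the following; the PATCH_*_ID constants are assumed to come from the CDE firmware headers, which are not part of this file:

        /* Expansion of WRITE_PATCH(PATCH_QMD_CTA_THREAD_DIMENSION0, wgx);
         * PATCH_QMD_CTA_THREAD_DIMENSION0_ID is assumed to be supplied by
         * the CDE firmware headers. */
        params[param++] = (struct gk20a_cde_param){
                PATCH_QMD_CTA_THREAD_DIMENSION0_ID, 0, wgx};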
1535 | |||
1536 | static int gk20a_buffer_convert_gpu_to_cde( | ||
1537 | struct gk20a *g, struct dma_buf *dmabuf, u32 consumer, | ||
1538 | u64 offset, u64 compbits_hoffset, u64 compbits_voffset, | ||
1539 | u64 scatterbuffer_offset, | ||
1540 | u32 width, u32 height, u32 block_height_log2, | ||
1541 | u32 submit_flags, struct nvgpu_fence *fence_in, | ||
1542 | struct gk20a_buffer_state *state) | ||
1543 | { | ||
1544 | int err = 0; | ||
1545 | |||
1546 | if (!g->cde_app.initialised) | ||
1547 | return -ENOSYS; | ||
1548 | |||
1549 | gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n", | ||
1550 | g->cde_app.firmware_version); | ||
1551 | |||
1552 | if (g->cde_app.firmware_version == 1) { | ||
1553 | err = gk20a_buffer_convert_gpu_to_cde_v1( | ||
1554 | g, dmabuf, consumer, offset, compbits_hoffset, | ||
1555 | compbits_voffset, scatterbuffer_offset, | ||
1556 | width, height, block_height_log2, | ||
1557 | submit_flags, fence_in, state); | ||
1558 | } else { | ||
1559 | nvgpu_err(g, "unsupported CDE firmware version %d", | ||
1560 | g->cde_app.firmware_version); | ||
1561 | err = -EINVAL; | ||
1562 | } | ||
1563 | |||
1564 | return err; | ||
1565 | } | ||
1566 | |||
1567 | int gk20a_prepare_compressible_read( | ||
1568 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, | ||
1569 | u64 compbits_hoffset, u64 compbits_voffset, | ||
1570 | u64 scatterbuffer_offset, | ||
1571 | u32 width, u32 height, u32 block_height_log2, | ||
1572 | u32 submit_flags, struct nvgpu_fence *fence, | ||
1573 | u32 *valid_compbits, u32 *zbc_color, | ||
1574 | struct gk20a_fence **fence_out) | ||
1575 | { | ||
1576 | int err = 0; | ||
1577 | struct gk20a_buffer_state *state; | ||
1578 | struct dma_buf *dmabuf; | ||
1579 | u32 missing_bits; | ||
1580 | |||
1581 | dmabuf = dma_buf_get(buffer_fd); | ||
1582 | if (IS_ERR(dmabuf)) | ||
1583 | return -EINVAL; | ||
1584 | |||
1585 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1586 | if (err) { | ||
1587 | dma_buf_put(dmabuf); | ||
1588 | return err; | ||
1589 | } | ||
1590 | |||
1591 | missing_bits = (state->valid_compbits ^ request) & request; | ||
1592 | |||
1593 | nvgpu_mutex_acquire(&state->lock); | ||
1594 | |||
1595 | if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) { | ||
1596 | |||
1597 | gk20a_fence_put(state->fence); | ||
1598 | state->fence = NULL; | ||
1599 | /* state->fence = decompress(); | ||
1600 | state->valid_compbits = 0; */ | ||
1601 | err = -EINVAL; | ||
1602 | goto out; | ||
1603 | } else if (missing_bits) { | ||
1604 | u32 missing_cde_bits = missing_bits & | ||
1605 | (NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV); | ||
1606 | if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) && | ||
1607 | missing_cde_bits) { | ||
1608 | err = gk20a_buffer_convert_gpu_to_cde( | ||
1609 | g, dmabuf, | ||
1610 | missing_cde_bits, | ||
1611 | offset, compbits_hoffset, | ||
1612 | compbits_voffset, scatterbuffer_offset, | ||
1613 | width, height, block_height_log2, | ||
1614 | submit_flags, fence, | ||
1615 | state); | ||
1616 | if (err) | ||
1617 | goto out; | ||
1618 | } | ||
1619 | } | ||
1620 | |||
1621 | if (state->fence && fence_out) | ||
1622 | *fence_out = gk20a_fence_get(state->fence); | ||
1623 | |||
1624 | if (valid_compbits) | ||
1625 | *valid_compbits = state->valid_compbits; | ||
1626 | |||
1627 | if (zbc_color) | ||
1628 | *zbc_color = state->zbc_color; | ||
1629 | |||
1630 | out: | ||
1631 | nvgpu_mutex_release(&state->lock); | ||
1632 | dma_buf_put(dmabuf); | ||
1633 | return err; | ||
1634 | } | ||
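The missing_bits computation above selects exactly the bits that were requested but are not yet valid for the buffer. A minimal illustration, with made-up values rather than driver state:

        /* Illustration only: requested-but-not-yet-valid bit selection. */
        u32 valid   = NVGPU_GPU_COMPBITS_GPU;   /* already produced by the GPU */
        u32 request = NVGPU_GPU_COMPBITS_GPU | NVGPU_GPU_COMPBITS_CDEH;
        u32 missing = (valid ^ request) & request;
        /* missing == NVGPU_GPU_COMPBITS_CDEH, so only the horizontal CDE
         * compbits still need to be generated. */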
1635 | |||
1636 | int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd, | ||
1637 | u32 valid_compbits, u64 offset, u32 zbc_color) | ||
1638 | { | ||
1639 | int err; | ||
1640 | struct gk20a_buffer_state *state; | ||
1641 | struct dma_buf *dmabuf; | ||
1642 | |||
1643 | dmabuf = dma_buf_get(buffer_fd); | ||
1644 | if (IS_ERR(dmabuf)) { | ||
1645 | nvgpu_err(g, "invalid dmabuf"); | ||
1646 | return -EINVAL; | ||
1647 | } | ||
1648 | |||
1649 | err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state); | ||
1650 | if (err) { | ||
1651 | nvgpu_err(g, "could not get state from dmabuf"); | ||
1652 | dma_buf_put(dmabuf); | ||
1653 | return err; | ||
1654 | } | ||
1655 | |||
1656 | nvgpu_mutex_acquire(&state->lock); | ||
1657 | |||
1658 | /* Update the compbits state. */ | ||
1659 | state->valid_compbits = valid_compbits; | ||
1660 | state->zbc_color = zbc_color; | ||
1661 | |||
1662 | /* Discard previous compbit job fence. */ | ||
1663 | gk20a_fence_put(state->fence); | ||
1664 | state->fence = NULL; | ||
1665 | |||
1666 | nvgpu_mutex_release(&state->lock); | ||
1667 | dma_buf_put(dmabuf); | ||
1668 | return 0; | ||
1669 | } | ||
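A hypothetical caller, based purely on the signature above, would mark a buffer's compbits as GPU-produced like this:

        /* Sketch of a call site; buffer_fd and offset come from user space. */
        err = gk20a_mark_compressible_write(g, buffer_fd,
                                            NVGPU_GPU_COMPBITS_GPU,
                                            offset, 0 /* zbc_color */);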
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h deleted file mode 100644 index 4f400bf3..00000000 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h +++ /dev/null | |||
@@ -1,311 +0,0 @@ | |||
1 | /* | ||
2 | * GK20A color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _CDE_GK20A_H_ | ||
20 | #define _CDE_GK20A_H_ | ||
21 | |||
22 | #include "mm_gk20a.h" | ||
23 | |||
24 | #define MAX_CDE_BUFS 10 | ||
25 | #define MAX_CDE_PARAMS 64 | ||
26 | #define MAX_CDE_USER_PARAMS 40 | ||
27 | #define MAX_CDE_ARRAY_ENTRIES 9 | ||
28 | |||
29 | /* | ||
30 | * The size of the context ring buffer that is dedicated to handling cde | ||
31 | * jobs. Re-using a context (=channel) for a different cde job forces a cpu | ||
32 | * wait on the previous job to that channel, so increasing this value | ||
33 | * reduces the likelihood of stalls. | ||
34 | */ | ||
35 | #define NUM_CDE_CONTEXTS 4 | ||
36 | |||
37 | struct dma_buf; | ||
38 | struct gk20a; | ||
39 | |||
40 | /* | ||
41 | * this element defines a buffer that is allocated and mapped into gpu address | ||
42 | * space. data_byte_offset defines the beginning of the buffer inside the | ||
43 | * firmware image. num_bytes defines how many bytes the buffer contains. | ||
44 | * | ||
45 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
46 | */ | ||
47 | |||
48 | struct gk20a_cde_hdr_buf { | ||
49 | u64 data_byte_offset; | ||
50 | u64 num_bytes; | ||
51 | }; | ||
52 | |||
53 | /* | ||
54 | * this element defines a constant patch applied to a buffer. It computes | ||
55 | * the physical address of <source_buf>+source_byte_offset. The address | ||
56 | * is then folded into the patch value as per: | ||
57 | * value = (current_value & ~mask) | ((address << shift) & mask) . | ||
58 | * | ||
59 | * The type field defines the register size as: | ||
60 | * 0=u32, | ||
61 | * 1=u64 (little endian), | ||
62 | * 2=u64 (big endian) | ||
63 | */ | ||
64 | |||
65 | struct gk20a_cde_hdr_replace { | ||
66 | u32 target_buf; | ||
67 | u32 source_buf; | ||
68 | s32 shift; | ||
69 | u32 type; | ||
70 | u64 target_byte_offset; | ||
71 | u64 source_byte_offset; | ||
72 | u64 mask; | ||
73 | }; | ||
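The patch formula in the comment above can be written out as a helper. The following is a sketch, not driver code; it assumes that a negative shift (the field is signed) means a right shift:

        /* Sketch of the patch formula; cde_patch_value() is hypothetical.
         * A negative shift is assumed to mean a right shift. */
        static u64 cde_patch_value(u64 current_value, u64 address,
                                   s32 shift, u64 mask)
        {
                u64 shifted = (shift >= 0) ? (address << shift)
                                           : (address >> -shift);

                return (current_value & ~mask) | (shifted & mask);
        }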
74 | |||
75 | enum { | ||
76 | TYPE_PARAM_TYPE_U32 = 0, | ||
77 | TYPE_PARAM_TYPE_U64_LITTLE, | ||
78 | TYPE_PARAM_TYPE_U64_BIG | ||
79 | }; | ||
80 | |||
81 | /* | ||
82 | * this element defines a runtime patch applied to a buffer. Parameters with | ||
83 | * ids 0 to 1023 are reserved for special usage as follows: | ||
84 | * 0 = comptags_per_cacheline, | ||
85 | * 1 = slices_per_fbp, | ||
86 | * 2 = num_fbps, | ||
87 | * 3 = source buffer first page offset, | ||
88 | * 4 = source buffer block height log2, | ||
89 | * 5 = backing store memory address, | ||
90 | * 6 = destination memory address, | ||
91 | * 7 = destination size (bytes), | ||
92 | * 8 = backing store size (bytes), | ||
93 | * 9 = cache line size | ||
94 | * | ||
95 | * Parameters with id 1024 and above are user-specified, i.e. they determine | ||
96 | * where parameters from user space should be placed in buffers, what their | ||
97 | * type is, etc. | ||
98 | * | ||
99 | * Once the value is available, we add data_offset to the value. | ||
100 | * | ||
101 | * The value is then folded into the patch value as per: | ||
102 | * value = (current_value & ~mask) | ((value << shift) & mask) . | ||
103 | * | ||
104 | * The type field defines the register size as: | ||
105 | * 0=u32, | ||
106 | * 1=u64 (little endian), | ||
107 | * 2=u64 (big endian) | ||
108 | */ | ||
109 | |||
110 | struct gk20a_cde_hdr_param { | ||
111 | u32 id; | ||
112 | u32 target_buf; | ||
113 | s32 shift; | ||
114 | u32 type; | ||
115 | s64 data_offset; | ||
116 | u64 target_byte_offset; | ||
117 | u64 mask; | ||
118 | }; | ||
119 | |||
120 | enum { | ||
121 | TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0, | ||
122 | TYPE_PARAM_GPU_CONFIGURATION, | ||
123 | TYPE_PARAM_FIRSTPAGEOFFSET, | ||
124 | TYPE_PARAM_NUMPAGES, | ||
125 | TYPE_PARAM_BACKINGSTORE, | ||
126 | TYPE_PARAM_DESTINATION, | ||
127 | TYPE_PARAM_DESTINATION_SIZE, | ||
128 | TYPE_PARAM_BACKINGSTORE_SIZE, | ||
129 | TYPE_PARAM_SOURCE_SMMU_ADDR, | ||
130 | TYPE_PARAM_BACKINGSTORE_BASE_HW, | ||
131 | TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE, | ||
132 | TYPE_PARAM_SCATTERBUFFER, | ||
133 | TYPE_PARAM_SCATTERBUFFER_SIZE, | ||
134 | NUM_RESERVED_PARAMS = 1024, | ||
135 | }; | ||
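Given NUM_RESERVED_PARAMS and MAX_CDE_USER_PARAMS above, a plausible mapping from a user-specified parameter id to its slot in gk20a_cde_ctx::user_param_values is sketched below; the driver's actual bounds checks may differ:

        /* Hypothetical helper: map a user param id to a user_param_values
         * index, assuming user ids start at NUM_RESERVED_PARAMS. */
        static int cde_user_param_index(u32 id)
        {
                if (id < NUM_RESERVED_PARAMS ||
                    id - NUM_RESERVED_PARAMS >= MAX_CDE_USER_PARAMS)
                        return -EINVAL;
                return (int)(id - NUM_RESERVED_PARAMS);
        }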
136 | |||
137 | /* | ||
138 | * This header element defines a command. The op field determines whether the | ||
139 | * element defines an init (0) or a convert (1) command. data_byte_offset | ||
140 | * denotes the beginning address of command elements in the file. | ||
141 | */ | ||
142 | |||
143 | struct gk20a_cde_hdr_command { | ||
144 | u32 op; | ||
145 | u32 num_entries; | ||
146 | u64 data_byte_offset; | ||
147 | }; | ||
148 | |||
149 | enum { | ||
150 | TYPE_BUF_COMMAND_INIT = 0, | ||
151 | TYPE_BUF_COMMAND_CONVERT | ||
152 | }; | ||
153 | |||
154 | /* | ||
155 | * This command element defines one entry inside the push buffer. target_buf | ||
156 | * identifies the buffer that holds the pushbuffer entries, target_byte_offset | ||
157 | * the offset inside that buffer and num_bytes the size of the entry in bytes. | ||
158 | */ | ||
159 | |||
160 | struct gk20a_cde_cmd_elem { | ||
161 | u32 target_buf; | ||
162 | u32 padding; | ||
163 | u64 target_byte_offset; | ||
164 | u64 num_bytes; | ||
165 | }; | ||
166 | |||
167 | /* | ||
168 | * This element is used for storing a small array of data. | ||
169 | */ | ||
170 | |||
171 | enum { | ||
172 | ARRAY_PROGRAM_OFFSET = 0, | ||
173 | ARRAY_REGISTER_COUNT, | ||
174 | ARRAY_LAUNCH_COMMAND, | ||
175 | NUM_CDE_ARRAYS | ||
176 | }; | ||
177 | |||
178 | struct gk20a_cde_hdr_array { | ||
179 | u32 id; | ||
180 | u32 data[MAX_CDE_ARRAY_ENTRIES]; | ||
181 | }; | ||
182 | |||
183 | /* | ||
184 | * The following defines a single header element. Each element has a type and | ||
185 | * one of the data structures above. | ||
186 | */ | ||
187 | |||
188 | struct gk20a_cde_hdr_elem { | ||
189 | u32 type; | ||
190 | u32 padding; | ||
191 | union { | ||
192 | struct gk20a_cde_hdr_buf buf; | ||
193 | struct gk20a_cde_hdr_replace replace; | ||
194 | struct gk20a_cde_hdr_param param; | ||
195 | u32 required_class; | ||
196 | struct gk20a_cde_hdr_command command; | ||
197 | struct gk20a_cde_hdr_array array; | ||
198 | }; | ||
199 | }; | ||
200 | |||
201 | enum { | ||
202 | TYPE_BUF = 0, | ||
203 | TYPE_REPLACE, | ||
204 | TYPE_PARAM, | ||
205 | TYPE_REQUIRED_CLASS, | ||
206 | TYPE_COMMAND, | ||
207 | TYPE_ARRAY | ||
208 | }; | ||
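A firmware loader walking the header would dispatch on these type tags into the union members above. A minimal sketch follows, with hypothetical handle_* functions standing in for the driver's actual handlers:

        /* Sketch of element dispatch; the handle_* helpers are hypothetical. */
        static int cde_handle_elem(struct gk20a_cde_ctx *cde_ctx,
                                   struct gk20a_cde_hdr_elem *elem)
        {
                switch (elem->type) {
                case TYPE_BUF:
                        return handle_buf(cde_ctx, &elem->buf);
                case TYPE_REPLACE:
                        return handle_replace(cde_ctx, &elem->replace);
                case TYPE_PARAM:
                        return handle_param(cde_ctx, &elem->param);
                case TYPE_REQUIRED_CLASS:
                        return check_class(cde_ctx, elem->required_class);
                case TYPE_COMMAND:
                        return handle_command(cde_ctx, &elem->command);
                case TYPE_ARRAY:
                        return handle_array(cde_ctx, &elem->array);
                default:
                        return -EINVAL;
                }
        }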
209 | |||
210 | struct gk20a_cde_param { | ||
211 | u32 id; | ||
212 | u32 padding; | ||
213 | u64 value; | ||
214 | }; | ||
215 | |||
216 | struct gk20a_cde_ctx { | ||
217 | struct gk20a *g; | ||
218 | struct device *dev; | ||
219 | |||
220 | /* channel related data */ | ||
221 | struct channel_gk20a *ch; | ||
222 | struct vm_gk20a *vm; | ||
223 | |||
224 | /* buf converter configuration */ | ||
225 | struct nvgpu_mem mem[MAX_CDE_BUFS]; | ||
226 | unsigned int num_bufs; | ||
227 | |||
228 | /* buffer patching params (where patching should be done) */ | ||
229 | struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS]; | ||
230 | unsigned int num_params; | ||
231 | |||
232 | /* storage for user space parameter values */ | ||
233 | u32 user_param_values[MAX_CDE_USER_PARAMS]; | ||
234 | |||
235 | u32 surf_param_offset; | ||
236 | u32 surf_param_lines; | ||
237 | u64 surf_vaddr; | ||
238 | |||
239 | u64 compbit_vaddr; | ||
240 | u64 compbit_size; | ||
241 | |||
242 | u64 scatterbuffer_vaddr; | ||
243 | u64 scatterbuffer_size; | ||
244 | |||
245 | u64 backing_store_vaddr; | ||
246 | |||
247 | struct nvgpu_gpfifo *init_convert_cmd; | ||
248 | int init_cmd_num_entries; | ||
249 | |||
250 | struct nvgpu_gpfifo *convert_cmd; | ||
251 | int convert_cmd_num_entries; | ||
252 | |||
253 | struct kobj_attribute attr; | ||
254 | |||
255 | bool init_cmd_executed; | ||
256 | |||
257 | struct nvgpu_list_node list; | ||
258 | bool is_temporary; | ||
259 | bool in_use; | ||
260 | struct delayed_work ctx_deleter_work; | ||
261 | }; | ||
262 | |||
263 | static inline struct gk20a_cde_ctx * | ||
264 | gk20a_cde_ctx_from_list(struct nvgpu_list_node *node) | ||
265 | { | ||
266 | return (struct gk20a_cde_ctx *) | ||
267 | ((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list)); | ||
268 | }; | ||
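This is the usual container_of pattern for intrusive lists. A hypothetical use, assuming nvgpu's list nodes link through a next pointer like the Linux list_head they mirror:

        /* Hypothetical: recover the context embedding the first free node. */
        struct nvgpu_list_node *node = cde_app->free_contexts.next;
        struct gk20a_cde_ctx *cde_ctx = gk20a_cde_ctx_from_list(node);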
269 | |||
270 | struct gk20a_cde_app { | ||
271 | bool initialised; | ||
272 | struct nvgpu_mutex mutex; | ||
273 | |||
274 | struct nvgpu_list_node free_contexts; | ||
275 | struct nvgpu_list_node used_contexts; | ||
276 | unsigned int ctx_count; | ||
277 | unsigned int ctx_usecount; | ||
278 | unsigned int ctx_count_top; | ||
279 | |||
280 | u32 firmware_version; | ||
281 | |||
282 | u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES]; | ||
283 | |||
284 | u32 shader_parameter; | ||
285 | }; | ||
286 | |||
287 | void gk20a_cde_destroy(struct gk20a *g); | ||
288 | void gk20a_cde_suspend(struct gk20a *g); | ||
289 | int gk20a_init_cde_support(struct gk20a *g); | ||
290 | int gk20a_cde_reload(struct gk20a *g); | ||
291 | int gk20a_cde_convert(struct gk20a *g, | ||
292 | struct dma_buf *compbits_buf, | ||
293 | u64 compbits_byte_offset, | ||
294 | u64 scatterbuffer_byte_offset, | ||
295 | struct nvgpu_fence *fence, | ||
296 | u32 __flags, struct gk20a_cde_param *params, | ||
297 | int num_params, struct gk20a_fence **fence_out); | ||
298 | |||
299 | int gk20a_prepare_compressible_read( | ||
300 | struct gk20a *g, u32 buffer_fd, u32 request, u64 offset, | ||
301 | u64 compbits_hoffset, u64 compbits_voffset, | ||
302 | u64 scatterbuffer_offset, | ||
303 | u32 width, u32 height, u32 block_height_log2, | ||
304 | u32 submit_flags, struct nvgpu_fence *fence, | ||
305 | u32 *valid_compbits, u32 *zbc_color, | ||
306 | struct gk20a_fence **fence_out); | ||
307 | int gk20a_mark_compressible_write( | ||
308 | struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, | ||
309 | u32 zbc_color); | ||
310 | |||
311 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 0cd314d6..63ea5bc4 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -117,9 +117,6 @@ int gk20a_prepare_poweroff(struct gk20a *g) | |||
117 | if (gk20a_fifo_is_engine_busy(g)) | 117 | if (gk20a_fifo_is_engine_busy(g)) |
118 | return -EBUSY; | 118 | return -EBUSY; |
119 | 119 | ||
120 | /* cancel any pending cde work */ | ||
121 | gk20a_cde_suspend(g); | ||
122 | |||
123 | gk20a_ce_suspend(g); | 120 | gk20a_ce_suspend(g); |
124 | 121 | ||
125 | ret = gk20a_channel_suspend(g); | 122 | ret = gk20a_channel_suspend(g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ab715bdc..69cb2253 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -60,7 +60,6 @@ struct nvgpu_cpu_time_correlation_sample; | |||
60 | #include "pmu_gk20a.h" | 60 | #include "pmu_gk20a.h" |
61 | #include "priv_ring_gk20a.h" | 61 | #include "priv_ring_gk20a.h" |
62 | #include "therm_gk20a.h" | 62 | #include "therm_gk20a.h" |
63 | #include "cde_gk20a.h" | ||
64 | #include "sched_gk20a.h" | 63 | #include "sched_gk20a.h" |
65 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC | 64 | #ifdef CONFIG_ARCH_TEGRA_18x_SOC |
66 | #include "clk/clk.h" | 65 | #include "clk/clk.h" |
@@ -928,6 +927,7 @@ struct gpu_ops { | |||
928 | struct { | 927 | struct { |
929 | void (*get_program_numbers)(struct gk20a *g, | 928 | void (*get_program_numbers)(struct gk20a *g, |
930 | u32 block_height_log2, | 929 | u32 block_height_log2, |
930 | u32 shader_parameter, | ||
931 | int *hprog, int *vprog); | 931 | int *hprog, int *vprog); |
932 | bool (*need_scatter_buffer)(struct gk20a *g); | 932 | bool (*need_scatter_buffer)(struct gk20a *g); |
933 | int (*populate_scatter_buffer)(struct gk20a *g, | 933 | int (*populate_scatter_buffer)(struct gk20a *g, |
@@ -1217,7 +1217,6 @@ struct gk20a { | |||
1217 | 1217 | ||
1218 | struct gk20a_sched_ctrl sched_ctrl; | 1218 | struct gk20a_sched_ctrl sched_ctrl; |
1219 | 1219 | ||
1220 | struct gk20a_cde_app cde_app; | ||
1221 | bool mmu_debug_ctrl; | 1220 | bool mmu_debug_ctrl; |
1222 | 1221 | ||
1223 | u32 tpc_fs_mask_user; | 1222 | u32 tpc_fs_mask_user; |