diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/Kconfig | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 924 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.h | 254 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 |
6 files changed, 1194 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig index 315c4683..b863077a 100644 --- a/drivers/gpu/nvgpu/Kconfig +++ b/drivers/gpu/nvgpu/Kconfig | |||
@@ -12,6 +12,14 @@ config GK20A_DEFAULT_TIMEOUT | |||
12 | help | 12 | help |
13 | Default timeout for jobs in milliseconds. Set to zero for no timeout. | 13 | Default timeout for jobs in milliseconds. Set to zero for no timeout. |
14 | 14 | ||
15 | config GK20A_CDE | ||
16 | depends on GK20A | ||
17 | bool "Support compression bit swizzling through CDE" | ||
18 | default n | ||
19 | help | ||
20 | Say Y to allow compression bit swizzling | ||
21 | using pre-compiled shader. | ||
22 | |||
15 | config GK20A_PMU | 23 | config GK20A_PMU |
16 | bool "Support GK20A PMU" | 24 | bool "Support GK20A PMU" |
17 | depends on GK20A | 25 | depends on GK20A |
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile index 246f9447..aa9237b4 100644 --- a/drivers/gpu/nvgpu/gk20a/Makefile +++ b/drivers/gpu/nvgpu/gk20a/Makefile | |||
@@ -34,6 +34,7 @@ nvgpu-y := \ | |||
34 | hal.o \ | 34 | hal.o \ |
35 | hal_gk20a.o \ | 35 | hal_gk20a.o \ |
36 | gk20a_allocator.o \ | 36 | gk20a_allocator.o \ |
37 | cde_gk20a.o \ | ||
37 | platform_gk20a_generic.o \ | 38 | platform_gk20a_generic.o \ |
38 | tsg_gk20a.o | 39 | tsg_gk20a.o |
39 | nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o | 40 | nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o |
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c new file mode 100644 index 00000000..d01426be --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -0,0 +1,924 @@ | |||
1 | /* | ||
2 | * Color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #include <linux/nvhost.h> | ||
20 | #include <linux/dma-mapping.h> | ||
21 | #include <linux/firmware.h> | ||
22 | #include <linux/debugfs.h> | ||
23 | #include <linux/dma-buf.h> | ||
24 | |||
25 | #include "gk20a.h" | ||
26 | #include "channel_gk20a.h" | ||
27 | #include "mm_gk20a.h" | ||
28 | #include "cde_gk20a.h" | ||
29 | #include "fence_gk20a.h" | ||
30 | #include "gr_gk20a.h" | ||
31 | #include "debug_gk20a.h" | ||
32 | |||
33 | #include "hw_ccsr_gk20a.h" | ||
34 | #include "hw_pbdma_gk20a.h" | ||
35 | |||
36 | void gk20a_cde_dump(struct gk20a_cde_ctx *cde_ctx) | ||
37 | { | ||
38 | int i; | ||
39 | for (i = 0; i < cde_ctx->num_bufs; i++) { | ||
40 | struct gk20a_cde_mem_desc *target_mem = cde_ctx->mem + i; | ||
41 | u32 *target_mem_ptr = target_mem->cpuva; | ||
42 | int j = 0; | ||
43 | |||
44 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=%d, size=%zu, gpuva=%llx\n", | ||
45 | i, target_mem->num_bytes, target_mem->gpu_va); | ||
46 | |||
47 | for (j = 0; j < target_mem->num_bytes / sizeof(u32); j++) | ||
48 | gk20a_dbg(gpu_dbg_cde, "0x%08x ", target_mem_ptr[j]); | ||
49 | gk20a_dbg(gpu_dbg_cde, "\n\n"); | ||
50 | } | ||
51 | } | ||
52 | |||
53 | static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx) | ||
54 | { | ||
55 | struct device *dev = &cde_ctx->pdev->dev; | ||
56 | int i; | ||
57 | |||
58 | for (i = 0; i < cde_ctx->num_bufs; i++) { | ||
59 | struct gk20a_cde_mem_desc *mem = cde_ctx->mem + i; | ||
60 | gk20a_gmmu_unmap(cde_ctx->vm, mem->gpu_va, mem->num_bytes, 1); | ||
61 | gk20a_free_sgtable(&mem->sgt); | ||
62 | dma_free_coherent(dev, mem->num_bytes, mem->cpuva, mem->iova); | ||
63 | } | ||
64 | |||
65 | for (i = 0; i < cde_ctx->num_obj_ids; i++) | ||
66 | gk20a_free_obj_ctx(cde_ctx->ch, | ||
67 | &(struct nvhost_free_obj_ctx_args) | ||
68 | { cde_ctx->obj_ids[i] }); | ||
69 | |||
70 | kfree(cde_ctx->init_cmd); | ||
71 | kfree(cde_ctx->convert_cmd); | ||
72 | |||
73 | cde_ctx->convert_cmd = NULL; | ||
74 | cde_ctx->init_cmd = NULL; | ||
75 | cde_ctx->num_bufs = 0; | ||
76 | cde_ctx->num_obj_ids = 0; | ||
77 | cde_ctx->num_params = 0; | ||
78 | cde_ctx->init_cmd_num_entries = 0; | ||
79 | cde_ctx->convert_cmd_num_entries = 0; | ||
80 | } | ||
81 | |||
82 | static int gk20a_cde_remove(struct gk20a_cde_ctx *cde_ctx) | ||
83 | { | ||
84 | struct gk20a *g = cde_ctx->g; | ||
85 | struct channel_gk20a *ch = cde_ctx->ch; | ||
86 | struct vm_gk20a *vm = ch->vm; | ||
87 | |||
88 | /* free the channel */ | ||
89 | gk20a_free_channel(cde_ctx->ch, true); | ||
90 | |||
91 | /* ..then release mapped memory */ | ||
92 | gk20a_deinit_cde_img(cde_ctx); | ||
93 | gk20a_gmmu_unmap(vm, cde_ctx->backing_store_vaddr, | ||
94 | g->gr.compbit_store.size, 1); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | int gk20a_cde_destroy(struct gk20a *g) | ||
100 | { | ||
101 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
102 | struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx; | ||
103 | int ret, i; | ||
104 | |||
105 | if (!cde_app->initialised) | ||
106 | return 0; | ||
107 | |||
108 | for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) | ||
109 | ret = gk20a_cde_remove(cde_ctx); | ||
110 | |||
111 | cde_app->initialised = false; | ||
112 | return ret; | ||
113 | } | ||
114 | |||
115 | static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx, | ||
116 | const struct firmware *img, | ||
117 | struct gk20a_cde_hdr_buf *buf) | ||
118 | { | ||
119 | struct device *dev = &cde_ctx->pdev->dev; | ||
120 | struct gk20a_cde_mem_desc *mem; | ||
121 | int err; | ||
122 | |||
123 | /* check that the file can hold the buf */ | ||
124 | if (buf->data_byte_offset != 0 && | ||
125 | buf->data_byte_offset + buf->num_bytes > img->size) { | ||
126 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid data section. buffer idx = %d", | ||
127 | cde_ctx->num_bufs); | ||
128 | return -EINVAL; | ||
129 | } | ||
130 | |||
131 | /* check that we have enough buf elems available */ | ||
132 | if (cde_ctx->num_bufs > MAX_CDE_BUFS) { | ||
133 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid data section. buffer idx = %d", | ||
134 | cde_ctx->num_bufs); | ||
135 | return -ENOMEM; | ||
136 | } | ||
137 | |||
138 | /* allocate buf */ | ||
139 | mem = cde_ctx->mem + cde_ctx->num_bufs; | ||
140 | mem->num_bytes = buf->num_bytes; | ||
141 | mem->cpuva = dma_alloc_coherent(dev, mem->num_bytes, &mem->iova, | ||
142 | GFP_KERNEL); | ||
143 | if (!mem->cpuva) { | ||
144 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate device memory. buffer idx = %d", | ||
145 | cde_ctx->num_bufs); | ||
146 | return -ENOMEM; | ||
147 | } | ||
148 | |||
149 | err = gk20a_get_sgtable(dev, &mem->sgt, mem->cpuva, mem->iova, | ||
150 | mem->num_bytes); | ||
151 | if (err) { | ||
152 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not get sg table. buffer idx = %d", | ||
153 | cde_ctx->num_bufs); | ||
154 | err = -ENOMEM; | ||
155 | goto err_get_sgtable; | ||
156 | } | ||
157 | |||
158 | mem->gpu_va = gk20a_gmmu_map(cde_ctx->vm, &mem->sgt, mem->num_bytes, 0, | ||
159 | gk20a_mem_flag_none); | ||
160 | if (!mem->gpu_va) { | ||
161 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not map buffer to gpuva. buffer idx = %d", | ||
162 | cde_ctx->num_bufs); | ||
163 | err = -ENOMEM; | ||
164 | goto err_map_buffer; | ||
165 | } | ||
166 | |||
167 | /* copy the content */ | ||
168 | if (buf->data_byte_offset != 0) | ||
169 | memcpy(mem->cpuva, img->data + buf->data_byte_offset, | ||
170 | buf->num_bytes); | ||
171 | |||
172 | cde_ctx->num_bufs++; | ||
173 | |||
174 | return 0; | ||
175 | |||
176 | err_map_buffer: | ||
177 | gk20a_free_sgtable(&mem->sgt); | ||
178 | kfree(mem->sgt); | ||
179 | err_get_sgtable: | ||
180 | dma_free_coherent(dev, mem->num_bytes, &mem->cpuva, mem->iova); | ||
181 | return err; | ||
182 | } | ||
183 | |||
184 | static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target, | ||
185 | int type, s32 shift, u64 mask, u64 value) | ||
186 | { | ||
187 | u32 *target_mem_ptr = target; | ||
188 | u64 *target_mem_ptr_u64 = target; | ||
189 | u64 current_value, new_value; | ||
190 | |||
191 | value = (shift >= 0) ? value << shift : value >> -shift; | ||
192 | value &= mask; | ||
193 | |||
194 | /* read current data from the location */ | ||
195 | if (type == TYPE_PARAM_TYPE_U32) | ||
196 | current_value = *target_mem_ptr; | ||
197 | else if (type == TYPE_PARAM_TYPE_U64_LITTLE) | ||
198 | current_value = *target_mem_ptr_u64; | ||
199 | else if (type == TYPE_PARAM_TYPE_U64_BIG) { | ||
200 | current_value = *target_mem_ptr_u64; | ||
201 | current_value = (u64)(current_value >> 32) | | ||
202 | (u64)(current_value << 32); | ||
203 | } else { | ||
204 | gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown type. type=%d", | ||
205 | type); | ||
206 | return -EINVAL; | ||
207 | } | ||
208 | |||
209 | current_value &= ~mask; | ||
210 | new_value = current_value | value; | ||
211 | |||
212 | /* store the element data back */ | ||
213 | if (type == TYPE_PARAM_TYPE_U32) | ||
214 | *target_mem_ptr = (u32)new_value; | ||
215 | else if (type == TYPE_PARAM_TYPE_U64_LITTLE) | ||
216 | *target_mem_ptr_u64 = new_value; | ||
217 | else { | ||
218 | new_value = (u64)(new_value >> 32) | | ||
219 | (u64)(new_value << 32); | ||
220 | *target_mem_ptr_u64 = new_value; | ||
221 | } | ||
222 | |||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx, | ||
227 | const struct firmware *img, | ||
228 | struct gk20a_cde_hdr_replace *replace) | ||
229 | { | ||
230 | struct gk20a_cde_mem_desc *source_mem; | ||
231 | struct gk20a_cde_mem_desc *target_mem; | ||
232 | u32 *target_mem_ptr; | ||
233 | u64 vaddr; | ||
234 | int err; | ||
235 | |||
236 | if (replace->target_buf >= cde_ctx->num_bufs || | ||
237 | replace->source_buf >= cde_ctx->num_bufs) { | ||
238 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer. target_buf=%u, source_buf=%u, num_bufs=%d", | ||
239 | replace->target_buf, replace->source_buf, | ||
240 | cde_ctx->num_bufs); | ||
241 | return -EINVAL; | ||
242 | } | ||
243 | |||
244 | source_mem = cde_ctx->mem + replace->source_buf; | ||
245 | target_mem = cde_ctx->mem + replace->target_buf; | ||
246 | target_mem_ptr = target_mem->cpuva; | ||
247 | |||
248 | if (source_mem->num_bytes < (replace->source_byte_offset + 3) || | ||
249 | target_mem->num_bytes < (replace->target_byte_offset + 3)) { | ||
250 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer offsets. target_buf_offs=%lld, source_buf_offs=%lld, source_buf_size=%zu, dest_buf_size=%zu", | ||
251 | replace->target_byte_offset, | ||
252 | replace->source_byte_offset, | ||
253 | source_mem->num_bytes, | ||
254 | target_mem->num_bytes); | ||
255 | return -EINVAL; | ||
256 | } | ||
257 | |||
258 | /* calculate the target pointer */ | ||
259 | target_mem_ptr += (replace->target_byte_offset / sizeof(u32)); | ||
260 | |||
261 | /* determine patch value */ | ||
262 | vaddr = source_mem->gpu_va + replace->source_byte_offset; | ||
263 | err = gk20a_replace_data(cde_ctx, target_mem_ptr, replace->type, | ||
264 | replace->shift, replace->mask, | ||
265 | vaddr); | ||
266 | if (err) { | ||
267 | gk20a_warn(&cde_ctx->pdev->dev, "cde: replace failed. err=%d, target_buf=%u, target_buf_offs=%lld, source_buf=%u, source_buf_offs=%lld", | ||
268 | err, replace->target_buf, | ||
269 | replace->target_byte_offset, | ||
270 | replace->source_buf, | ||
271 | replace->source_byte_offset); | ||
272 | } | ||
273 | |||
274 | return err; | ||
275 | } | ||
276 | |||
277 | static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) | ||
278 | { | ||
279 | struct gk20a *g = cde_ctx->g; | ||
280 | struct gk20a_cde_mem_desc *target_mem; | ||
281 | u32 *target_mem_ptr; | ||
282 | u64 new_data; | ||
283 | int user_id = 0, i, err; | ||
284 | |||
285 | for (i = 0; i < cde_ctx->num_params; i++) { | ||
286 | struct gk20a_cde_hdr_param *param = cde_ctx->params + i; | ||
287 | target_mem = cde_ctx->mem + param->target_buf; | ||
288 | target_mem_ptr = target_mem->cpuva; | ||
289 | target_mem_ptr += (param->target_byte_offset / sizeof(u32)); | ||
290 | |||
291 | switch (param->id) { | ||
292 | case TYPE_PARAM_COMPTAGS_PER_CACHELINE: | ||
293 | new_data = g->gr.comptags_per_cacheline; | ||
294 | break; | ||
295 | case TYPE_PARAM_GPU_CONFIGURATION: | ||
296 | new_data = g->ltc_count * g->gr.slices_per_ltc * | ||
297 | g->gr.cacheline_size; | ||
298 | break; | ||
299 | case TYPE_PARAM_FIRSTPAGEOFFSET: | ||
300 | new_data = cde_ctx->src_param_offset; | ||
301 | break; | ||
302 | case TYPE_PARAM_NUMPAGES: | ||
303 | new_data = cde_ctx->src_param_lines; | ||
304 | break; | ||
305 | case TYPE_PARAM_BACKINGSTORE: | ||
306 | new_data = cde_ctx->backing_store_vaddr; | ||
307 | break; | ||
308 | case TYPE_PARAM_DESTINATION: | ||
309 | new_data = cde_ctx->dest_vaddr; | ||
310 | break; | ||
311 | case TYPE_PARAM_DESTINATION_SIZE: | ||
312 | new_data = cde_ctx->dest_size; | ||
313 | break; | ||
314 | case TYPE_PARAM_BACKINGSTORE_SIZE: | ||
315 | new_data = g->gr.compbit_store.size; | ||
316 | break; | ||
317 | case TYPE_PARAM_SOURCE_SMMU_ADDR: | ||
318 | new_data = gk20a_mm_gpuva_to_iova(cde_ctx->vm, | ||
319 | cde_ctx->src_vaddr); | ||
320 | if (new_data == 0) | ||
321 | err = -EINVAL; | ||
322 | break; | ||
323 | default: | ||
324 | user_id = param->id - NUM_RESERVED_PARAMS; | ||
325 | if (user_id < 0 || user_id >= MAX_CDE_USER_PARAMS) | ||
326 | continue; | ||
327 | new_data = cde_ctx->user_param_values[user_id]; | ||
328 | } | ||
329 | |||
330 | gk20a_dbg(gpu_dbg_cde, "cde: patch: idx_in_file=%d param_id=%d target_buf=%u target_byte_offset=%lld data_value=0x%llx data_offset/data_diff=%lld data_type=%d data_shift=%d data_mask=0x%llx", | ||
331 | i, param->id, param->target_buf, | ||
332 | param->target_byte_offset, new_data, | ||
333 | param->data_offset, param->type, param->shift, | ||
334 | param->mask); | ||
335 | |||
336 | new_data += param->data_offset; | ||
337 | |||
338 | err = gk20a_replace_data(cde_ctx, target_mem_ptr, param->type, | ||
339 | param->shift, param->mask, new_data); | ||
340 | |||
341 | if (err) { | ||
342 | gk20a_warn(&cde_ctx->pdev->dev, "cde: patch failed. err=%d, idx=%d, id=%d, target_buf=%u, target_buf_offs=%lld, patch_value=%llu", | ||
343 | err, i, param->id, param->target_buf, | ||
344 | param->target_byte_offset, new_data); | ||
345 | return err; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | return 0; | ||
350 | } | ||
351 | |||
352 | static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx, | ||
353 | const struct firmware *img, | ||
354 | struct gk20a_cde_hdr_param *param) | ||
355 | { | ||
356 | struct gk20a_cde_mem_desc *target_mem; | ||
357 | |||
358 | if (param->target_buf >= cde_ctx->num_bufs) { | ||
359 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u", | ||
360 | cde_ctx->num_params, param->target_buf, | ||
361 | cde_ctx->num_bufs); | ||
362 | return -EINVAL; | ||
363 | } | ||
364 | |||
365 | target_mem = cde_ctx->mem + param->target_buf; | ||
366 | if (target_mem->num_bytes < (param->target_byte_offset + 3)) { | ||
367 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid buffer parameter. param idx = %d, target_buf_offs=%lld, target_buf_size=%zu", | ||
368 | cde_ctx->num_params, param->target_byte_offset, | ||
369 | target_mem->num_bytes); | ||
370 | return -EINVAL; | ||
371 | } | ||
372 | |||
373 | /* does this parameter fit into our parameter structure */ | ||
374 | if (cde_ctx->num_params >= MAX_CDE_PARAMS) { | ||
375 | gk20a_warn(&cde_ctx->pdev->dev, "cde: no room for new parameters param idx = %d", | ||
376 | cde_ctx->num_params); | ||
377 | return -ENOMEM; | ||
378 | } | ||
379 | |||
380 | /* is the given id valid? */ | ||
381 | if (param->id >= NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS) { | ||
382 | gk20a_warn(&cde_ctx->pdev->dev, "cde: parameter id is not valid. param idx = %d, id=%u, max=%u", | ||
383 | param->id, cde_ctx->num_params, | ||
384 | NUM_RESERVED_PARAMS + MAX_CDE_USER_PARAMS); | ||
385 | return -EINVAL; | ||
386 | } | ||
387 | |||
388 | cde_ctx->params[cde_ctx->num_params] = *param; | ||
389 | cde_ctx->num_params++; | ||
390 | |||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx, | ||
395 | const struct firmware *img, | ||
396 | u32 required_class) | ||
397 | { | ||
398 | struct nvhost_alloc_obj_ctx_args alloc_obj_ctx; | ||
399 | int err; | ||
400 | |||
401 | if (cde_ctx->num_obj_ids >= MAX_CDE_OBJ_IDS) { | ||
402 | gk20a_warn(&cde_ctx->pdev->dev, "cde: running out of class ids"); | ||
403 | return -ENOMEM; | ||
404 | } | ||
405 | |||
406 | alloc_obj_ctx.class_num = required_class; | ||
407 | alloc_obj_ctx.padding = 0; | ||
408 | |||
409 | err = gk20a_alloc_obj_ctx(cde_ctx->ch, &alloc_obj_ctx); | ||
410 | if (err) { | ||
411 | gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to allocate ctx. err=%d", | ||
412 | err); | ||
413 | return err; | ||
414 | } | ||
415 | |||
416 | cde_ctx->obj_ids[cde_ctx->num_obj_ids] = alloc_obj_ctx.obj_id; | ||
417 | cde_ctx->num_obj_ids++; | ||
418 | |||
419 | return 0; | ||
420 | } | ||
421 | |||
422 | static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, | ||
423 | const struct firmware *img, | ||
424 | u32 op, | ||
425 | struct gk20a_cde_cmd_elem *cmd_elem, | ||
426 | u32 num_elems) | ||
427 | { | ||
428 | struct nvhost_gpfifo **gpfifo, *gpfifo_elem; | ||
429 | u32 *num_entries; | ||
430 | int i; | ||
431 | |||
432 | /* check command type */ | ||
433 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
434 | gpfifo = &cde_ctx->init_cmd; | ||
435 | num_entries = &cde_ctx->init_cmd_num_entries; | ||
436 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
437 | gpfifo = &cde_ctx->convert_cmd; | ||
438 | num_entries = &cde_ctx->convert_cmd_num_entries; | ||
439 | } else { | ||
440 | gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown command. op=%u", | ||
441 | op); | ||
442 | return -EINVAL; | ||
443 | } | ||
444 | |||
445 | /* allocate gpfifo entries to be pushed */ | ||
446 | *gpfifo = kzalloc(sizeof(struct nvhost_gpfifo) * num_elems, | ||
447 | GFP_KERNEL); | ||
448 | if (!*gpfifo) { | ||
449 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not allocate memory for gpfifo entries"); | ||
450 | return -ENOMEM; | ||
451 | } | ||
452 | |||
453 | gpfifo_elem = *gpfifo; | ||
454 | for (i = 0; i < num_elems; i++, cmd_elem++, gpfifo_elem++) { | ||
455 | struct gk20a_cde_mem_desc *target_mem; | ||
456 | |||
457 | /* validate the current entry */ | ||
458 | if (cmd_elem->target_buf >= cde_ctx->num_bufs) { | ||
459 | gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer is not available (target=%u, num_bufs=%u)", | ||
460 | cmd_elem->target_buf, cde_ctx->num_bufs); | ||
461 | return -EINVAL; | ||
462 | } | ||
463 | |||
464 | target_mem = cde_ctx->mem + cmd_elem->target_buf; | ||
465 | if (target_mem->num_bytes < | ||
466 | cmd_elem->target_byte_offset + cmd_elem->num_bytes) { | ||
467 | gk20a_warn(&cde_ctx->pdev->dev, "cde: target buffer cannot hold all entries (target_size=%zu, target_byte_offset=%lld, num_bytes=%llu)", | ||
468 | target_mem->num_bytes, | ||
469 | cmd_elem->target_byte_offset, | ||
470 | cmd_elem->num_bytes); | ||
471 | return -EINVAL; | ||
472 | } | ||
473 | |||
474 | /* store the element into gpfifo */ | ||
475 | gpfifo_elem->entry0 = | ||
476 | u64_lo32(target_mem->gpu_va + | ||
477 | cmd_elem->target_byte_offset); | ||
478 | gpfifo_elem->entry1 = | ||
479 | u64_hi32(target_mem->gpu_va + | ||
480 | cmd_elem->target_byte_offset) | | ||
481 | pbdma_gp_entry1_length_f(cmd_elem->num_bytes / | ||
482 | sizeof(u32)); | ||
483 | } | ||
484 | |||
485 | *num_entries = num_elems; | ||
486 | return 0; | ||
487 | } | ||
488 | |||
489 | static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx, | ||
490 | const struct firmware *img) | ||
491 | { | ||
492 | u32 *data = (u32 *)img->data; | ||
493 | u32 version, num_of_elems; | ||
494 | struct gk20a_cde_hdr_elem *elem; | ||
495 | u32 min_size = 0; | ||
496 | int err = 0; | ||
497 | int i; | ||
498 | |||
499 | min_size += 2 * sizeof(u32); | ||
500 | if (img->size < min_size) { | ||
501 | gk20a_warn(&cde_ctx->pdev->dev, "cde: invalid image header"); | ||
502 | return -EINVAL; | ||
503 | } | ||
504 | |||
505 | version = data[0]; | ||
506 | num_of_elems = data[1]; | ||
507 | |||
508 | min_size += num_of_elems * sizeof(*elem); | ||
509 | if (img->size < min_size) { | ||
510 | gk20a_warn(&cde_ctx->pdev->dev, "cde: bad image"); | ||
511 | return -EINVAL; | ||
512 | } | ||
513 | |||
514 | elem = (struct gk20a_cde_hdr_elem *)&data[2]; | ||
515 | for (i = 0; i < num_of_elems; i++) { | ||
516 | int err = 0; | ||
517 | switch (elem->type) { | ||
518 | case TYPE_BUF: | ||
519 | err = gk20a_init_cde_buf(cde_ctx, img, &elem->buf); | ||
520 | break; | ||
521 | case TYPE_REPLACE: | ||
522 | err = gk20a_init_cde_replace(cde_ctx, img, | ||
523 | &elem->replace); | ||
524 | break; | ||
525 | case TYPE_PARAM: | ||
526 | err = gk20a_init_cde_param(cde_ctx, img, &elem->param); | ||
527 | break; | ||
528 | case TYPE_REQUIRED_CLASS: | ||
529 | err = gk20a_init_cde_required_class(cde_ctx, img, | ||
530 | elem->required_class); | ||
531 | break; | ||
532 | case TYPE_COMMAND: | ||
533 | { | ||
534 | struct gk20a_cde_cmd_elem *cmd = (void *) | ||
535 | &img->data[elem->command.data_byte_offset]; | ||
536 | err = gk20a_init_cde_command(cde_ctx, img, | ||
537 | elem->command.op, cmd, | ||
538 | elem->command.num_entries); | ||
539 | break; | ||
540 | } | ||
541 | default: | ||
542 | gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown header element"); | ||
543 | err = -EINVAL; | ||
544 | } | ||
545 | |||
546 | if (err) | ||
547 | goto deinit_image; | ||
548 | |||
549 | elem++; | ||
550 | } | ||
551 | |||
552 | if (!cde_ctx->init_cmd || !cde_ctx->init_cmd_num_entries) { | ||
553 | gk20a_warn(&cde_ctx->pdev->dev, "cde: convert command not defined"); | ||
554 | err = -EINVAL; | ||
555 | goto deinit_image; | ||
556 | } | ||
557 | |||
558 | if (!cde_ctx->convert_cmd || !cde_ctx->convert_cmd_num_entries) { | ||
559 | gk20a_warn(&cde_ctx->pdev->dev, "cde: convert command not defined"); | ||
560 | err = -EINVAL; | ||
561 | goto deinit_image; | ||
562 | } | ||
563 | |||
564 | return 0; | ||
565 | |||
566 | deinit_image: | ||
567 | gk20a_deinit_cde_img(cde_ctx); | ||
568 | return err; | ||
569 | } | ||
570 | |||
571 | static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx, | ||
572 | u32 op, struct nvhost_fence *fence, | ||
573 | u32 flags, struct gk20a_fence **fence_out) | ||
574 | { | ||
575 | struct nvhost_gpfifo *gpfifo = NULL; | ||
576 | int num_entries = 0; | ||
577 | |||
578 | /* check command type */ | ||
579 | if (op == TYPE_BUF_COMMAND_INIT) { | ||
580 | gpfifo = cde_ctx->init_cmd; | ||
581 | num_entries = cde_ctx->init_cmd_num_entries; | ||
582 | } else if (op == TYPE_BUF_COMMAND_CONVERT) { | ||
583 | gpfifo = cde_ctx->convert_cmd; | ||
584 | num_entries = cde_ctx->convert_cmd_num_entries; | ||
585 | } else { | ||
586 | gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown buffer"); | ||
587 | return -EINVAL; | ||
588 | } | ||
589 | |||
590 | if (gpfifo == NULL || num_entries == 0) { | ||
591 | gk20a_warn(&cde_ctx->pdev->dev, "cde: buffer not available"); | ||
592 | return -ENOSYS; | ||
593 | } | ||
594 | |||
595 | return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, | ||
596 | num_entries, flags, fence, fence_out); | ||
597 | } | ||
598 | |||
599 | int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd, | ||
600 | s32 dst_kind, u64 dst_byte_offset, | ||
601 | u32 dst_size, struct nvhost_fence *fence, | ||
602 | u32 __flags, struct gk20a_cde_param *params, | ||
603 | int num_params, struct gk20a_fence **fence_out) | ||
604 | { | ||
605 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
606 | struct gk20a_comptags comptags; | ||
607 | struct gk20a_cde_ctx *cde_ctx; | ||
608 | struct dma_buf *src = NULL, *dst = NULL; | ||
609 | u64 dst_vaddr = 0, src_vaddr = 0; | ||
610 | u32 flags; | ||
611 | int err, i; | ||
612 | |||
613 | if (!cde_app->initialised) { | ||
614 | gk20a_warn(&g->dev->dev, "cde: conversion requrest but no image has been provided"); | ||
615 | return -ENOSYS; | ||
616 | } | ||
617 | |||
618 | mutex_lock(&cde_app->mutex); | ||
619 | |||
620 | /* pick next free cde context */ | ||
621 | cde_ctx = cde_app->cde_ctx + cde_app->cde_ctx_ptr; | ||
622 | cde_app->cde_ctx_ptr = (cde_app->cde_ctx_ptr + 1) % | ||
623 | ARRAY_SIZE(cde_app->cde_ctx); | ||
624 | |||
625 | /* First, get buffer references and map the buffers to local va */ | ||
626 | |||
627 | dst = dma_buf_get(dst_fd); | ||
628 | if (IS_ERR(src)) { | ||
629 | dst = NULL; | ||
630 | err = -EINVAL; | ||
631 | goto exit_unlock; | ||
632 | } | ||
633 | |||
634 | /* ensure that the dst buffer has drvdata */ | ||
635 | err = gk20a_dmabuf_alloc_drvdata(dst, &g->dev->dev); | ||
636 | if (err) | ||
637 | goto exit_unlock; | ||
638 | |||
639 | /* map the destination buffer */ | ||
640 | dst_vaddr = gk20a_vm_map(g->cde_app.vm, dst, 0, | ||
641 | 0, dst_kind, NULL, true, | ||
642 | gk20a_mem_flag_none, | ||
643 | 0, 0); | ||
644 | if (!dst_vaddr) { | ||
645 | err = -EINVAL; | ||
646 | goto exit_unlock; | ||
647 | } | ||
648 | |||
649 | src = dma_buf_get(src_fd); | ||
650 | if (IS_ERR(src)) { | ||
651 | src = NULL; | ||
652 | err = -EINVAL; | ||
653 | goto exit_unlock; | ||
654 | } | ||
655 | |||
656 | /* ensure that the src buffer has drvdata */ | ||
657 | err = gk20a_dmabuf_alloc_drvdata(src, &g->dev->dev); | ||
658 | if (err) | ||
659 | goto exit_unlock; | ||
660 | |||
661 | /* map the source buffer to prevent premature release */ | ||
662 | src_vaddr = gk20a_vm_map(g->cde_app.vm, src, 0, | ||
663 | 0, dst_kind, NULL, true, | ||
664 | gk20a_mem_flag_none, | ||
665 | 0, 0); | ||
666 | if (!src_vaddr) { | ||
667 | err = -EINVAL; | ||
668 | goto exit_unlock; | ||
669 | } | ||
670 | |||
671 | if (!dst_size) | ||
672 | dst_size = dst->size - dst_byte_offset; | ||
673 | |||
674 | /* reload buffer converter if it has failed */ | ||
675 | if (cde_ctx->ch->has_timedout) { | ||
676 | mutex_unlock(&cde_app->mutex); | ||
677 | err = gk20a_cde_reload(g); | ||
678 | if (err) | ||
679 | return err; | ||
680 | mutex_lock(&cde_app->mutex); | ||
681 | } | ||
682 | |||
683 | /* wait for channel idle */ | ||
684 | err = gk20a_channel_finish(cde_ctx->ch, 2000); | ||
685 | if (err) { | ||
686 | gk20a_warn(&cde_ctx->pdev->dev, "cde: old work could not be finished"); | ||
687 | goto exit_unlock; | ||
688 | } | ||
689 | |||
690 | /* disable the channel */ | ||
691 | gk20a_writel(g, ccsr_channel_r(cde_ctx->ch->hw_chid), | ||
692 | gk20a_readl(g, ccsr_channel_r(cde_ctx->ch->hw_chid)) | | ||
693 | ccsr_channel_enable_clr_true_f()); | ||
694 | gk20a_fifo_preempt_channel(g, cde_ctx->ch->hw_chid); | ||
695 | channel_gk20a_unbind(&g->fifo.channel[cde_ctx->ch->hw_chid]); | ||
696 | |||
697 | /* reinitialise the graphics context of the channel */ | ||
698 | gr_gk20a_load_golden_ctx_image(g, cde_ctx->ch); | ||
699 | |||
700 | /* re-enable the channel */ | ||
701 | g->ops.fifo.bind_channel(&g->fifo.channel[cde_ctx->ch->hw_chid]); | ||
702 | gk20a_writel(g, ccsr_channel_r(cde_ctx->ch->hw_chid), | ||
703 | gk20a_readl(g, ccsr_channel_r(cde_ctx->ch->hw_chid)) | | ||
704 | ccsr_channel_enable_set_true_f()); | ||
705 | |||
706 | /* store source buffer compression tags */ | ||
707 | gk20a_get_comptags(&g->dev->dev, src, &comptags); | ||
708 | cde_ctx->src_vaddr = src_vaddr; | ||
709 | cde_ctx->src_param_offset = comptags.offset; | ||
710 | cde_ctx->src_param_lines = comptags.lines; | ||
711 | |||
712 | /* store information about destination */ | ||
713 | cde_ctx->dest_vaddr = dst_vaddr + dst_byte_offset; | ||
714 | cde_ctx->dest_size = dst_size; | ||
715 | |||
716 | /* remove existing argument data */ | ||
717 | memset(cde_ctx->user_param_values, 0, | ||
718 | sizeof(cde_ctx->user_param_values)); | ||
719 | |||
720 | /* read user space arguments for the conversion */ | ||
721 | for (i = 0; i < num_params; i++) { | ||
722 | struct gk20a_cde_param *param = params + i; | ||
723 | int id = param->id - NUM_RESERVED_PARAMS; | ||
724 | |||
725 | if (id < 0 || id >= MAX_CDE_USER_PARAMS) { | ||
726 | gk20a_warn(&cde_ctx->pdev->dev, "cde: unknown user parameter"); | ||
727 | err = -EINVAL; | ||
728 | goto exit_unlock; | ||
729 | } | ||
730 | cde_ctx->user_param_values[id] = param->value; | ||
731 | } | ||
732 | |||
733 | /* patch data */ | ||
734 | err = gk20a_cde_patch_params(cde_ctx); | ||
735 | if (err) { | ||
736 | gk20a_warn(&cde_ctx->pdev->dev, "cde: failed to patch parameters"); | ||
737 | goto exit_unlock; | ||
738 | } | ||
739 | |||
740 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", | ||
741 | g->gr.compbit_store.size, cde_ctx->backing_store_vaddr); | ||
742 | gk20a_dbg(gpu_dbg_cde, "cde: buffer=dst, size=%llu, gpuva=%llx\n", | ||
743 | cde_ctx->dest_size, cde_ctx->dest_vaddr); | ||
744 | gk20a_cde_dump(cde_ctx); | ||
745 | |||
746 | /* execute the init push buffer */ | ||
747 | err = gk20a_cde_execute_buffer(cde_ctx, TYPE_BUF_COMMAND_INIT, | ||
748 | NULL, 0, NULL); | ||
749 | if (err) | ||
750 | goto exit_unlock; | ||
751 | |||
752 | /* take always the postfence as it is needed for protecting the | ||
753 | * cde context */ | ||
754 | flags = __flags | NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | ||
755 | |||
756 | /* execute the conversion buffer */ | ||
757 | err = gk20a_cde_execute_buffer(cde_ctx, TYPE_BUF_COMMAND_CONVERT, | ||
758 | fence, flags, fence_out); | ||
759 | |||
760 | exit_unlock: | ||
761 | |||
762 | /* unmap the buffers - channel holds references to them now */ | ||
763 | if (dst_vaddr) | ||
764 | gk20a_vm_unmap(g->cde_app.vm, dst_vaddr); | ||
765 | if (src_vaddr) | ||
766 | gk20a_vm_unmap(g->cde_app.vm, src_vaddr); | ||
767 | |||
768 | /* drop dmabuf refs if work was aborted */ | ||
769 | if (err && src) | ||
770 | dma_buf_put(src); | ||
771 | if (err && dst) | ||
772 | dma_buf_put(dst); | ||
773 | |||
774 | mutex_unlock(&cde_app->mutex); | ||
775 | |||
776 | return err; | ||
777 | } | ||
778 | |||
779 | int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) | ||
780 | { | ||
781 | struct gk20a *g = cde_ctx->g; | ||
782 | const struct firmware *img; | ||
783 | struct channel_gk20a *ch; | ||
784 | struct gr_gk20a *gr = &g->gr; | ||
785 | int err = 0; | ||
786 | u64 vaddr; | ||
787 | |||
788 | img = gk20a_request_firmware(g, "gpu2cde.bin"); | ||
789 | if (!img) { | ||
790 | dev_err(&cde_ctx->pdev->dev, "cde: could not fetch the firmware"); | ||
791 | return -ENOSYS; | ||
792 | } | ||
793 | |||
794 | ch = gk20a_open_new_channel(g); | ||
795 | if (!ch) { | ||
796 | gk20a_warn(&cde_ctx->pdev->dev, "cde: gk20a channel not available"); | ||
797 | err = -ENOMEM; | ||
798 | goto err_get_gk20a_channel; | ||
799 | } | ||
800 | |||
801 | /* bind the channel to the vm */ | ||
802 | gk20a_vm_get(&g->mm.pmu.vm); | ||
803 | ch->vm = &g->mm.pmu.vm; | ||
804 | err = channel_gk20a_commit_va(ch); | ||
805 | if (err) { | ||
806 | gk20a_warn(&cde_ctx->pdev->dev, "cde: could not bind vm"); | ||
807 | goto err_commit_va; | ||
808 | } | ||
809 | |||
810 | /* allocate gpfifo (1024 should be more than enough) */ | ||
811 | err = gk20a_alloc_channel_gpfifo(ch, | ||
812 | &(struct nvhost_alloc_gpfifo_args){1024, 0}); | ||
813 | if (err) { | ||
814 | gk20a_warn(&cde_ctx->pdev->dev, "cde: unable to allocate gpfifo"); | ||
815 | goto err_alloc_gpfifo; | ||
816 | } | ||
817 | |||
818 | /* map backing store to gpu virtual space */ | ||
819 | vaddr = gk20a_gmmu_map(ch->vm, &gr->compbit_store.sgt, | ||
820 | g->gr.compbit_store.size, 0, | ||
821 | gk20a_mem_flag_none); | ||
822 | |||
823 | if (!vaddr) { | ||
824 | gk20a_warn(&cde_ctx->pdev->dev, "cde: cannot map compression bit backing store"); | ||
825 | goto err_map_backingstore; | ||
826 | } | ||
827 | |||
828 | /* store initialisation data */ | ||
829 | cde_ctx->ch = ch; | ||
830 | cde_ctx->vm = ch->vm; | ||
831 | cde_ctx->backing_store_vaddr = vaddr; | ||
832 | |||
833 | /* initialise the firmware */ | ||
834 | err = gk20a_init_cde_img(cde_ctx, img); | ||
835 | if (err) { | ||
836 | gk20a_warn(&cde_ctx->pdev->dev, "cde: image initialisation failed"); | ||
837 | goto err_init_cde_img; | ||
838 | } | ||
839 | |||
840 | /* initialisation done */ | ||
841 | release_firmware(img); | ||
842 | |||
843 | return 0; | ||
844 | |||
845 | err_init_cde_img: | ||
846 | gk20a_gmmu_unmap(ch->vm, vaddr, g->gr.compbit_store.size, 1); | ||
847 | err_map_backingstore: | ||
848 | err_alloc_gpfifo: | ||
849 | gk20a_vm_put(ch->vm); | ||
850 | err_commit_va: | ||
851 | err_get_gk20a_channel: | ||
852 | release_firmware(img); | ||
853 | dev_err(&cde_ctx->pdev->dev, "cde: couldn't initialise buffer converter: %d", | ||
854 | err); | ||
855 | return err; | ||
856 | } | ||
857 | |||
858 | int gk20a_cde_reload(struct gk20a *g) | ||
859 | { | ||
860 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
861 | struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx; | ||
862 | int err, i; | ||
863 | |||
864 | if (!cde_app->initialised) { | ||
865 | gk20a_busy(g->dev); | ||
866 | gk20a_init_cde_support(g); | ||
867 | gk20a_idle(g->dev); | ||
868 | if (!cde_app->initialised) | ||
869 | return -ENOSYS; | ||
870 | return 0; | ||
871 | } | ||
872 | |||
873 | gk20a_busy(g->dev); | ||
874 | mutex_lock(&cde_app->mutex); | ||
875 | for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) { | ||
876 | gk20a_cde_remove(cde_ctx); | ||
877 | err = gk20a_cde_load(cde_ctx); | ||
878 | } | ||
879 | |||
880 | cde_app->cde_ctx_ptr = 0; | ||
881 | mutex_unlock(&cde_app->mutex); | ||
882 | |||
883 | gk20a_idle(g->dev); | ||
884 | return err; | ||
885 | } | ||
886 | |||
887 | int gk20a_init_cde_support(struct gk20a *g) | ||
888 | { | ||
889 | struct gk20a_cde_app *cde_app = &g->cde_app; | ||
890 | struct gk20a_cde_ctx *cde_ctx = cde_app->cde_ctx; | ||
891 | int ret, i; | ||
892 | |||
893 | if (cde_app->initialised) | ||
894 | return 0; | ||
895 | |||
896 | mutex_init(&cde_app->mutex); | ||
897 | mutex_lock(&cde_app->mutex); | ||
898 | |||
899 | for (i = 0; i < ARRAY_SIZE(cde_app->cde_ctx); i++, cde_ctx++) { | ||
900 | cde_ctx->g = g; | ||
901 | cde_ctx->pdev = g->dev; | ||
902 | ret = gk20a_cde_load(cde_ctx); | ||
903 | if (ret) | ||
904 | goto err_init_instance; | ||
905 | } | ||
906 | |||
907 | /* take shadow to the vm for general usage */ | ||
908 | cde_app->vm = cde_app->cde_ctx->vm; | ||
909 | |||
910 | cde_app->cde_ctx_ptr = 0; | ||
911 | cde_app->initialised = true; | ||
912 | mutex_unlock(&cde_app->mutex); | ||
913 | |||
914 | return 0; | ||
915 | |||
916 | err_init_instance: | ||
917 | |||
918 | /* deinitialise initialised channels */ | ||
919 | while (i--) { | ||
920 | gk20a_cde_remove(cde_ctx); | ||
921 | cde_ctx--; | ||
922 | } | ||
923 | return ret; | ||
924 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h new file mode 100644 index 00000000..784ae8b4 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h | |||
@@ -0,0 +1,254 @@ | |||
1 | /* | ||
2 | * GK20A color decompression engine support | ||
3 | * | ||
4 | * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | |||
19 | #ifndef _CDE_GK20A_H_ | ||
20 | #define _CDE_GK20A_H_ | ||
21 | |||
22 | #include "mm_gk20a.h" | ||
23 | |||
24 | #define MAX_CDE_BUFS 10 | ||
25 | #define MAX_CDE_PARAMS 64 | ||
26 | #define MAX_CDE_USER_PARAMS 32 | ||
27 | #define MAX_CDE_OBJ_IDS 4 | ||
28 | |||
29 | struct dma_buf; | ||
30 | struct gk20a; | ||
31 | |||
32 | /* | ||
33 | * this element defines a buffer that is allocated and mapped into gpu address | ||
34 | * space. data_byte_offset defines the beginning of the buffer inside the | ||
 * firmware. num_bytes defines how many bytes the firmware contains.
36 | * | ||
37 | * If data_byte_offset is zero, we allocate an empty buffer. | ||
38 | */ | ||
39 | |||
/* One buffer to allocate and map into the GPU address space. */
struct gk20a_cde_hdr_buf {
	u64 data_byte_offset;	/* start of the buffer contents inside the
				 * firmware image; 0 = allocate empty buffer */
	u64 num_bytes;		/* size of the buffer in bytes */
};
44 | |||
45 | /* | ||
46 | * this element defines a constant patching in buffers. It basically | ||
47 | * computes physical address to <source_buf>+source_byte_offset. The | ||
48 | * address is then modified into patch value as per: | ||
49 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
50 | * | ||
51 | * The type field defines the register size as: | ||
52 | * 0=u32, | ||
53 | * 1=u64 (little endian), | ||
54 | * 2=u64 (big endian) | ||
55 | */ | ||
56 | |||
/* Constant (load-time) patch: writes the address of one buffer into another. */
struct gk20a_cde_hdr_replace {
	u32 target_buf;		/* index of the buffer that gets patched */
	u32 source_buf;		/* index of the buffer whose address is written */
	s32 shift;		/* left shift applied to the source address */
	u32 type;		/* register width, one of TYPE_PARAM_TYPE_* */
	s64 target_byte_offset;	/* patch location inside target_buf */
	s64 source_byte_offset;	/* offset added to source_buf's base address */
	u64 mask;		/* value = (cur & ~mask) | (addr << shift) & mask */
};

/* register widths for the 'type' fields of replace/param elements */
enum {
	TYPE_PARAM_TYPE_U32 = 0,
	TYPE_PARAM_TYPE_U64_LITTLE,	/* u64, little endian */
	TYPE_PARAM_TYPE_U64_BIG		/* u64, big endian */
};
72 | |||
73 | /* | ||
74 | * this element defines a runtime patching in buffers. Parameters with id from | ||
75 | * 0 to 1024 are reserved for special usage as follows: | ||
76 | * 0 = comptags_per_cacheline, | ||
77 | * 1 = slices_per_fbp, | ||
78 | * 2 = num_fbps | ||
79 | * 3 = source buffer first page offset | ||
80 | * 4 = source buffer block height log2 | ||
81 | * 5 = backing store memory address | ||
82 | * 6 = destination memory address | ||
83 | * 7 = destination size (bytes) | ||
84 | * 8 = backing store size (bytes) | ||
85 | * 9 = cache line size | ||
86 | * | ||
 * Parameters above id 1024 are user-specified. I.e. they determine where
 * parameters from user space should be placed in buffers, what is their
89 | * type, etc. | ||
90 | * | ||
91 | * Once the value is available, we add data_offset to the value. | ||
92 | * | ||
93 | * The value address is then modified into patch value as per: | ||
94 | * value = (current_value & ~mask) | (address << shift) & mask . | ||
95 | * | ||
96 | * The type field defines the register size as: | ||
97 | * 0=u32, | ||
98 | * 1=u64 (little endian), | ||
99 | * 2=u64 (big endian) | ||
100 | */ | ||
101 | |||
/* Runtime patch: writes a parameter value into a buffer at submit time. */
struct gk20a_cde_hdr_param {
	u32 id;			/* parameter id; ids below NUM_RESERVED_PARAMS
				 * are the driver-supplied values enumerated
				 * below, higher ids come from user space */
	u32 target_buf;		/* index of the buffer that gets patched */
	s32 shift;		/* left shift applied to the value */
	u32 type;		/* register width, one of TYPE_PARAM_TYPE_* */
	s64 data_offset;	/* constant added to the value before patching */
	s64 target_byte_offset;	/* patch location inside target_buf */
	u64 mask;		/* value = (cur & ~mask) | (val << shift) & mask */
};

/* reserved (driver-supplied) parameter ids */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
	TYPE_PARAM_GPU_CONFIGURATION,
	TYPE_PARAM_FIRSTPAGEOFFSET,	/* source buffer first page offset */
	TYPE_PARAM_NUMPAGES,
	TYPE_PARAM_BACKINGSTORE,	/* backing store memory address */
	TYPE_PARAM_DESTINATION,		/* destination memory address */
	TYPE_PARAM_DESTINATION_SIZE,	/* destination size (bytes) */
	TYPE_PARAM_BACKINGSTORE_SIZE,	/* backing store size (bytes) */
	TYPE_PARAM_SOURCE_SMMU_ADDR,
	NUM_RESERVED_PARAMS = 1024,	/* first user-specified id */
};
124 | |||
125 | /* | ||
126 | * This header element defines a command. The op field determines whether the | ||
127 | * element is defining an init (0) or convert command (1). data_byte_offset | ||
128 | * denotes the beginning address of command elements in the file. | ||
129 | */ | ||
130 | |||
/* A command (init or convert) built from pushbuffer entries in the firmware. */
struct gk20a_cde_hdr_command {
	u32 op;			/* TYPE_BUF_COMMAND_INIT or _CONVERT */
	u32 num_entries;	/* number of gk20a_cde_cmd_elem entries */
	u64 data_byte_offset;	/* start of the entries inside the firmware */
};

/* values for gk20a_cde_hdr_command.op */
enum {
	TYPE_BUF_COMMAND_INIT = 0,
	TYPE_BUF_COMMAND_CONVERT
};
141 | |||
142 | /* | ||
 * This command element defines one entry inside the push buffer. target_buf
144 | * defines the buffer including the pushbuffer entries, target_byte_offset the | ||
145 | * offset inside the buffer and num_bytes the number of words in the buffer. | ||
146 | */ | ||
147 | |||
/* One pushbuffer entry of a command: a slice of one of the mapped buffers. */
struct gk20a_cde_cmd_elem {
	u32 target_buf;		/* buffer holding the pushbuffer data */
	u32 padding;
	u64 target_byte_offset;	/* offset of the slice inside the buffer */
	u64 num_bytes;		/* length of the slice */
};
154 | |||
155 | /* | ||
156 | * Following defines a single header element. Each element has a type and | ||
157 | * some of the data structures. | ||
158 | */ | ||
159 | |||
/* One header element of the CDE firmware; 'type' selects the union member. */
struct gk20a_cde_hdr_elem {
	u32 type;		/* one of TYPE_BUF .. TYPE_COMMAND below */
	u32 padding;
	union {
		struct gk20a_cde_hdr_buf buf;		/* TYPE_BUF */
		struct gk20a_cde_hdr_replace replace;	/* TYPE_REPLACE */
		struct gk20a_cde_hdr_param param;	/* TYPE_PARAM */
		u32 required_class;			/* TYPE_REQUIRED_CLASS */
		struct gk20a_cde_hdr_command command;	/* TYPE_COMMAND */
	};
};

/* values for gk20a_cde_hdr_elem.type */
enum {
	TYPE_BUF = 0,
	TYPE_REPLACE,
	TYPE_PARAM,
	TYPE_REQUIRED_CLASS,
	TYPE_COMMAND
};
179 | |||
/* A DMA buffer mapped into both CPU and GPU address spaces. */
struct gk20a_cde_mem_desc {
	struct sg_table *sgt;	/* scatter-gather table backing the buffer */
	dma_addr_t iova;	/* DMA (IOMMU) address */
	void *cpuva;		/* kernel CPU mapping */
	size_t num_bytes;	/* allocation size */
	u64 gpu_va;		/* GPU virtual address */
};

/* A single user-space supplied parameter (id/value pair). */
struct gk20a_cde_param {
	u32 id;			/* parameter id (>= NUM_RESERVED_PARAMS) */
	u32 padding;
	u64 value;		/* raw parameter value */
};
193 | |||
/*
 * State of one CDE conversion context: a dedicated channel plus the
 * buffers, patch parameters and prebuilt commands parsed from the
 * CDE firmware.
 */
struct gk20a_cde_ctx {
	struct gk20a *g;
	struct platform_device *pdev;

	/* channel related data */
	struct channel_gk20a *ch;
	struct vm_gk20a *vm;

	/* buf converter configuration */
	struct gk20a_cde_mem_desc mem[MAX_CDE_BUFS];
	int num_bufs;

	/* buffer patching params (where should patching be done) */
	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
	int num_params;

	/* storage for user space parameter values */
	u32 user_param_values[MAX_CDE_USER_PARAMS];

	/* source surface description — presumably filled per convert job;
	 * TODO(review): confirm against gk20a_cde_convert() */
	u64 src_smmu_addr;
	u32 src_param_offset;
	u32 src_param_lines;

	u64 src_vaddr;

	/* destination of the conversion */
	u64 dest_vaddr;
	u64 dest_size;

	/* classes required by the firmware (TYPE_REQUIRED_CLASS elements) */
	u32 obj_ids[MAX_CDE_OBJ_IDS];
	int num_obj_ids;

	u64 backing_store_vaddr;

	/* gpfifo entries built from the firmware's init/convert commands */
	struct nvhost_gpfifo *init_cmd;
	int init_cmd_num_entries;

	struct nvhost_gpfifo *convert_cmd;
	int convert_cmd_num_entries;

	struct kobj_attribute attr;
};
235 | |||
/* Top-level CDE application state, embedded in struct gk20a. */
struct gk20a_cde_app {
	bool initialised;	/* true once all contexts loaded successfully */
	struct mutex mutex;	/* serialises context (re)loading and use */
	struct vm_gk20a *vm;	/* shadow of cde_ctx[0]->vm for general usage */

	struct gk20a_cde_ctx cde_ctx[1];
	int cde_ctx_ptr;	/* index of the next context to hand out */
};
244 | |||
/* tear down all CDE contexts and free their resources */
int gk20a_cde_destroy(struct gk20a *g);
/* load firmware and initialise all CDE contexts (idempotent) */
int gk20a_init_cde_support(struct gk20a *g);
/* reload the firmware into already-initialised contexts */
int gk20a_cde_reload(struct gk20a *g);
/* submit a swizzling conversion from src_fd to dst_fd; on success an
 * optional completion fence is returned via fence_out */
int gk20a_cde_convert(struct gk20a *g, u32 src_fd, u32 dst_fd,
		      s32 dst_kind, u64 dst_word_offset,
		      u32 dst_size, struct nvhost_fence *fence,
		      u32 __flags, struct gk20a_cde_param *params,
		      int num_params, struct gk20a_fence **fence_out);
253 | |||
254 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index fa6e0cce..2975798f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -976,6 +976,7 @@ static int gk20a_pm_finalize_poweron(struct device *dev) | |||
976 | goto done; | 976 | goto done; |
977 | } | 977 | } |
978 | 978 | ||
979 | |||
979 | gk20a_channel_resume(g); | 980 | gk20a_channel_resume(g); |
980 | set_user_nice(current, nice_value); | 981 | set_user_nice(current, nice_value); |
981 | 982 | ||
@@ -983,6 +984,8 @@ static int gk20a_pm_finalize_poweron(struct device *dev) | |||
983 | 984 | ||
984 | trace_gk20a_finalize_poweron_done(dev_name(dev)); | 985 | trace_gk20a_finalize_poweron_done(dev_name(dev)); |
985 | 986 | ||
987 | if (IS_ENABLED(CONFIG_GK20A_CDE)) | ||
988 | gk20a_init_cde_support(g); | ||
986 | done: | 989 | done: |
987 | return err; | 990 | return err; |
988 | } | 991 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 05ed9270..b2ecade5 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -48,6 +48,7 @@ struct acr_gm20b; | |||
48 | #include "therm_gk20a.h" | 48 | #include "therm_gk20a.h" |
49 | #include "platform_gk20a.h" | 49 | #include "platform_gk20a.h" |
50 | #include "gm20b/acr_gm20b.h" | 50 | #include "gm20b/acr_gm20b.h" |
51 | #include "cde_gk20a.h" | ||
51 | 52 | ||
52 | extern struct platform_device tegra_gk20a_device; | 53 | extern struct platform_device tegra_gk20a_device; |
53 | 54 | ||
@@ -356,6 +357,8 @@ struct gk20a { | |||
356 | struct gk20a_scale_profile *scale_profile; | 357 | struct gk20a_scale_profile *scale_profile; |
357 | 358 | ||
358 | struct device_dma_parameters dma_parms; | 359 | struct device_dma_parameters dma_parms; |
360 | |||
361 | struct gk20a_cde_app cde_app; | ||
359 | }; | 362 | }; |
360 | 363 | ||
361 | static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) | 364 | static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) |
@@ -422,6 +425,7 @@ enum gk20a_dbg_categories { | |||
422 | gpu_dbg_clk = BIT(7), /* gk20a clk */ | 425 | gpu_dbg_clk = BIT(7), /* gk20a clk */ |
423 | gpu_dbg_map = BIT(8), /* mem mappings */ | 426 | gpu_dbg_map = BIT(8), /* mem mappings */ |
424 | gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ | 427 | gpu_dbg_gpu_dbg = BIT(9), /* gpu debugger/profiler */ |
428 | gpu_dbg_cde = BIT(10), /* cde info messages */ | ||
425 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ | 429 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ |
426 | }; | 430 | }; |
427 | 431 | ||