Diffstat (limited to 'include/gk20a/ce2_gk20a.c')
-rw-r--r-- | include/gk20a/ce2_gk20a.c | 576
1 file changed, 576 insertions, 0 deletions
diff --git a/include/gk20a/ce2_gk20a.c b/include/gk20a/ce2_gk20a.c
new file mode 100644
index 0000000..2a40b08
--- /dev/null
+++ b/include/gk20a/ce2_gk20a.c
@@ -0,0 +1,576 @@
/*
 * GK20A Graphics Copy Engine (gr host)
 *
 * Copyright (c) 2011-2019, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/log.h>
#include <nvgpu/enabled.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel.h>
#include <nvgpu/power_features/cg.h>

#include "gk20a.h"
#include "gk20a/fence_gk20a.h"

#include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/barrier.h>

/*
 * Copy engine defines line size in pixels
 */
#define MAX_CE_SHIFT	31	/* 4Gpixels -1 */
#define MAX_CE_MASK	((u32) (~(~0U << MAX_CE_SHIFT)))
#define MAX_CE_ALIGN(a)	(a & MAX_CE_MASK)
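/*
 * MAX_CE_MASK is 0x7FFFFFFF, so MAX_CE_ALIGN() clamps a value to 31 bits,
 * i.e. to at most 2Gpix - 1 in a single dimension.
 */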

static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
{
	nvgpu_log(g, gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");

	return ce2_intr_status_nonblockpipe_pending_f();
}

static u32 ce2_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
{
	nvgpu_log(g, gpu_dbg_intr, "ce2 blocking pipe interrupt\n");

	return ce2_intr_status_blockpipe_pending_f();
}

static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr)
{
	nvgpu_log(g, gpu_dbg_intr, "ce2 launch error interrupt\n");

	return ce2_intr_status_launcherr_pending_f();
}

void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
{
	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());
	u32 clear_intr = 0;

	nvgpu_log(g, gpu_dbg_intr, "ce2 isr %08x\n", ce2_intr);

	/* clear blocking interrupts: they exhibit broken behavior */
	if (ce2_intr & ce2_intr_status_blockpipe_pending_f()) {
		clear_intr |= ce2_blockpipe_isr(g, ce2_intr);
	}

	if (ce2_intr & ce2_intr_status_launcherr_pending_f()) {
		clear_intr |= ce2_launcherr_isr(g, ce2_intr);
	}

	gk20a_writel(g, ce2_intr_status_r(), clear_intr);
	return;
}

u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
{
	u32 ops = 0;
	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());

	nvgpu_log(g, gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr);

	if (ce2_intr & ce2_intr_status_nonblockpipe_pending_f()) {
		gk20a_writel(g, ce2_intr_status_r(),
			ce2_nonblockpipe_isr(g, ce2_intr));
		ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE |
			GK20A_NONSTALL_OPS_POST_EVENTS);
	}
	return ops;
}

/* static CE app api */
static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
{
	u32 i;

	for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
		struct gk20a_fence **fence = &ce_ctx->postfences[i];
		if (*fence) {
			gk20a_fence_put(*fence);
		}
		*fence = NULL;
	}
}

/* this function must be called under nvgpu_mutex_acquire(&ce_app->app_mutex) */
static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
{
	struct nvgpu_list_node *list = &ce_ctx->list;

	ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

	if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
		gk20a_ce_put_fences(ce_ctx);
		nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
	}

	/*
	 * free the channel
	 * gk20a_channel_close() will also unbind the channel from TSG
	 */
	gk20a_channel_close(ce_ctx->ch);
	nvgpu_ref_put(&ce_ctx->tsg->refcount, gk20a_tsg_release);

	/* housekeeping on app */
	if (list->prev && list->next) {
		nvgpu_list_del(list);
	}

	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
	nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex);

	nvgpu_kfree(ce_ctx->g, ce_ctx);
}

static inline unsigned int gk20a_ce_get_method_size(int request_operation,
		u64 size)
{
	/* failure size */
	unsigned int methodsize = UINT_MAX;
	unsigned int iterations = 0;
	u32 shift;
	u64 chunk = size;
	u32 height, width;

	while (chunk) {
		iterations++;

		shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
			MAX_CE_SHIFT;
		width = chunk >> shift;
		height = 1 << shift;
		width = MAX_CE_ALIGN(width);

		chunk -= (u64) height * width;
	}

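	/*
	 * The submit path emits 16 method words per copy iteration (source
	 * setup plus SET_SRC_PHYS_MODE take 5 words) and 15 per memset
	 * iteration (the remap setup takes 4), plus 2 words up front to
	 * bind the DMA copy class.
	 */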
	if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
		methodsize = (2 + (16 * iterations)) * sizeof(u32);
	} else if (request_operation & NVGPU_CE_MEMSET) {
		methodsize = (2 + (15 * iterations)) * sizeof(u32);
	}

	return methodsize;
}

int gk20a_ce_prepare_submit(u64 src_buf,
		u64 dst_buf,
		u64 size,
		u32 *cmd_buf_cpu_va,
		u32 max_cmd_buf_size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		u32 dma_copy_class)
{
	u32 launch = 0;
	u32 methodSize = 0;
	u64 offset = 0;
	u64 chunk_size = 0;
	u64 chunk = size;

	/* failure case handling */
	if ((gk20a_ce_get_method_size(request_operation, size) >
		max_cmd_buf_size) || (!size) ||
		(request_operation > NVGPU_CE_MEMSET)) {
		return 0;
	}

	/* set the channel object */
	cmd_buf_cpu_va[methodSize++] = 0x20018000;
	cmd_buf_cpu_va[methodSize++] = dma_copy_class;
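	/*
	 * Method header format (Fermi+ pushbuffer): bits 31:29 = 1
	 * (incrementing methods), bits 28:16 = word count, bits 15:13 =
	 * subchannel, bits 12:0 = method >> 2. 0x20018000 therefore writes
	 * one word to method 0x0000 (SET_OBJECT) on subchannel 4, where
	 * the copy engine class is bound.
	 */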

	/*
	 * The purpose is to clear the memory in 2D rectangles. We use ffs to
	 * determine the number of lines to copy. The only constraint is that
	 * the maximum number of pixels per line is 4Gpix - 1, which is
	 * awkward for calculation, so we settle on 2Gpix per line to make
	 * the calculation more agreeable.
	 */

	/* The copy engine in 2D mode can have (2^32 - 1) x (2^32 - 1) pixels
	 * in a single submit, so we are going to try to clear a range of up
	 * to 2Gpix in multiple lines. Because we want the copy byte aligned,
	 * we will be using 1 byte pixels */

	/*
	 * per iteration
	 * <------------------------- 40 bits ------------------------------>
	 *                              1       <------ ffs ------->
	 *        <-----------up to 30 bits----------->
	 */
	while (chunk) {
		u32 width, height, shift;

		/*
		 * We will be aligning to bytes, making the maximum number of
		 * pixels per line 2Gpix
		 */

		shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
			MAX_CE_SHIFT;
		height = chunk >> shift;
		width = 1 << shift;
		height = MAX_CE_ALIGN(height);

		chunk_size = (u64) height * width;
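		/*
		 * Example: chunk = 0x2800 bytes has __ffs(0x2800) = 11, so
		 * height = 0x2800 >> 11 = 5 lines of width = 1 << 11 = 2048
		 * one-byte pixels; chunk_size = 5 * 2048 = 0x2800 and the
		 * whole request is covered in a single iteration.
		 */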

		/* reset launch flag */
		launch = 0;

		if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
			/* setup the source */
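			/* 0x20028100: two words starting at method 0x400,
			 * i.e. the OFFSET_IN_UPPER/OFFSET_IN_LOWER pair of
			 * the DMA copy class (see e.g. cla0b5.h) */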
			cmd_buf_cpu_va[methodSize++] = 0x20028100;
			cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf +
				offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
			cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf +
				offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);

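			/* 0x20018098: method 0x260 (SET_SRC_PHYS_MODE);
			 * 0 = LOCAL_FB, 1 = COHERENT_SYSMEM,
			 * 2 = NONCOHERENT_SYSMEM. The launch |= 0x00001000
			 * below sets LAUNCH_DMA SRC_TYPE = PHYSICAL. */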
			cmd_buf_cpu_va[methodSize++] = 0x20018098;
			if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB) {
				cmd_buf_cpu_va[methodSize++] = 0x00000000;
			} else if (launch_flags &
				NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) {
				cmd_buf_cpu_va[methodSize++] = 0x00000002;
			} else {
				cmd_buf_cpu_va[methodSize++] = 0x00000001;
			}

			launch |= 0x00001000;
		} else if (request_operation & NVGPU_CE_MEMSET) {
			/* Remap from component A on 1 byte wide pixels */
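			/* 0x200181c2: method 0x708 (SET_REMAP_COMPONENTS);
			 * the value 0x4 sources the single one-byte
			 * destination component from CONST_A. 0x200181c0 is
			 * method 0x700 (SET_REMAP_CONST_A), loaded with the
			 * payload; launch |= 0x00000400 below enables remap. */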
			cmd_buf_cpu_va[methodSize++] = 0x200181c2;
			cmd_buf_cpu_va[methodSize++] = 0x00000004;

			cmd_buf_cpu_va[methodSize++] = 0x200181c0;
			cmd_buf_cpu_va[methodSize++] = payload;

			launch |= 0x00000400;
		} else {
			/* Illegal request operation */
			return 0;
		}

		/* setup the destination/output */
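		/* 0x20068102: six words starting at method 0x408, i.e.
		 * OFFSET_OUT_UPPER/LOWER, PITCH_IN, PITCH_OUT,
		 * LINE_LENGTH_IN and LINE_COUNT */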
		cmd_buf_cpu_va[methodSize++] = 0x20068102;
		cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf +
			offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
		cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf +
			offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
		/* Pitch in/out */
		cmd_buf_cpu_va[methodSize++] = width;
		cmd_buf_cpu_va[methodSize++] = width;
		/* width and line count */
		cmd_buf_cpu_va[methodSize++] = width;
		cmd_buf_cpu_va[methodSize++] = height;

		cmd_buf_cpu_va[methodSize++] = 0x20018099;
		if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) {
			cmd_buf_cpu_va[methodSize++] = 0x00000000;
		} else if (launch_flags &
			NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) {
			cmd_buf_cpu_va[methodSize++] = 0x00000002;
		} else {
			cmd_buf_cpu_va[methodSize++] = 0x00000001;
		}

		launch |= 0x00002005;
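		/* 0x2005: DATA_TRANSFER_TYPE = PIPELINED, FLUSH_ENABLE and
		 * DST_TYPE = PHYSICAL; the two blocks below select pitch
		 * vs blocklinear layout for src (bit 7) and dst (bit 8) */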

		if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR) {
			launch |= 0x00000000;
		} else {
			launch |= 0x00000080;
		}

		if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR) {
			launch |= 0x00000000;
		} else {
			launch |= 0x00000100;
		}

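		/* 0x200180c0: method 0x300 (LAUNCH_DMA) kicks off this
		 * chunk with the flags accumulated above */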
		cmd_buf_cpu_va[methodSize++] = 0x200180c0;
		cmd_buf_cpu_va[methodSize++] = launch;
		offset += chunk_size;
		chunk -= chunk_size;
	}

	return methodSize;
}

/* global CE app related apis */
int gk20a_init_ce_support(struct gk20a *g)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;
	int err;
	u32 ce_reset_mask;

	ce_reset_mask = gk20a_fifo_get_all_ce_engine_reset_mask(g);

	g->ops.mc.reset(g, ce_reset_mask);

	nvgpu_cg_slcg_ce2_load_enable(g);

	nvgpu_cg_blcg_ce_load_enable(g);

	if (ce_app->initialised) {
		/* this happens during the GPU poweron/poweroff sequence */
		ce_app->app_state = NVGPU_CE_ACTIVE;
		return 0;
	}

	nvgpu_log(g, gpu_dbg_fn, "ce: init");

	err = nvgpu_mutex_init(&ce_app->app_mutex);
	if (err) {
		return err;
	}

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_init_list_node(&ce_app->allocated_contexts);
	ce_app->ctx_count = 0;
	ce_app->next_ctx_id = 0;
	ce_app->initialised = true;
	ce_app->app_state = NVGPU_CE_ACTIVE;

	nvgpu_mutex_release(&ce_app->app_mutex);

	if (g->ops.ce2.init_prod_values != NULL) {
		g->ops.ce2.init_prod_values(g);
	}

	nvgpu_log(g, gpu_dbg_cde_ctx, "ce: init finished");

	return 0;
}

void gk20a_ce_destroy(struct gk20a *g)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;

	if (!ce_app->initialised) {
		return;
	}

	ce_app->app_state = NVGPU_CE_SUSPEND;
	ce_app->initialised = false;

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		gk20a_ce_delete_gpu_context(ce_ctx);
	}

	nvgpu_init_list_node(&ce_app->allocated_contexts);
	ce_app->ctx_count = 0;
	ce_app->next_ctx_id = 0;

	nvgpu_mutex_release(&ce_app->app_mutex);

	nvgpu_mutex_destroy(&ce_app->app_mutex);
}

void gk20a_ce_suspend(struct gk20a *g)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;

	if (!ce_app->initialised) {
		return;
	}

	ce_app->app_state = NVGPU_CE_SUSPEND;

	return;
}

/* CE app utility functions */
u32 gk20a_ce_create_context(struct gk20a *g,
		int runlist_id,
		int timeslice,
		int runlist_level)
{
	struct gk20a_gpu_ctx *ce_ctx;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct nvgpu_setup_bind_args setup_bind_args;
	u32 ctx_id = ~0;
	int err = 0;

	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
		return ctx_id;
	}

	ce_ctx = nvgpu_kzalloc(g, sizeof(*ce_ctx));
	if (!ce_ctx) {
		return ctx_id;
	}

	err = nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex);
	if (err) {
		nvgpu_kfree(g, ce_ctx);
		return ctx_id;
	}

	ce_ctx->g = g;

	ce_ctx->cmd_buf_read_queue_offset = 0;

	ce_ctx->vm = g->mm.ce.vm;

	/* allocate a tsg if needed */
	ce_ctx->tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
	if (!ce_ctx->tsg) {
		nvgpu_err(g, "ce: gk20a tsg not available");
		err = -ENOMEM;
		goto end;
	}

	/* kernel clients always need a privileged channel */
	ce_ctx->ch = gk20a_open_new_channel(g, runlist_id, true,
			nvgpu_current_pid(g), nvgpu_current_tid(g));
	if (!ce_ctx->ch) {
		nvgpu_err(g, "ce: gk20a channel not available");
		err = -ENOMEM;
		goto end;
	}
	ce_ctx->ch->timeout.enabled = false;

	/* bind the channel to the vm */
	err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
	if (err) {
		nvgpu_err(g, "ce: could not bind vm");
		goto end;
	}

	err = gk20a_tsg_bind_channel(ce_ctx->tsg, ce_ctx->ch);
	if (err) {
		nvgpu_err(g, "ce: unable to bind to tsg");
		goto end;
	}

	setup_bind_args.num_gpfifo_entries = 1024;
	setup_bind_args.num_inflight_jobs = 0;
	setup_bind_args.flags = 0;
	/* allocate gpfifo (1024 should be more than enough) */
	err = nvgpu_channel_setup_bind(ce_ctx->ch, &setup_bind_args);
	if (err) {
		nvgpu_err(g, "ce: unable to setup and bind channel");
		goto end;
	}

	/* allocate command buffer from sysmem */
	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
			NVGPU_CE_MAX_INFLIGHT_JOBS *
			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
			&ce_ctx->cmd_buf_mem);
	if (err) {
		nvgpu_err(g,
			"ce: could not allocate command buffer for CE context");
		goto end;
	}

	memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size);

	/* -1 means default channel timeslice value */
	if (timeslice != -1) {
		err = gk20a_fifo_tsg_set_timeslice(ce_ctx->tsg, timeslice);
		if (err) {
			nvgpu_err(g,
				"ce: could not set the channel timeslice value for CE context");
			goto end;
		}
	}

	/* -1 means default channel runlist level */
	if (runlist_level != -1) {
		err = gk20a_tsg_set_runlist_interleave(ce_ctx->tsg,
			runlist_level);
		if (err) {
			nvgpu_err(g,
				"ce: could not set the runlist interleave for CE context");
			goto end;
		}
	}

	nvgpu_mutex_acquire(&ce_app->app_mutex);
	ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
	nvgpu_list_add(&ce_ctx->list, &ce_app->allocated_contexts);
	++ce_app->next_ctx_id;
	++ce_app->ctx_count;
	nvgpu_mutex_release(&ce_app->app_mutex);

	ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;

end:
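	/* all failure paths jump here with ctx_id still ~0; tear down the
	 * partially built context under the app mutex */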
	if (ctx_id == (u32)~0) {
		nvgpu_mutex_acquire(&ce_app->app_mutex);
		gk20a_ce_delete_gpu_context(ce_ctx);
		nvgpu_mutex_release(&ce_app->app_mutex);
	}
	return ctx_id;
}

void gk20a_ce_delete_context(struct gk20a *g,
		u32 ce_ctx_id)
{
	gk20a_ce_delete_context_priv(g, ce_ctx_id);
}

void gk20a_ce_delete_context_priv(struct gk20a *g,
		u32 ce_ctx_id)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;

	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
		return;
	}

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			gk20a_ce_delete_gpu_context(ce_ctx);
			--ce_app->ctx_count;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);
	return;
}