Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ce2_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 578
1 file changed, 578 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
new file mode 100644
index 00000000..99c518b5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -0,0 +1,578 @@
/*
 * GK20A Graphics Copy Engine (gr host)
 *
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>

#include "gk20a.h"

#include <nvgpu/log.h>
#include <nvgpu/enabled.h>

#include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_top_gk20a.h>
#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/barrier.h>

/*
 * Copy engine defines line size in pixels
 */
#define MAX_CE_SHIFT	31	/* 4Gpixels - 1 */
#define MAX_CE_MASK	((u32) (~(~0 << MAX_CE_SHIFT)))
#define MAX_CE_ALIGN(a)	((a) & MAX_CE_MASK)
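
For reference, the macro arithmetic is plain 31-bit masking; a hand-checked illustration (not part of the driver source):

/*
 * MAX_CE_MASK == ~(~0 << 31) == 0x7fffffff
 * MAX_CE_ALIGN(0x80001000ULL)  == 0x00001000  (remainder below 2 Gpix)
 * MAX_CE_ALIGN(0x180000000ULL) == 0x00000000  (low 31 bits all clear)
 */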

static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
{
	gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");

	return ce2_intr_status_nonblockpipe_pending_f();
}

static u32 ce2_blockpipe_isr(struct gk20a *g, u32 fifo_intr)
{
	gk20a_dbg(gpu_dbg_intr, "ce2 blocking pipe interrupt\n");

	return ce2_intr_status_blockpipe_pending_f();
}

static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr)
{
	gk20a_dbg(gpu_dbg_intr, "ce2 launch error interrupt\n");

	return ce2_intr_status_launcherr_pending_f();
}

void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
{
	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());
	u32 clear_intr = 0;

	gk20a_dbg(gpu_dbg_intr, "ce2 isr %08x\n", ce2_intr);

	/* clear blocking interrupts: they exhibit broken behavior */
	if (ce2_intr & ce2_intr_status_blockpipe_pending_f())
		clear_intr |= ce2_blockpipe_isr(g, ce2_intr);

	if (ce2_intr & ce2_intr_status_launcherr_pending_f())
		clear_intr |= ce2_launcherr_isr(g, ce2_intr);

	gk20a_writel(g, ce2_intr_status_r(), clear_intr);
}

int gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
{
	int ops = 0;
	u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r());

	gk20a_dbg(gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr);

	if (ce2_intr & ce2_intr_status_nonblockpipe_pending_f()) {
		gk20a_writel(g, ce2_intr_status_r(),
			ce2_nonblockpipe_isr(g, ce2_intr));
		ops |= (gk20a_nonstall_ops_wakeup_semaphore |
			gk20a_nonstall_ops_post_events);
	}
	return ops;
}
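
A minimal sketch of how a caller might act on the returned bitmask; the dispatch shown here is an assumption (the real one lives in the fifo/interrupt plumbing), and only symbols already referenced in this file are used:

	/* sketch: consume the nonstall ops reported by the CE2 ISR */
	int ops = gk20a_ce2_nonstall_isr(g, inst_id, pri_base);

	if (ops & gk20a_nonstall_ops_wakeup_semaphore) {
		/* wake threads waiting on channel semaphores */
	}
	if (ops & gk20a_nonstall_ops_post_events) {
		/* post completion events to channel waiters */
	}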

/* static CE app api */
static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx)
{
	u32 cmd_buf_index;
	u32 cmd_buf_read_offset;
	u32 fence_index;
	u32 *cmd_buf_cpu_va;

	for (cmd_buf_index = 0;
		cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset;
		cmd_buf_index++) {
		cmd_buf_read_offset = (cmd_buf_index *
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));

		/* the end of each command buffer slot holds a gk20a_fence
		 * used for command buffer sync */
		fence_index = (cmd_buf_read_offset +
			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));

		cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

		/* 0 is treated as invalid pre-sync */
		if (cmd_buf_cpu_va[fence_index]) {
			struct gk20a_fence *ce_cmd_buf_fence_in = NULL;

			memcpy((void *)&ce_cmd_buf_fence_in,
				(void *)(cmd_buf_cpu_va + fence_index),
				sizeof(struct gk20a_fence *));
			gk20a_fence_put(ce_cmd_buf_fence_in);
			/* reset the stored last pre-sync */
			memset((void *)(cmd_buf_cpu_va + fence_index),
				0,
				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
		}
	}
}
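
A sketch of the per-kickoff slot layout the loop above walks; the exact NVGPU_CE_* byte counts are defines from the CE headers and are not reproduced here:

/*
 *   one kickoff slot (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF bytes)
 *   +------------------------------------------+---------------------+
 *   | CE methods written by                    | stored gk20a_fence  |
 *   | gk20a_ce_prepare_submit()                | pointer (pre-sync)  |
 *   +------------------------------------------+---------------------+
 *   ^ cmd_buf_read_offset                      ^ fence_index
 *     (slot * slot size, in u32 words)           (slot end minus the
 *                                                 TRACING area, words)
 */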

/* this function must be called with ce_app->app_mutex held */
static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
{
	struct nvgpu_list_node *list = &ce_ctx->list;

	ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

	if (ce_ctx->cmd_buf_mem.cpu_va) {
		gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
		nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
	}

	/* unbind tsg */
	if (ce_ctx->tsg && ce_ctx->ch)
		gk20a_tsg_unbind_channel(ce_ctx->ch);

	/* free the channel */
	if (ce_ctx->ch)
		gk20a_channel_close(ce_ctx->ch);

	/* housekeeping on app */
	if (list->prev && list->next)
		nvgpu_list_del(list);

	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
	nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex);

	nvgpu_kfree(ce_ctx->g, ce_ctx);
}

static inline unsigned int gk20a_ce_get_method_size(int request_operation,
			u64 size)
{
	/* failure size */
	unsigned int methodsize = UINT_MAX;
	unsigned int iterations = 0;
	u32 shift;
	u64 chunk = size;
	u32 height, width;

	while (chunk) {
		iterations++;

		shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
			MAX_CE_SHIFT;
		width = chunk >> shift;
		height = 1 << shift;
		width = MAX_CE_ALIGN(width);

		chunk -= (u64) height * width;
	}

	if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER)
		methodsize = (2 + (16 * iterations)) * sizeof(u32);
	else if (request_operation & NVGPU_CE_MEMSET)
		methodsize = (2 + (15 * iterations)) * sizeof(u32);

	return methodsize;
}
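
To make the sizing loop concrete, here is a minimal userspace mirror of the same arithmetic (a sketch: __builtin_ctzll stands in for the kernel's __ffs, and the NVGPU_CE_* operation flags are elided):

#include <stdint.h>
#include <stdio.h>

#define MAX_CE_SHIFT 31 /* 4Gpixels - 1 */
#define MAX_CE_MASK ((uint32_t)(~(~0u << MAX_CE_SHIFT)))
#define MAX_CE_ALIGN(a) ((a) & MAX_CE_MASK)

/* mirror of the while (chunk) loop in gk20a_ce_get_method_size() */
static unsigned int ce_iterations(uint64_t size)
{
	unsigned int iterations = 0;
	uint64_t chunk = size;

	while (chunk) {
		/* lowest set bit below 2 Gpix picks the rectangle shape */
		uint32_t shift = MAX_CE_ALIGN(chunk) ?
			(uint32_t)__builtin_ctzll(MAX_CE_ALIGN(chunk)) :
			MAX_CE_SHIFT;
		uint32_t width = MAX_CE_ALIGN(chunk >> shift);
		uint64_t height = 1ull << shift;

		chunk -= height * width;
		iterations++;
	}
	return iterations;
}

int main(void)
{
	/* 2 GiB + 4 KiB: one 0x80001-pixel x 4096-line rectangle */
	printf("%u\n", ce_iterations(0x80001000ull)); /* prints 1 */
	/* 6 GiB: one 3-pixel x 2^31-line rectangle */
	printf("%u\n", ce_iterations(0x180000000ull)); /* prints 1 */
	return 0;
}

Tracing the first case by hand: MAX_CE_ALIGN(0x80001000) is 0x1000, so shift is 12, width is 0x80001, height is 4096, and 4096 * 0x80001 covers the whole buffer in one iteration, costing (2 + 16 * 1) * sizeof(u32) = 72 bytes of methods for a PHYS_MODE_TRANSFER.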

int gk20a_ce_prepare_submit(u64 src_buf,
		u64 dst_buf,
		u64 size,
		u32 *cmd_buf_cpu_va,
		u32 max_cmd_buf_size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		u32 dma_copy_class,
		struct gk20a_fence *gk20a_fence_in)
{
	u32 launch = 0;
	u32 methodSize = 0;
	u64 offset = 0;
	u64 chunk_size = 0;
	u64 chunk = size;

	/* failure case handling */
	if ((gk20a_ce_get_method_size(request_operation, size) >
		max_cmd_buf_size) || (!size) ||
		(request_operation > NVGPU_CE_MEMSET))
		return 0;

	/* set the channel object */
	cmd_buf_cpu_va[methodSize++] = 0x20018000;
	cmd_buf_cpu_va[methodSize++] = dma_copy_class;

	/*
	 * The purpose here is to clear the memory in 2D rectangles. We use
	 * ffs to determine the number of lines to copy. The only constraint
	 * is that the maximum number of pixels per line is 4 Gpix - 1, which
	 * is awkward for calculation, so we settle on 2 Gpix per line to
	 * make the calculation more agreeable.
	 */

	/*
	 * The copy engine in 2D mode can handle (2^32 - 1) x (2^32 - 1)
	 * pixels in a single submit; we are going to clear a range as
	 * multiple lines of up to 2 Gpix each. Because we want the copy
	 * to be byte aligned we will be using 1-byte pixels.
	 */

	/*
	 * per iteration
	 * <------------------------- 40 bits ------------------------------>
	 * 1 <------ ffs ------->
	 * <-----------up to 30 bits----------->
	 */
	while (chunk) {
		u32 width, height, shift;

		/*
		 * We will be aligning to bytes, making the maximum number of
		 * pixels per line 2 Gpix.
		 */
		shift = MAX_CE_ALIGN(chunk) ? __ffs(MAX_CE_ALIGN(chunk)) :
			MAX_CE_SHIFT;
		height = chunk >> shift;
		width = 1 << shift;
		height = MAX_CE_ALIGN(height);

		chunk_size = (u64) height * width;

		/* reset launch flag */
		launch = 0;

		if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
			/* setup the source */
			cmd_buf_cpu_va[methodSize++] = 0x20028100;
			cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf +
				offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
			cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf +
				offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);

			cmd_buf_cpu_va[methodSize++] = 0x20018098;
			if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB)
				cmd_buf_cpu_va[methodSize++] = 0x00000000;
			else if (launch_flags &
				NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM)
				cmd_buf_cpu_va[methodSize++] = 0x00000002;
			else
				cmd_buf_cpu_va[methodSize++] = 0x00000001;

			launch |= 0x00001000;
		} else if (request_operation & NVGPU_CE_MEMSET) {
			/* remap from component A on 1 byte wide pixels */
			cmd_buf_cpu_va[methodSize++] = 0x200181c2;
			cmd_buf_cpu_va[methodSize++] = 0x00000004;

			cmd_buf_cpu_va[methodSize++] = 0x200181c0;
			cmd_buf_cpu_va[methodSize++] = payload;

			launch |= 0x00000400;
		} else {
			/* illegal operation */
			return 0;
		}

		/* setup the destination/output */
		cmd_buf_cpu_va[methodSize++] = 0x20068102;
		cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf +
			offset) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
		cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf +
			offset) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
		/* pitch in/out */
		cmd_buf_cpu_va[methodSize++] = width;
		cmd_buf_cpu_va[methodSize++] = width;
		/* width and line count */
		cmd_buf_cpu_va[methodSize++] = width;
		cmd_buf_cpu_va[methodSize++] = height;

		cmd_buf_cpu_va[methodSize++] = 0x20018099;
		if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB)
			cmd_buf_cpu_va[methodSize++] = 0x00000000;
		else if (launch_flags &
			NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM)
			cmd_buf_cpu_va[methodSize++] = 0x00000002;
		else
			cmd_buf_cpu_va[methodSize++] = 0x00000001;

		launch |= 0x00002005;

		if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR)
			launch |= 0x00000000;
		else
			launch |= 0x00000080;

		if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR)
			launch |= 0x00000000;
		else
			launch |= 0x00000100;

		cmd_buf_cpu_va[methodSize++] = 0x200180c0;
		cmd_buf_cpu_va[methodSize++] = launch;
		offset += chunk_size;
		chunk -= chunk_size;
	}

	return methodSize;
}
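
A hedged usage sketch: building memset methods for one command-buffer slot. dst_gpu_va and dma_copy_class are placeholders for values the caller would already hold, and the real submission path additionally wraps this in gpfifo entries and fence handling:

	/* illustrative only: fill one slot of the CE command buffer;
	 * a zero return means the request was rejected */
	u32 *slot = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
	int method_words = gk20a_ce_prepare_submit(
			0 /* src unused for memset */,
			dst_gpu_va /* placeholder GPU VA */,
			64 * 1024 /* bytes to fill */,
			slot,
			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
			0xdeadbeef /* payload pattern */,
			NVGPU_CE_DST_LOCATION_LOCAL_FB,
			NVGPU_CE_MEMSET,
			dma_copy_class /* placeholder class id */,
			NULL /* no pre-sync fence */);
	if (!method_words)
		return -EINVAL;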

/* global CE app related apis */
int gk20a_init_ce_support(struct gk20a *g)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;
	int err;
	u32 ce_reset_mask;

	ce_reset_mask = gk20a_fifo_get_all_ce_engine_reset_mask(g);

	g->ops.mc.reset(g, ce_reset_mask);

	if (g->ops.clock_gating.slcg_ce2_load_gating_prod)
		g->ops.clock_gating.slcg_ce2_load_gating_prod(g,
				g->slcg_enabled);
	if (g->ops.clock_gating.blcg_ce_load_gating_prod)
		g->ops.clock_gating.blcg_ce_load_gating_prod(g,
				g->blcg_enabled);

	if (ce_app->initialised) {
		/* this happens during the GPU poweroff/poweron sequence */
		ce_app->app_state = NVGPU_CE_ACTIVE;
		return 0;
	}

	gk20a_dbg(gpu_dbg_fn, "ce: init");

	err = nvgpu_mutex_init(&ce_app->app_mutex);
	if (err)
		return err;

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_init_list_node(&ce_app->allocated_contexts);
	ce_app->ctx_count = 0;
	ce_app->next_ctx_id = 0;
	ce_app->initialised = true;
	ce_app->app_state = NVGPU_CE_ACTIVE;

	nvgpu_mutex_release(&ce_app->app_mutex);
	gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished");

	return 0;
}

void gk20a_ce_destroy(struct gk20a *g)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;

	if (!ce_app->initialised)
		return;

	ce_app->app_state = NVGPU_CE_SUSPEND;
	ce_app->initialised = false;

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		gk20a_ce_delete_gpu_context(ce_ctx);
	}

	nvgpu_init_list_node(&ce_app->allocated_contexts);
	ce_app->ctx_count = 0;
	ce_app->next_ctx_id = 0;

	nvgpu_mutex_release(&ce_app->app_mutex);

	nvgpu_mutex_destroy(&ce_app->app_mutex);
}

void gk20a_ce_suspend(struct gk20a *g)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;

	if (!ce_app->initialised)
		return;

	ce_app->app_state = NVGPU_CE_SUSPEND;
}

/* CE app utility functions */
u32 gk20a_ce_create_context(struct gk20a *g,
		int runlist_id,
		int timeslice,
		int runlist_level)
{
	struct gk20a_gpu_ctx *ce_ctx;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	u32 ctx_id = ~0;
	int err = 0;

	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
		return ctx_id;

	ce_ctx = nvgpu_kzalloc(g, sizeof(*ce_ctx));
	if (!ce_ctx)
		return ctx_id;

	err = nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex);
	if (err) {
		nvgpu_kfree(g, ce_ctx);
		return ctx_id;
	}

	ce_ctx->g = g;

	ce_ctx->cmd_buf_read_queue_offset = 0;
	ce_ctx->cmd_buf_end_queue_offset =
		(NVGPU_CE_COMMAND_BUF_SIZE /
			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);

	ce_ctx->vm = g->mm.ce.vm;

	if (nvgpu_is_enabled(g, NVGPU_MM_CE_TSG_REQUIRED)) {
		/* allocate a tsg if needed */
		ce_ctx->tsg = gk20a_tsg_open(g);

		if (!ce_ctx->tsg) {
			nvgpu_err(g, "ce: gk20a tsg not available");
			goto end;
		}
	}

	/* a kernel client always needs a privileged channel */
	ce_ctx->ch = gk20a_open_new_channel(g, runlist_id, true);
	if (!ce_ctx->ch) {
		nvgpu_err(g, "ce: gk20a channel not available");
		goto end;
	}
	ce_ctx->ch->wdt_enabled = false;

	/* bind the channel to the vm */
	err = __gk20a_vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
	if (err) {
		nvgpu_err(g, "ce: could not bind vm");
		goto end;
	}

	if (nvgpu_is_enabled(g, NVGPU_MM_CE_TSG_REQUIRED)) {
		err = gk20a_tsg_bind_channel(ce_ctx->tsg, ce_ctx->ch);
		if (err) {
			nvgpu_err(g, "ce: unable to bind to tsg");
			goto end;
		}
	}

	/* allocate gpfifo (1024 should be more than enough) */
	err = gk20a_channel_alloc_gpfifo(ce_ctx->ch, 1024, 0, 0);
	if (err) {
		nvgpu_err(g, "ce: unable to allocate gpfifo");
		goto end;
	}

	/* allocate command buffer (4096 should be more than enough) from
	 * sysmem */
	err = nvgpu_dma_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE,
			&ce_ctx->cmd_buf_mem);
	if (err) {
		nvgpu_err(g,
			"ce: could not allocate command buffer for CE context");
		goto end;
	}

	memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size);

	/* -1 means default channel timeslice value */
	if (timeslice != -1) {
		err = gk20a_fifo_set_timeslice(ce_ctx->ch, timeslice);
		if (err) {
			nvgpu_err(g,
				"ce: could not set the channel timeslice value for CE context");
			goto end;
		}
	}

	/* -1 means default channel runlist level */
	if (runlist_level != -1) {
		err = gk20a_channel_set_runlist_interleave(ce_ctx->ch,
				runlist_level);
		if (err) {
			nvgpu_err(g,
				"ce: could not set the runlist interleave for CE context");
			goto end;
		}
	}

	nvgpu_mutex_acquire(&ce_app->app_mutex);
	ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
	nvgpu_list_add(&ce_ctx->list, &ce_app->allocated_contexts);
	++ce_app->next_ctx_id;
	++ce_app->ctx_count;
	nvgpu_mutex_release(&ce_app->app_mutex);

	ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;

end:
	if (ctx_id == (u32)~0) {
		nvgpu_mutex_acquire(&ce_app->app_mutex);
		gk20a_ce_delete_gpu_context(ce_ctx);
		nvgpu_mutex_release(&ce_app->app_mutex);
	}
	return ctx_id;
}
EXPORT_SYMBOL(gk20a_ce_create_context);

void gk20a_ce_delete_context(struct gk20a *g,
		u32 ce_ctx_id)
{
	gk20a_ce_delete_context_priv(g, ce_ctx_id);
}

void gk20a_ce_delete_context_priv(struct gk20a *g,
		u32 ce_ctx_id)
{
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;

	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
		return;

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			gk20a_ce_delete_gpu_context(ce_ctx);
			--ce_app->ctx_count;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);
}
EXPORT_SYMBOL(gk20a_ce_delete_context);
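
Finally, a sketch of the lifecycle these two exports give kernel-side clients; runlist_id is a placeholder, and -1 selects the channel defaults, per the comments in gk20a_ce_create_context():

	/* illustrative only: create a CE context, use it, tear it down */
	u32 ce_ctx_id = gk20a_ce_create_context(g, runlist_id,
			-1 /* default timeslice */,
			-1 /* default runlist interleave */);
	if (ce_ctx_id == (u32)~0)
		return -ENOMEM; /* creation failed */

	/* ... build methods with gk20a_ce_prepare_submit() and submit ... */

	gk20a_ce_delete_context(g, ce_ctx_id);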