Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ce2_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.c  617
1 file changed, 617 insertions, 0 deletions

diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 96d38b11..e2f2d9e9 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -24,6 +24,7 @@
 #include <trace/events/gk20a.h>
 #include <linux/dma-mapping.h>
 #include <linux/nvhost.h>
+#include <linux/debugfs.h>
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
@@ -96,3 +97,619 @@ void gk20a_init_ce2(struct gpu_ops *gops)
         gops->ce2.isr_stall = gk20a_ce2_isr;
         gops->ce2.isr_nonstall = gk20a_ce2_nonstall_isr;
 }
+
+/* static CE app api */
+static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event)
+{
+        struct gk20a_ce_app *ce_app = &g->ce_app;
+        struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+
+        if (!ce_app->initialised)
+                return;
+
+        mutex_lock(&ce_app->app_mutex);
+
+        list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+                        &ce_app->allocated_contexts, list) {
+                if (ce_ctx->user_event_callback) {
+                        ce_ctx->user_event_callback(ce_ctx->ctx_id,
+                                event);
+                }
+        }
+
+        mutex_unlock(&ce_app->app_mutex);
+}
+
+static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data)
+{
+        struct gk20a_gpu_ctx *ce_ctx = data;
+        bool channel_idle;
+        u32 event;
+
+        mutex_lock(&ch->jobs_lock);
+        channel_idle = list_empty(&ch->jobs);
+        mutex_unlock(&ch->jobs_lock);
+
+        if (!channel_idle)
+                return;
+
+        gk20a_dbg(gpu_dbg_fn, "ce: finished %p", ce_ctx);
+
+        if (ch->has_timedout)
+                event = NVGPU_CE_CONTEXT_JOB_TIMEDOUT;
+        else
+                event = NVGPU_CE_CONTEXT_JOB_COMPLETED;
+
+        if (ce_ctx->user_event_callback)
+                ce_ctx->user_event_callback(ce_ctx->ctx_id, event);
+
+        ++ce_ctx->completed_seq_number;
+}
+
+static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx)
+{
+        u32 cmd_buf_index;
+        u32 cmd_buf_read_offset;
+        u32 fence_index;
+        u32 *cmd_buf_cpu_va;
+
+        for (cmd_buf_index = 0;
+                cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset;
+                cmd_buf_index++) {
+                cmd_buf_read_offset = (cmd_buf_index *
+                        (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
+
+                /* the tail of each kickoff slot stores a gk20a_fence
+                 * pointer used for command buffer sync */
+                fence_index = (cmd_buf_read_offset +
+                        ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
+                         (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
+
+                cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
+
+                /* 0 is treated as an invalid pre-sync */
+                if (cmd_buf_cpu_va[fence_index]) {
+                        struct gk20a_fence *ce_cmd_buf_fence_in = NULL;
+
+                        memcpy((void *)&ce_cmd_buf_fence_in,
+                                (void *)(cmd_buf_cpu_va + fence_index),
+                                sizeof(struct gk20a_fence *));
+                        gk20a_fence_put(ce_cmd_buf_fence_in);
+                        /* reset the stored last pre-sync */
+                        memset((void *)(cmd_buf_cpu_va + fence_index),
+                                0,
+                                NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
+                }
+        }
+}
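
Each kickoff slot in the command buffer therefore ends with a stored
gk20a_fence pointer. A minimal host-side sketch of that layout (not driver
code; the three sizes are illustrative assumptions, the real values live in
the nvgpu headers):

        #include <stdio.h>

        /* assumed values, for illustration only */
        #define CE_COMMAND_BUF_SIZE             4096
        #define CE_CMD_BUF_SIZE_PER_KICKOFF      256
        #define CE_CMD_BUF_SIZE_FOR_TRACING        8

        int main(void)
        {
                unsigned slot_words = CE_CMD_BUF_SIZE_PER_KICKOFF / 4;
                unsigned trace_words = CE_CMD_BUF_SIZE_FOR_TRACING / 4;
                unsigned slots = CE_COMMAND_BUF_SIZE / CE_CMD_BUF_SIZE_PER_KICKOFF;
                unsigned i;

                /* each slot holds methods, then the fence pointer at its tail */
                for (i = 0; i < slots; i++)
                        printf("slot %2u: methods at word %4u, fence ptr at word %4u\n",
                                i, i * slot_words,
                                i * slot_words + slot_words - trace_words);
                return 0;
        }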
+
+/* the caller must hold ce_app->app_mutex */
+static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
+{
+        ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
+
+        mutex_lock(&ce_ctx->gpu_ctx_mutex);
+
+        gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
+
+        gk20a_gmmu_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
+
+        /* free the channel */
+        if (ce_ctx->ch)
+                gk20a_channel_close(ce_ctx->ch);
+
+        /* housekeeping on app */
+        list_del(&ce_ctx->list);
+
+        mutex_unlock(&ce_ctx->gpu_ctx_mutex);
+        mutex_destroy(&ce_ctx->gpu_ctx_mutex);
+
+        kfree(ce_ctx);
+}
+
+static inline int gk20a_ce_get_method_size(int request_operation)
+{
+        /* failure size */
+        int methodsize = ~0;
+
+        if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER)
+                methodsize = 10 * 2 * sizeof(u32);
+        else if (request_operation & NVGPU_CE_MEMSET)
+                methodsize = 9 * 2 * sizeof(u32);
+
+        return methodsize;
+}
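
Every method emitted below is a (header, data) word pair, so a physical-mode
transfer needs 10 pairs (10 * 2 * 4 = 80 bytes) and a memset 9 pairs
(9 * 2 * 4 = 72 bytes); any other operation falls through to the ~0 failure
size.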
+
+static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
+{
+        /* when no local memory is available, strip the local-memory
+         * related CE flags */
+        if (!g->mm.vidmem_size) {
+                launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
+                        NVGPU_CE_DST_LOCATION_LOCAL_FB);
+        }
+        return launch_flags;
+}
+
+static int gk20a_ce_prepare_submit(u64 src_buf,
+                u64 dst_buf,
+                u64 size,
+                u32 *cmd_buf_cpu_va,
+                u32 max_cmd_buf_size,
+                unsigned int payload,
+                int launch_flags,
+                int request_operation,
+                u32 dma_copy_class,
+                struct gk20a_fence *gk20a_fence_in)
+{
+        u32 launch = 0;
+        u32 methodSize = 0;
+
+        /* failure case handling */
+        if ((gk20a_ce_get_method_size(request_operation) > max_cmd_buf_size) ||
+                (!size) ||
+                (request_operation > NVGPU_CE_MEMSET))
+                return 0;
+
+        /* set the channel object */
+        cmd_buf_cpu_va[methodSize++] = 0x20018000;
+        cmd_buf_cpu_va[methodSize++] = dma_copy_class;
+
+        if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
+                /* set up the source */
+                cmd_buf_cpu_va[methodSize++] = 0x20018101;
+                cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf) &
+                        NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
+
+                cmd_buf_cpu_va[methodSize++] = 0x20018100;
+                cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf) &
+                        NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
+
+                cmd_buf_cpu_va[methodSize++] = 0x20018098;
+                if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB) {
+                        cmd_buf_cpu_va[methodSize++] = 0x00000000;
+                } else if (launch_flags & NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) {
+                        cmd_buf_cpu_va[methodSize++] = 0x00000002;
+                } else {
+                        cmd_buf_cpu_va[methodSize++] = 0x00000001;
+                }
+
+                launch |= 0x00001000;
+        } else if (request_operation & NVGPU_CE_MEMSET) {
+                cmd_buf_cpu_va[methodSize++] = 0x200181c2;
+                cmd_buf_cpu_va[methodSize++] = 0x00030004;
+
+                cmd_buf_cpu_va[methodSize++] = 0x200181c0;
+                cmd_buf_cpu_va[methodSize++] = payload;
+
+                launch |= 0x00000400;
+
+                /* convert size into the number of words */
+                size /= sizeof(u32);
+        }
+
+        /* set up the destination/output */
+        cmd_buf_cpu_va[methodSize++] = 0x20018103;
+        cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf) &
+                NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
+
+        cmd_buf_cpu_va[methodSize++] = 0x20018102;
+        cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf) &
+                NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
+
+        cmd_buf_cpu_va[methodSize++] = 0x20018099;
+        if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) {
+                cmd_buf_cpu_va[methodSize++] = 0x00000000;
+        } else if (launch_flags & NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) {
+                cmd_buf_cpu_va[methodSize++] = 0x00000002;
+        } else {
+                cmd_buf_cpu_va[methodSize++] = 0x00000001;
+        }
+
+        launch |= 0x00002000;
+
+        /* set up the format */
+        cmd_buf_cpu_va[methodSize++] = 0x20018107;
+        cmd_buf_cpu_va[methodSize++] = 1;
+        cmd_buf_cpu_va[methodSize++] = 0x20018106;
+        cmd_buf_cpu_va[methodSize++] = u64_lo32(size);
+
+        launch |= 0x00000004;
+
+        if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR)
+                launch |= 0x00000000;
+        else
+                launch |= 0x00000080;
+
+        if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR)
+                launch |= 0x00000000;
+        else
+                launch |= 0x00000100;
+
+        if (launch_flags & NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED)
+                launch |= 0x00000002;
+        else
+                launch |= 0x00000001;
+
+        cmd_buf_cpu_va[methodSize++] = 0x200180c0;
+        cmd_buf_cpu_va[methodSize++] = launch;
+
+        return methodSize;
+}
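
The final launch word is assembled bit by bit from the request type and the
launch flags. A minimal host-side sketch mirroring that composition (not
driver code; the flag values here are illustrative assumptions, the real
ones live in the nvgpu UAPI headers):

        #include <stdio.h>

        /* assumed flag values, for illustration only */
        #define CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR    (1 << 0)
        #define CE_DST_MEMORY_LAYOUT_BLOCKLINEAR    (1 << 1)
        #define CE_DATA_TRANSFER_TYPE_NON_PIPELINED (1 << 2)

        static unsigned ce_launch_word(int flags, int phys_mode_transfer)
        {
                unsigned launch = 0;

                if (phys_mode_transfer)
                        launch |= 0x00001000;   /* copy branch above */
                else
                        launch |= 0x00000400;   /* memset branch above */

                launch |= 0x00002000;           /* destination setup */
                launch |= 0x00000004;           /* format setup */

                if (!(flags & CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR))
                        launch |= 0x00000080;   /* source pitch-linear */
                if (!(flags & CE_DST_MEMORY_LAYOUT_BLOCKLINEAR))
                        launch |= 0x00000100;   /* destination pitch-linear */

                if (flags & CE_DATA_TRANSFER_TYPE_NON_PIPELINED)
                        launch |= 0x00000002;
                else
                        launch |= 0x00000001;

                return launch;
        }

        int main(void)
        {
                /* pitch-linear, pipelined, physical-mode copy -> 0x3185 */
                printf("launch = 0x%08x\n", ce_launch_word(0, 1));
                return 0;
        }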
+
+/* global CE app related apis */
+int gk20a_init_ce_support(struct gk20a *g)
+{
+        struct gk20a_ce_app *ce_app = &g->ce_app;
+
+        if (ce_app->initialised) {
+                /* this happens during the GPU power-on/power-off sequence */
+                ce_app->app_state = NVGPU_CE_ACTIVE;
+                gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_RESUME);
+                return 0;
+        }
+
+        gk20a_dbg(gpu_dbg_fn, "ce: init");
+
+        mutex_init(&ce_app->app_mutex);
+        mutex_lock(&ce_app->app_mutex);
+
+        INIT_LIST_HEAD(&ce_app->allocated_contexts);
+        ce_app->ctx_count = 0;
+        ce_app->next_ctx_id = 0;
+        ce_app->initialised = true;
+        ce_app->app_state = NVGPU_CE_ACTIVE;
+
+        mutex_unlock(&ce_app->app_mutex);
+        gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished");
+
+        return 0;
+}
+
+void gk20a_ce_destroy(struct gk20a *g)
+{
+        struct gk20a_ce_app *ce_app = &g->ce_app;
+        struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+
+        if (!ce_app->initialised)
+                return;
+
+        ce_app->app_state = NVGPU_CE_SUSPEND;
+        ce_app->initialised = false;
+
+        mutex_lock(&ce_app->app_mutex);
+
+        list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+                        &ce_app->allocated_contexts, list) {
+                gk20a_ce_delete_gpu_context(ce_ctx);
+        }
+
+        INIT_LIST_HEAD(&ce_app->allocated_contexts);
+        ce_app->ctx_count = 0;
+        ce_app->next_ctx_id = 0;
+
+        mutex_unlock(&ce_app->app_mutex);
+        mutex_destroy(&ce_app->app_mutex);
+}
+
+void gk20a_ce_suspend(struct gk20a *g)
+{
+        struct gk20a_ce_app *ce_app = &g->ce_app;
+
+        if (!ce_app->initialised)
+                return;
+
+        ce_app->app_state = NVGPU_CE_SUSPEND;
+        gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_SUSPEND);
+}
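
gk20a_ce_suspend() and gk20a_init_ce_support() appear intended to be paired
by the power-management path, so registered callbacks see a SUSPEND followed
by a RESUME event. A minimal sketch of a hypothetical caller:

        /* on the way down */
        gk20a_ce_suspend(g);       /* notifies NVGPU_CE_CONTEXT_SUSPEND */

        /* on the way back up: on an already-initialised app this only
         * flips the state back to NVGPU_CE_ACTIVE and notifies
         * NVGPU_CE_CONTEXT_RESUME */
        gk20a_init_ce_support(g);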
+
+/* CE app utility functions */
+u32 gk20a_ce_create_context_with_cb(struct device *dev,
+                int runlist_id,
+                int priority,
+                int timeslice,
+                int runlist_level,
+                ce_event_callback user_event_callback)
+{
+        struct gk20a_gpu_ctx *ce_ctx;
+        struct gk20a *g = gk20a_from_dev(dev);
+        struct gk20a_ce_app *ce_app = &g->ce_app;
+        u32 ctx_id = ~0;
+        int err = 0;
+
+        if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
+                return ctx_id;
+
+        ce_ctx = kzalloc(sizeof(*ce_ctx), GFP_KERNEL);
+        if (!ce_ctx)
+                return ctx_id;
+
+        mutex_init(&ce_ctx->gpu_ctx_mutex);
+
+        ce_ctx->g = g;
+        ce_ctx->dev = g->dev;
+        ce_ctx->user_event_callback = user_event_callback;
+
+        ce_ctx->cmd_buf_read_queue_offset = 0;
+        ce_ctx->cmd_buf_end_queue_offset =
+                (NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);
+
+        ce_ctx->submitted_seq_number = 0;
+        ce_ctx->completed_seq_number = 0;
+
+        /* kernel clients always need a privileged channel */
+        ce_ctx->ch = gk20a_open_new_channel_with_cb(g, gk20a_ce_finished_ctx_cb,
+                        ce_ctx,
+                        runlist_id,
+                        true);
+        if (!ce_ctx->ch) {
+                gk20a_err(ce_ctx->dev, "ce: gk20a channel not available");
+                goto end;
+        }
+
+        /* bind the channel to the vm */
+        gk20a_vm_get(&g->mm.ce.vm);
+        ce_ctx->vm = ce_ctx->ch->vm = &g->mm.ce.vm;
+        err = channel_gk20a_commit_va(ce_ctx->ch);
+        if (err) {
+                gk20a_err(ce_ctx->dev, "ce: could not bind vm");
+                goto end;
+        }
+
+        /* allocate gpfifo (1024 should be more than enough) */
+        err = gk20a_alloc_channel_gpfifo(ce_ctx->ch,
+                        &(struct nvgpu_alloc_gpfifo_args){1024, 0});
+        if (err) {
+                gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo");
+                goto end;
+        }
+
+        /* allocate the command buffer (4096 should be more than enough) from sysmem */
+        err = gk20a_gmmu_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE,
+                        &ce_ctx->cmd_buf_mem);
+        if (err) {
+                gk20a_err(ce_ctx->dev,
+                        "ce: could not allocate command buffer for CE context");
+                goto end;
+        }
+
+        memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size);
+
+        /* -1 means default channel priority */
+        if (priority != -1) {
+                err = gk20a_channel_set_priority(ce_ctx->ch, priority);
+                if (err) {
+                        gk20a_err(ce_ctx->dev,
+                                "ce: could not set the channel priority for CE context");
+                        goto end;
+                }
+        }
+
+        /* -1 means default channel timeslice value */
+        if (timeslice != -1) {
+                err = gk20a_channel_set_timeslice(ce_ctx->ch, timeslice);
+                if (err) {
+                        gk20a_err(ce_ctx->dev,
+                                "ce: could not set the channel timeslice value for CE context");
+                        goto end;
+                }
+        }
+
+        /* -1 means default channel runlist level */
+        if (runlist_level != -1) {
+                err = gk20a_channel_set_runlist_interleave(ce_ctx->ch, runlist_level);
+                if (err) {
+                        gk20a_err(ce_ctx->dev,
+                                "ce: could not set the runlist interleave for CE context");
+                        goto end;
+                }
+        }
+
+        mutex_lock(&ce_app->app_mutex);
+        ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
+        list_add(&ce_ctx->list, &ce_app->allocated_contexts);
+        ++ce_app->next_ctx_id;
+        ++ce_app->ctx_count;
+        mutex_unlock(&ce_app->app_mutex);
+
+        ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;
+
+end:
+        if (ctx_id == (u32)~0) {
+                mutex_lock(&ce_app->app_mutex);
+                gk20a_ce_delete_gpu_context(ce_ctx);
+                mutex_unlock(&ce_app->app_mutex);
+        }
+        return ctx_id;
+}
+EXPORT_SYMBOL(gk20a_ce_create_context_with_cb);
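
A minimal sketch of a client-side event callback, assuming the
ce_event_callback type is void (*)(u32 ctx_id, u32 event) as the call sites
above suggest (my_ce_event_cb and ce_runlist_id are hypothetical):

        static void my_ce_event_cb(u32 ctx_id, u32 event)
        {
                switch (event) {
                case NVGPU_CE_CONTEXT_JOB_COMPLETED:
                        pr_debug("ce ctx %u: job completed\n", ctx_id);
                        break;
                case NVGPU_CE_CONTEXT_JOB_TIMEDOUT:
                        pr_warn("ce ctx %u: job timed out\n", ctx_id);
                        break;
                case NVGPU_CE_CONTEXT_SUSPEND:
                case NVGPU_CE_CONTEXT_RESUME:
                        /* pause or resume client-side submissions */
                        break;
                }
        }

        /* -1 selects the default priority/timeslice/interleave level */
        u32 ctx_id = gk20a_ce_create_context_with_cb(dev, ce_runlist_id,
                        -1, -1, -1, my_ce_event_cb);
        if (ctx_id == (u32)~0)
                pr_err("ce context creation failed\n");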
+
+int gk20a_ce_execute_ops(struct device *dev,
+                u32 ce_ctx_id,
+                u64 src_buf,
+                u64 dst_buf,
+                u64 size,
+                unsigned int payload,
+                int launch_flags,
+                int request_operation,
+                struct gk20a_fence *gk20a_fence_in,
+                u32 submit_flags,
+                struct gk20a_fence **gk20a_fence_out)
+{
+        int ret = -EPERM;
+        struct gk20a *g = gk20a_from_dev(dev);
+        struct gk20a_ce_app *ce_app = &g->ce_app;
+        struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+        bool found = false;
+        u32 *cmd_buf_cpu_va;
+        u64 cmd_buf_gpu_va = 0;
+        u32 methodSize;
+        u32 cmd_buf_read_offset;
+        u32 fence_index;
+        struct nvgpu_gpfifo gpfifo;
+        struct nvgpu_fence fence = {0, 0};
+        struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
+        struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics;
+
+        if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
+                goto end;
+
+        mutex_lock(&ce_app->app_mutex);
+
+        list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+                        &ce_app->allocated_contexts, list) {
+                if (ce_ctx->ctx_id == ce_ctx_id) {
+                        found = true;
+                        break;
+                }
+        }
+
+        mutex_unlock(&ce_app->app_mutex);
+
+        if (!found) {
+                ret = -EINVAL;
+                goto end;
+        }
+
+        if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
+                ret = -ENODEV;
+                goto end;
+        }
+
+        mutex_lock(&ce_ctx->gpu_ctx_mutex);
+
+        ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
+
+        cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
+                (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
+
+        /* the tail of each kickoff slot stores a gk20a_fence pointer
+         * used for command buffer sync */
+        fence_index = (cmd_buf_read_offset +
+                ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
+                 (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
+
+        if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
+                ret = -ENOMEM;
+                goto noop;
+        }
+
+        cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
+
+        /* 0 is treated as an invalid pre-sync */
+        if (cmd_buf_cpu_va[fence_index]) {
+                struct gk20a_fence *ce_cmd_buf_fence_in = NULL;
+
+                memcpy((void *)&ce_cmd_buf_fence_in,
+                        (void *)(cmd_buf_cpu_va + fence_index),
+                        sizeof(struct gk20a_fence *));
+                ret = gk20a_fence_wait(ce_cmd_buf_fence_in,
+                        gk20a_get_gr_idle_timeout(g));
+
+                gk20a_fence_put(ce_cmd_buf_fence_in);
+                /* reset the stored last pre-sync */
+                memset((void *)(cmd_buf_cpu_va + fence_index),
+                        0,
+                        NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
+                if (ret)
+                        goto noop;
+        }
+
+        cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va +
+                (u64)(cmd_buf_read_offset * sizeof(u32)));
+
+        methodSize = gk20a_ce_prepare_submit(src_buf,
+                        dst_buf,
+                        size,
+                        &cmd_buf_cpu_va[cmd_buf_read_offset],
+                        NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
+                        payload,
+                        gk20a_get_valid_launch_flags(g, launch_flags),
+                        request_operation,
+                        gpu_capability->dma_copy_class,
+                        gk20a_fence_in);
+
+        if (methodSize) {
+                /* TODO: remove the CPU pre-fence wait */
+                if (gk20a_fence_in) {
+                        ret = gk20a_fence_wait(gk20a_fence_in,
+                                gk20a_get_gr_idle_timeout(g));
+                        gk20a_fence_put(gk20a_fence_in);
+                        if (ret)
+                                goto noop;
+                }
+
+                /* store the element into the gpfifo */
+                gpfifo.entry0 = u64_lo32(cmd_buf_gpu_va);
+                gpfifo.entry1 = (u64_hi32(cmd_buf_gpu_va) |
+                        pbdma_gp_entry1_length_f(methodSize));
+
+                /* always take the post-fence; it is needed to protect the
+                 * ce context */
+                submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
+
+                wmb();
+
+                ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
+                        1, submit_flags, &fence, &ce_cmd_buf_fence_out, true);
+
+                if (!ret) {
+                        memcpy((void *)(cmd_buf_cpu_va + fence_index),
+                                (void *)&ce_cmd_buf_fence_out,
+                                sizeof(struct gk20a_fence *));
+
+                        if (gk20a_fence_out) {
+                                gk20a_fence_get(ce_cmd_buf_fence_out);
+                                *gk20a_fence_out = ce_cmd_buf_fence_out;
+                        }
+
+                        /* next available command buffer queue index */
+                        ++ce_ctx->cmd_buf_read_queue_offset;
+                        ++ce_ctx->submitted_seq_number;
+                }
+        } else {
+                ret = -ENOMEM;
+        }
+noop:
+        mutex_unlock(&ce_ctx->gpu_ctx_mutex);
+end:
+        return ret;
+}
+EXPORT_SYMBOL(gk20a_ce_execute_ops);
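
A minimal usage sketch: a 4 KiB memset through the new API, followed by a
CPU wait on the returned post-fence (dev, ce_ctx_id and dst_gpu_va are
hypothetical and come from the caller):

        struct gk20a *g = gk20a_from_dev(dev);
        struct gk20a_fence *fence_out = NULL;
        int err;

        err = gk20a_ce_execute_ops(dev, ce_ctx_id,
                        0,              /* src_buf is unused for a memset */
                        dst_gpu_va,     /* destination GPU VA */
                        4096,           /* size in bytes */
                        0,              /* payload: the fill pattern */
                        0,              /* launch_flags */
                        NVGPU_CE_MEMSET,
                        NULL,           /* no pre-fence */
                        0,              /* submit_flags; FENCE_GET is forced */
                        &fence_out);
        if (!err && fence_out) {
                err = gk20a_fence_wait(fence_out, gk20a_get_gr_idle_timeout(g));
                gk20a_fence_put(fence_out);
        }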
+
+void gk20a_ce_delete_context(struct device *dev,
+                u32 ce_ctx_id)
+{
+        struct gk20a *g = gk20a_from_dev(dev);
+        struct gk20a_ce_app *ce_app = &g->ce_app;
+        struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+
+        if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
+                return;
+
+        mutex_lock(&ce_app->app_mutex);
+
+        list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+                        &ce_app->allocated_contexts, list) {
+                if (ce_ctx->ctx_id == ce_ctx_id) {
+                        gk20a_ce_delete_gpu_context(ce_ctx);
+                        --ce_app->ctx_count;
+                        break;
+                }
+        }
+
+        mutex_unlock(&ce_app->app_mutex);
+}
+EXPORT_SYMBOL(gk20a_ce_delete_context);
+
+#ifdef CONFIG_DEBUG_FS
+void gk20a_ce_debugfs_init(struct device *dev)
+{
+        struct gk20a_platform *platform = dev_get_drvdata(dev);
+        struct gk20a *g = get_gk20a(dev);
+
+        debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
+                platform->debugfs, &g->ce_app.ctx_count);
+        debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
+                platform->debugfs, &g->ce_app.app_state);
+        debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
+                platform->debugfs, &g->ce_app.next_ctx_id);
+}
+#endif