author    Lakshmanan M <lm@nvidia.com>            2016-06-29 06:36:39 -0400
committer Vijayakumar Subbu <vsubbu@nvidia.com>   2016-07-20 06:09:28 -0400
commit    89aecd1202b49727e940069f2a6feb5c3cf4c927 (patch)
tree      8a0d3a493b389167ce1d93e55f23e114ec2cbd38 /drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
parent    f6ebdc5f2916706f7a61983567420e0985faeeb1 (diff)
gpu: nvgpu: Add nvgpu infra to allow kernel to create privileged CE channels
Added interface to allow the kernel to create privileged CE channels for
page migration and clearing support between sysmem and vidmem.

JIRA DNVGPU-53

Change-Id: I3e18d18403809c9e64fa45d40b6c4e3844992506
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: http://git-master/r/1173085
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ce2_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ce2_gk20a.c  617
1 file changed, 617 insertions, 0 deletions
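
For readers coming to this patch from elsewhere in the kernel, the sketch below illustrates how an in-kernel client might drive the new CE interface exported here (gk20a_ce_create_context_with_cb, gk20a_ce_execute_ops, gk20a_ce_delete_context). It is a hypothetical usage sketch, not part of the commit: the callback signature is inferred from the call sites in the diff, the include paths, runlist id, payload and buffer address are placeholders, and error handling is trimmed to the essentials.

```c
/*
 * Hypothetical in-kernel user of the new CE interface (not part of this
 * commit). Assumes ce2_gk20a.h declares the symbols used below and that
 * the callback type matches the (ctx_id, event) call sites in the patch.
 */
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/device.h>

#include "gk20a.h"
#include "ce2_gk20a.h"

static void my_ce_event_cb(u32 ce_ctx_id, u32 event)
{
	/* e.g. complete a waiter on JOB_COMPLETED, tear down on JOB_TIMEDOUT */
	pr_info("ce ctx %u: event 0x%x\n", ce_ctx_id, event);
}

static int my_clear_buffer(struct device *dev, u64 dst_gpu_va, u64 size)
{
	struct gk20a *g = gk20a_from_dev(dev);
	struct gk20a_fence *fence_out = NULL;
	u32 ctx_id;
	int err;

	/* 0 = assumed CE runlist id; -1 keeps default priority/timeslice/level */
	ctx_id = gk20a_ce_create_context_with_cb(dev, 0, -1, -1, -1,
						 my_ce_event_cb);
	if (ctx_id == (u32)~0)
		return -ENOMEM;

	/* memset dst_gpu_va (assumed already mapped for CE) with a zero payload */
	err = gk20a_ce_execute_ops(dev, ctx_id,
				   0 /* src unused for memset */,
				   dst_gpu_va, size,
				   0x00000000,
				   NVGPU_CE_DST_LOCATION_LOCAL_FB,
				   NVGPU_CE_MEMSET,
				   NULL /* no pre-fence */,
				   0, &fence_out);
	if (!err && fence_out) {
		/* wait on the post-fence the CE app always requests */
		err = gk20a_fence_wait(fence_out, gk20a_get_gr_idle_timeout(g));
		gk20a_fence_put(fence_out);
	}

	gk20a_ce_delete_context(dev, ctx_id);
	return err;
}
```

A longer-lived client would keep the context around and reuse it across submits; creating and deleting a context per operation is shown here only to keep the sketch compact.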
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 96d38b11..e2f2d9e9 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -24,6 +24,7 @@
 #include <trace/events/gk20a.h>
 #include <linux/dma-mapping.h>
 #include <linux/nvhost.h>
+#include <linux/debugfs.h>
 
 #include "gk20a.h"
 #include "debug_gk20a.h"
@@ -96,3 +97,619 @@ void gk20a_init_ce2(struct gpu_ops *gops)
 	gops->ce2.isr_stall = gk20a_ce2_isr;
 	gops->ce2.isr_nonstall = gk20a_ce2_nonstall_isr;
 }
+
+/* static CE app api */
+static void gk20a_ce_notify_all_user(struct gk20a *g, u32 event)
+{
+	struct gk20a_ce_app *ce_app = &g->ce_app;
+	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+
+	if (!ce_app->initialised)
+		return;
+
+	mutex_lock(&ce_app->app_mutex);
+
+	list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+			&ce_app->allocated_contexts, list) {
+		if (ce_ctx->user_event_callback) {
+			ce_ctx->user_event_callback(ce_ctx->ctx_id,
+				event);
+		}
+	}
+
+	mutex_unlock(&ce_app->app_mutex);
+}
+
+static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data)
+{
+	struct gk20a_gpu_ctx *ce_ctx = data;
+	bool channel_idle;
+	u32 event;
+
+	mutex_lock(&ch->jobs_lock);
+	channel_idle = list_empty(&ch->jobs);
+	mutex_unlock(&ch->jobs_lock);
+
+	if (!channel_idle)
+		return;
+
+	gk20a_dbg(gpu_dbg_fn, "ce: finished %p", ce_ctx);
+
+	if (ch->has_timedout)
+		event = NVGPU_CE_CONTEXT_JOB_TIMEDOUT;
+	else
+		event = NVGPU_CE_CONTEXT_JOB_COMPLETED;
+
+	if (ce_ctx->user_event_callback)
+		ce_ctx->user_event_callback(ce_ctx->ctx_id,
+			event);
+
+	++ce_ctx->completed_seq_number;
+}
+
+static void gk20a_ce_free_command_buffer_stored_fence(struct gk20a_gpu_ctx *ce_ctx)
+{
+	u32 cmd_buf_index;
+	u32 cmd_buf_read_offset;
+	u32 fence_index;
+	u32 *cmd_buf_cpu_va;
+
+	for (cmd_buf_index = 0;
+		cmd_buf_index < ce_ctx->cmd_buf_end_queue_offset;
+		cmd_buf_index++) {
+		cmd_buf_read_offset = (cmd_buf_index *
+			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
+
+		/* at end of command buffer has gk20a_fence for command buffer sync */
+		fence_index = (cmd_buf_read_offset +
+			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
+			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
+
+		cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
+
+		/* 0 is treated as invalid pre-sync */
+		if (cmd_buf_cpu_va[fence_index]) {
+			struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
+
+			memcpy((void *)&ce_cmd_buf_fence_in,
+				(void *)(cmd_buf_cpu_va + fence_index),
+				sizeof(struct gk20a_fence *));
+			gk20a_fence_put(ce_cmd_buf_fence_in);
+			/* Reset the stored last pre-sync */
+			memset((void *)(cmd_buf_cpu_va + fence_index),
+				0,
+				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
+		}
+	}
+}
+
+/* assume this api should need to call under mutex_lock(&ce_app->app_mutex) */
+static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx)
+{
+	ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
+
+	mutex_lock(&ce_ctx->gpu_ctx_mutex);
+
+	gk20a_ce_free_command_buffer_stored_fence(ce_ctx);
+
+	gk20a_gmmu_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
+
+	/* free the channel */
+	if (ce_ctx->ch)
+		gk20a_channel_close(ce_ctx->ch);
+
+	/* housekeeping on app */
+	list_del(&ce_ctx->list);
+
+	mutex_unlock(&ce_ctx->gpu_ctx_mutex);
+	mutex_destroy(&ce_ctx->gpu_ctx_mutex);
+
+	kfree(ce_ctx);
+}
+
+static inline int gk20a_ce_get_method_size(int request_operation)
+{
+	/* failure size */
+	int methodsize = ~0;
+
+	if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER)
+		methodsize = 10 * 2 * sizeof(u32);
+	else if (request_operation & NVGPU_CE_MEMSET)
+		methodsize = 9 * 2 * sizeof(u32);
+
+	return methodsize;
+}
+
+static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
+{
+	/* there is no local memory available,
+	   don't allow local memory related CE flags */
+	if (!g->mm.vidmem_size) {
+		launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
+			NVGPU_CE_DST_LOCATION_LOCAL_FB);
+	}
+	return launch_flags;
+}
+
+static int gk20a_ce_prepare_submit(u64 src_buf,
+		u64 dst_buf,
+		u64 size,
+		u32 *cmd_buf_cpu_va,
+		u32 max_cmd_buf_size,
+		unsigned int payload,
+		int launch_flags,
+		int request_operation,
+		u32 dma_copy_class,
+		struct gk20a_fence *gk20a_fence_in)
+{
+	u32 launch = 0;
+	u32 methodSize = 0;
+
+	/* failure case handling */
+	if ((gk20a_ce_get_method_size(request_operation) > max_cmd_buf_size) ||
+		(!size) ||
+		(request_operation > NVGPU_CE_MEMSET))
+		return 0;
+
+	/* set the channel object */
+	cmd_buf_cpu_va[methodSize++] = 0x20018000;
+	cmd_buf_cpu_va[methodSize++] = dma_copy_class;
+
+	if (request_operation & NVGPU_CE_PHYS_MODE_TRANSFER) {
+		/* setup the source */
+		cmd_buf_cpu_va[methodSize++] = 0x20018101;
+		cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_buf) &
+			NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
+
+		cmd_buf_cpu_va[methodSize++] = 0x20018100;
+		cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_buf) &
+			NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
+
+		cmd_buf_cpu_va[methodSize++] = 0x20018098;
+		if (launch_flags & NVGPU_CE_SRC_LOCATION_LOCAL_FB) {
+			cmd_buf_cpu_va[methodSize++] = 0x00000000;
+		} else if (launch_flags & NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) {
+			cmd_buf_cpu_va[methodSize++] = 0x00000002;
+		} else {
+			cmd_buf_cpu_va[methodSize++] = 0x00000001;
+		}
+
+		launch |= 0x00001000;
+	} else if (request_operation & NVGPU_CE_MEMSET) {
+		cmd_buf_cpu_va[methodSize++] = 0x200181c2;
+		cmd_buf_cpu_va[methodSize++] = 0x00030004;
+
+		cmd_buf_cpu_va[methodSize++] = 0x200181c0;
+		cmd_buf_cpu_va[methodSize++] = payload;
+
+		launch |= 0x00000400;
+
+		/* converted into number of words */
+		size /= sizeof(u32);
+	}
+
+	/* setup the destination/output */
+	cmd_buf_cpu_va[methodSize++] = 0x20018103;
+	cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_buf) & NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
+
+	cmd_buf_cpu_va[methodSize++] = 0x20018102;
+	cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_buf) & NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
+
+	cmd_buf_cpu_va[methodSize++] = 0x20018099;
+	if (launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) {
+		cmd_buf_cpu_va[methodSize++] = 0x00000000;
+	} else if (launch_flags & NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) {
+		cmd_buf_cpu_va[methodSize++] = 0x00000002;
+	} else {
+		cmd_buf_cpu_va[methodSize++] = 0x00000001;
+	}
+
+	launch |= 0x00002000;
+
+	/* setup the format */
+	cmd_buf_cpu_va[methodSize++] = 0x20018107;
+	cmd_buf_cpu_va[methodSize++] = 1;
+	cmd_buf_cpu_va[methodSize++] = 0x20018106;
+	cmd_buf_cpu_va[methodSize++] = u64_lo32(size);
+
+	launch |= 0x00000004;
+
+	if (launch_flags & NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR)
+		launch |= 0x00000000;
+	else
+		launch |= 0x00000080;
+
+	if (launch_flags & NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR)
+		launch |= 0x00000000;
+	else
+		launch |= 0x00000100;
+
+	if (launch_flags & NVGPU_CE_DATA_TRANSFER_TYPE_NON_PIPELINED)
+		launch |= 0x00000002;
+	else
+		launch |= 0x00000001;
+
+	cmd_buf_cpu_va[methodSize++] = 0x200180c0;
+	cmd_buf_cpu_va[methodSize++] = launch;
+
+	return methodSize;
+}
+
+/* global CE app related apis */
+int gk20a_init_ce_support(struct gk20a *g)
+{
+	struct gk20a_ce_app *ce_app = &g->ce_app;
+
+	if (ce_app->initialised) {
+		/* assume this happen during poweron/poweroff GPU sequence */
+		ce_app->app_state = NVGPU_CE_ACTIVE;
+		gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_RESUME);
+		return 0;
+	}
+
+	gk20a_dbg(gpu_dbg_fn, "ce: init");
+
+	mutex_init(&ce_app->app_mutex);
+	mutex_lock(&ce_app->app_mutex);
+
+	INIT_LIST_HEAD(&ce_app->allocated_contexts);
+	ce_app->ctx_count = 0;
+	ce_app->next_ctx_id = 0;
+	ce_app->initialised = true;
+	ce_app->app_state = NVGPU_CE_ACTIVE;
+
+	mutex_unlock(&ce_app->app_mutex);
+	gk20a_dbg(gpu_dbg_cde_ctx, "ce: init finished");
+
+	return 0;
+}
+
+void gk20a_ce_destroy(struct gk20a *g)
+{
+	struct gk20a_ce_app *ce_app = &g->ce_app;
+	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+
+	if (!ce_app->initialised)
+		return;
+
+	ce_app->app_state = NVGPU_CE_SUSPEND;
+	ce_app->initialised = false;
+
+	mutex_lock(&ce_app->app_mutex);
+
+	list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+			&ce_app->allocated_contexts, list) {
+		gk20a_ce_delete_gpu_context(ce_ctx);
+	}
+
+	INIT_LIST_HEAD(&ce_app->allocated_contexts);
+	ce_app->ctx_count = 0;
+	ce_app->next_ctx_id = 0;
+
+	mutex_unlock(&ce_app->app_mutex);
+	mutex_destroy(&ce_app->app_mutex);
+}
+
+void gk20a_ce_suspend(struct gk20a *g)
+{
+	struct gk20a_ce_app *ce_app = &g->ce_app;
+
+	if (!ce_app->initialised)
+		return;
+
+	ce_app->app_state = NVGPU_CE_SUSPEND;
+	gk20a_ce_notify_all_user(g, NVGPU_CE_CONTEXT_SUSPEND);
+
+	return;
+}
+
+/* CE app utility functions */
+u32 gk20a_ce_create_context_with_cb(struct device *dev,
+		int runlist_id,
+		int priority,
+		int timeslice,
+		int runlist_level,
+		ce_event_callback user_event_callback)
+{
+	struct gk20a_gpu_ctx *ce_ctx;
+	struct gk20a *g = gk20a_from_dev(dev);
+	struct gk20a_ce_app *ce_app = &g->ce_app;
+	u32 ctx_id = ~0;
+	int err = 0;
+
+	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
+		return ctx_id;
+
+	ce_ctx = kzalloc(sizeof(*ce_ctx), GFP_KERNEL);
+	if (!ce_ctx)
+		return ctx_id;
+
+	mutex_init(&ce_ctx->gpu_ctx_mutex);
+
+	ce_ctx->g = g;
+	ce_ctx->dev = g->dev;
+	ce_ctx->user_event_callback = user_event_callback;
+
+	ce_ctx->cmd_buf_read_queue_offset = 0;
+	ce_ctx->cmd_buf_end_queue_offset =
+		(NVGPU_CE_COMMAND_BUF_SIZE / NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF);
+
+	ce_ctx->submitted_seq_number = 0;
+	ce_ctx->completed_seq_number = 0;
+
+	/* always kernel client needs privileged channel */
+	ce_ctx->ch = gk20a_open_new_channel_with_cb(g, gk20a_ce_finished_ctx_cb,
+			ce_ctx,
+			runlist_id,
+			true);
+	if (!ce_ctx->ch) {
+		gk20a_err(ce_ctx->dev, "ce: gk20a channel not available");
+		goto end;
+	}
+
+	/* bind the channel to the vm */
+	gk20a_vm_get(&g->mm.ce.vm);
+	ce_ctx->vm = ce_ctx->ch->vm = &g->mm.ce.vm;
+	err = channel_gk20a_commit_va(ce_ctx->ch);
+	if (err) {
+		gk20a_err(ce_ctx->dev, "ce: could not bind vm");
+		goto end;
+	}
+
+	/* allocate gpfifo (1024 should be more than enough) */
+	err = gk20a_alloc_channel_gpfifo(ce_ctx->ch,
+		&(struct nvgpu_alloc_gpfifo_args){1024, 0});
+	if (err) {
+		gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo");
+		goto end;
+	}
+
+	/* allocate command buffer (4096 should be more than enough) from sysmem*/
+	err = gk20a_gmmu_alloc_map_sys(ce_ctx->vm, NVGPU_CE_COMMAND_BUF_SIZE, &ce_ctx->cmd_buf_mem);
+	if (err) {
+		gk20a_err(ce_ctx->dev,
+			"ce: could not allocate command buffer for CE context");
+		goto end;
+	}
+
+	memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00, ce_ctx->cmd_buf_mem.size);
+
+	/* -1 means default channel priority */
+	if (priority != -1) {
+		err = gk20a_channel_set_priority(ce_ctx->ch, priority);
+		if (err) {
+			gk20a_err(ce_ctx->dev,
+				"ce: could not set the channel priority for CE context");
+			goto end;
+		}
+	}
+
+	/* -1 means default channel timeslice value */
+	if (timeslice != -1) {
+		err = gk20a_channel_set_timeslice(ce_ctx->ch, timeslice);
+		if (err) {
+			gk20a_err(ce_ctx->dev,
+				"ce: could not set the channel timeslice value for CE context");
+			goto end;
+		}
+	}
+
+	/* -1 means default channel runlist level */
+	if (runlist_level != -1) {
+		err = gk20a_channel_set_runlist_interleave(ce_ctx->ch, runlist_level);
+		if (err) {
+			gk20a_err(ce_ctx->dev,
+				"ce: could not set the runlist interleave for CE context");
+			goto end;
+		}
+	}
+
+	mutex_lock(&ce_app->app_mutex);
+	ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
+	list_add(&ce_ctx->list, &ce_app->allocated_contexts);
+	++ce_app->next_ctx_id;
+	++ce_app->ctx_count;
+	mutex_unlock(&ce_app->app_mutex);
+
+	ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;
+
+end:
+	if (ctx_id == ~0) {
+		mutex_lock(&ce_app->app_mutex);
+		gk20a_ce_delete_gpu_context(ce_ctx);
+		mutex_unlock(&ce_app->app_mutex);
+	}
+	return ctx_id;
+
+}
+EXPORT_SYMBOL(gk20a_ce_create_context_with_cb);
+
+int gk20a_ce_execute_ops(struct device *dev,
+		u32 ce_ctx_id,
+		u64 src_buf,
+		u64 dst_buf,
+		u64 size,
+		unsigned int payload,
+		int launch_flags,
+		int request_operation,
+		struct gk20a_fence *gk20a_fence_in,
+		u32 submit_flags,
+		struct gk20a_fence **gk20a_fence_out)
+{
+	int ret = -EPERM;
+	struct gk20a *g = gk20a_from_dev(dev);
+	struct gk20a_ce_app *ce_app = &g->ce_app;
+	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+	bool found = false;
+	u32 *cmd_buf_cpu_va;
+	u64 cmd_buf_gpu_va = 0;
+	u32 methodSize;
+	u32 cmd_buf_read_offset;
+	u32 fence_index;
+	struct nvgpu_gpfifo gpfifo;
+	struct nvgpu_fence fence = {0,0};
+	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
+	struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics;
+
+	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
+		goto end;
+
+	mutex_lock(&ce_app->app_mutex);
+
+	list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+			&ce_app->allocated_contexts, list) {
+		if (ce_ctx->ctx_id == ce_ctx_id) {
+			found = true;
+			break;
+		}
+	}
+
+	mutex_unlock(&ce_app->app_mutex);
+
+	if (!found) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
+		ret = -ENODEV;
+		goto end;
+	}
+
+	mutex_lock(&ce_ctx->gpu_ctx_mutex);
+
+	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
+
+	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
+			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
+
+	/* at end of command buffer has gk20a_fence for command buffer sync */
+	fence_index = (cmd_buf_read_offset +
+			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
+			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
+
+	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
+		ret = -ENOMEM;
+		goto noop;
+	}
+
+	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
+
+	/* 0 is treated as invalid pre-sync */
+	if (cmd_buf_cpu_va[fence_index]) {
+		struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
+
+		memcpy((void *)&ce_cmd_buf_fence_in,
+			(void *)(cmd_buf_cpu_va + fence_index),
+			sizeof(struct gk20a_fence *));
+		ret = gk20a_fence_wait(ce_cmd_buf_fence_in, gk20a_get_gr_idle_timeout(g));
+
+		gk20a_fence_put(ce_cmd_buf_fence_in);
+		/* Reset the stored last pre-sync */
+		memset((void *)(cmd_buf_cpu_va + fence_index),
+			0,
+			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
+		if (ret)
+			goto noop;
+	}
+
+	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));
+
+	methodSize = gk20a_ce_prepare_submit(src_buf,
+			dst_buf,
+			size,
+			&cmd_buf_cpu_va[cmd_buf_read_offset],
+			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
+			payload,
+			gk20a_get_valid_launch_flags(g, launch_flags),
+			request_operation,
+			gpu_capability->dma_copy_class,
+			gk20a_fence_in);
+
+	if (methodSize) {
+		/* TODO: Remove CPU pre-fence wait */
+		if (gk20a_fence_in) {
+			ret = gk20a_fence_wait(gk20a_fence_in, gk20a_get_gr_idle_timeout(g));
+			gk20a_fence_put(gk20a_fence_in);
+			if (ret)
+				goto noop;
+		}
+
+		/* store the element into gpfifo */
+		gpfifo.entry0 =
+			u64_lo32(cmd_buf_gpu_va);
+		gpfifo.entry1 =
+			(u64_hi32(cmd_buf_gpu_va) |
+			pbdma_gp_entry1_length_f(methodSize));
+
+		/* take always the postfence as it is needed for protecting the ce context */
+		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
+
+		wmb();
+
+		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
+				1, submit_flags, &fence, &ce_cmd_buf_fence_out, true);
+
+		if (!ret) {
+			memcpy((void *)(cmd_buf_cpu_va + fence_index),
+				(void *)&ce_cmd_buf_fence_out,
+				sizeof(struct gk20a_fence *));
+
+			if (gk20a_fence_out) {
+				gk20a_fence_get(ce_cmd_buf_fence_out);
+				*gk20a_fence_out = ce_cmd_buf_fence_out;
+			}
+
+			/* Next available command buffer queue Index */
+			++ce_ctx->cmd_buf_read_queue_offset;
+			++ce_ctx->submitted_seq_number;
+		}
+	} else
+		ret = -ENOMEM;
+noop:
+	mutex_unlock(&ce_ctx->gpu_ctx_mutex);
+end:
+	return ret;
+}
+EXPORT_SYMBOL(gk20a_ce_execute_ops);
+
+void gk20a_ce_delete_context(struct device *dev,
+		u32 ce_ctx_id)
+{
+	struct gk20a *g = gk20a_from_dev(dev);
+	struct gk20a_ce_app *ce_app = &g->ce_app;
+	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
+
+	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
+		return;
+
+	mutex_lock(&ce_app->app_mutex);
+
+	list_for_each_entry_safe(ce_ctx, ce_ctx_save,
+			&ce_app->allocated_contexts, list) {
+		if (ce_ctx->ctx_id == ce_ctx_id) {
+			gk20a_ce_delete_gpu_context(ce_ctx);
+			--ce_app->ctx_count;
+			break;
+		}
+	}
+
+	mutex_unlock(&ce_app->app_mutex);
+	return;
+}
+EXPORT_SYMBOL(gk20a_ce_delete_context);
+
+#ifdef CONFIG_DEBUG_FS
+void gk20a_ce_debugfs_init(struct device *dev)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	struct gk20a *g = get_gk20a(dev);
+
+	debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
+			platform->debugfs, &g->ce_app.ctx_count);
+	debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
+			platform->debugfs, &g->ce_app.app_state);
+	debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
+			platform->debugfs, &g->ce_app.next_ctx_id);
+}
+#endif
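
One detail worth calling out from the patch: gk20a_ce_execute_ops and gk20a_ce_free_command_buffer_stored_fence both rely on the convention that the last NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING bytes of each per-kickoff slot in the command buffer ring hold the struct gk20a_fence pointer of the most recent submit that used that slot. The helper below is a hypothetical restatement of that index arithmetic, not part of the commit; the actual constant values live in ce2_gk20a.h and are not shown in this diff.

```c
/*
 * Hypothetical helper restating the slot/fence layout used by this patch;
 * not part of the commit. Each per-kickoff slot of the command buffer ring
 * ends with a small tracing area that stores a struct gk20a_fence pointer.
 */
#include <linux/types.h>

static inline u32 ce_slot_fence_word_index(u32 slot,
					   u32 per_kickoff_bytes,
					   u32 tracing_bytes)
{
	u32 slot_words = per_kickoff_bytes / sizeof(u32);

	/* word offset of the stored fence pointer for this slot */
	return slot * slot_words + slot_words - tracing_bytes / sizeof(u32);
}
```

With NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF and NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING as the two byte sizes, this reproduces the fence_index computation that appears in both functions above; a stored value of 0 is treated as "no pre-sync fence" for the slot.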