summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
diff options
context:
space:
mode:
authorDeepak Nibade <dnibade@nvidia.com>2017-10-26 11:29:56 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-11-02 08:09:59 -0400
commit23c7903eff6ee1ab184dfcc62c054de1557e5b1d (patch)
treea5122028e181e5c6009f9f8b66bfbf00f69a9290 /drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
parent5f8cfaa250f08499f587da0097f6accaa5eedf15 (diff)
gpu: nvgpu: move submit path to linux
Nvgpu submit path has a lot of dependency on the Linux framework, e.g. use of copy_from_user, use of structures defined in uapi/nvgpu headers, dma_buf_* calls for trace support, etc. Hence, to keep common code independent of Linux code, move the submit path to the Linux directory. Move the below APIs to common/linux/channel.c: trace_write_pushbuffer(), trace_write_pushbuffer_range(), gk20a_submit_prepare_syncs(), gk20a_submit_append_priv_cmdbuf(), gk20a_submit_append_gpfifo(), gk20a_submit_channel_gpfifo(). Move the below API to common/linux/ce2.c: gk20a_ce_execute_ops(). Define gk20a_ce_execute_ops() in common/linux/ce2.c, and declare it in gk20a/ce2_gk20a.h since it is needed in common/mm code too. Each OS needs to implement this API separately. gk20a_channel_alloc_gpfifo() uses sizeof(nvgpu_gpfifo) to get the size of one gpfifo entry, but the structure nvgpu_gpfifo is Linux-specific. Define a new nvgpu_get_gpfifo_entry_size() in Linux-specific code and use it in gk20a_channel_alloc_gpfifo() to get the gpfifo entry size. Each OS needs to implement this API separately. Export some APIs from gk20a/ce2_gk20a.h and gk20a/channel_gk20a.h that are needed in Linux code. Jira NVGPU-259 Jira NVGPU-313 Change-Id: I360c6cb8ce4494b1e50c66af334a2a379f0d2dc4 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1586277 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ce2_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/ce2_gk20a.c164
1 file changed, 1 insertion, 163 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 5314a1be..9ff6c792 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -249,18 +249,7 @@ static inline unsigned int gk20a_ce_get_method_size(int request_operation,
249 return methodsize; 249 return methodsize;
250} 250}
251 251
/* Sanitize CE launch flags: when the GPU reports no video memory
 * (g->mm.vidmem.size == 0), strip the LOCAL_FB source/destination
 * location flags and return the masked flag set. */
252static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) 252int gk20a_ce_prepare_submit(u64 src_buf,
253{
254	/* there is no local memory available,
255	   don't allow local memory related CE flags */
256	if (!g->mm.vidmem.size) {
257		launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
258			NVGPU_CE_DST_LOCATION_LOCAL_FB);
259	}
260	return launch_flags;
261}
262
263static int gk20a_ce_prepare_submit(u64 src_buf,
264 u64 dst_buf, 253 u64 dst_buf,
265 u64 size, 254 u64 size,
266 u32 *cmd_buf_cpu_va, 255 u32 *cmd_buf_cpu_va,
@@ -626,157 +615,6 @@ end:
626} 615}
627EXPORT_SYMBOL(gk20a_ce_create_context_with_cb); 616EXPORT_SYMBOL(gk20a_ce_create_context_with_cb);
628 617
/*
 * gk20a_ce_execute_ops - build and submit one copy-engine operation on the
 * CE context identified by @ce_ctx_id.
 *
 * Looks up the context in ce_app->allocated_contexts, builds the CE methods
 * into the context's circular command buffer via gk20a_ce_prepare_submit(),
 * and submits them through gk20a_submit_channel_gpfifo(). The post-fence of
 * each kickoff is stored (as a raw gk20a_fence pointer) in the tail slot of
 * that kickoff's command-buffer chunk and is waited on / released the next
 * time the chunk is reused.
 *
 * Returns 0 on success; -EPERM when the CE app is not initialised/active,
 * -EINVAL when no context matches @ce_ctx_id, -ENODEV when the context is
 * not in the ALLOCATED state, and -ENOMEM when the fence pointer does not
 * fit the reserved slot or method generation fails.
 */
629int gk20a_ce_execute_ops(struct gk20a *g,
630		u32 ce_ctx_id,
631		u64 src_buf,
632		u64 dst_buf,
633		u64 size,
634		unsigned int payload,
635		int launch_flags,
636		int request_operation,
637		struct gk20a_fence *gk20a_fence_in,
638		u32 submit_flags,
639		struct gk20a_fence **gk20a_fence_out)
640{
641	int ret = -EPERM;
642	struct gk20a_ce_app *ce_app = &g->ce_app;
643	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
644	bool found = false;
645	u32 *cmd_buf_cpu_va;
646	u64 cmd_buf_gpu_va = 0;
647	u32 methodSize;
648	u32 cmd_buf_read_offset;
649	u32 fence_index;
650	struct nvgpu_gpfifo gpfifo;
651	struct nvgpu_fence fence = {0,0};
652	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
653	struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics;
654
655	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
656		goto end;
657
	/* Look up the context by id; the list is only walked under app_mutex. */
658	nvgpu_mutex_acquire(&ce_app->app_mutex);
659
660	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
661			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
662		if (ce_ctx->ctx_id == ce_ctx_id) {
663			found = true;
664			break;
665		}
666	}
667
668	nvgpu_mutex_release(&ce_app->app_mutex);
669
	/* NOTE(review): ce_ctx is dereferenced below after app_mutex is
	   dropped; assumes the context cannot be deleted concurrently —
	   confirm with callers. */
670	if (!found) {
671		ret = -EINVAL;
672		goto end;
673	}
674
675	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
676		ret = -ENODEV;
677		goto end;
678	}
679
680	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
681
	/* Wrap the circular command-buffer read index, then locate this
	   kickoff's chunk as a u32 offset into the buffer. */
682	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
683
684	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
685			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
686
687	/* at end of command buffer has gk20a_fence for command buffer sync */
688	fence_index = (cmd_buf_read_offset +
689			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
690			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
691
	/* The tail slot must be large enough to hold a raw fence pointer. */
692	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
693		ret = -ENOMEM;
694		goto noop;
695	}
696
697	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
698
699	/* 0 is treated as invalid pre-sync */
700	if (cmd_buf_cpu_va[fence_index]) {
701		struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
702
		/* Recover the fence pointer stashed by the previous submit on
		   this chunk and wait for it before reusing the chunk. */
703		memcpy((void *)&ce_cmd_buf_fence_in,
704				(void *)(cmd_buf_cpu_va + fence_index),
705				sizeof(struct gk20a_fence *));
706		ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
707				gk20a_get_gr_idle_timeout(g));
708
709		gk20a_fence_put(ce_cmd_buf_fence_in);
710		/* Reset the stored last pre-sync */
711		memset((void *)(cmd_buf_cpu_va + fence_index),
712				0,
713				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
714		if (ret)
715			goto noop;
716	}
717
718	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));
719
720	methodSize = gk20a_ce_prepare_submit(src_buf,
721			dst_buf,
722			size,
723			&cmd_buf_cpu_va[cmd_buf_read_offset],
724			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
725			payload,
726			gk20a_get_valid_launch_flags(g, launch_flags),
727			request_operation,
728			gpu_capability->dma_copy_class,
729			gk20a_fence_in);
730
731	if (methodSize) {
732		/* TODO: Remove CPU pre-fence wait */
733		if (gk20a_fence_in) {
734			ret = gk20a_fence_wait(g, gk20a_fence_in,
735					gk20a_get_gr_idle_timeout(g));
736			gk20a_fence_put(gk20a_fence_in);
737			if (ret)
738				goto noop;
739		}
740
741		/* store the element into gpfifo */
742		gpfifo.entry0 =
743			u64_lo32(cmd_buf_gpu_va);
744		gpfifo.entry1 =
745			(u64_hi32(cmd_buf_gpu_va) |
746			pbdma_gp_entry1_length_f(methodSize));
747
748		/* take always the postfence as it is needed for protecting the ce context */
749		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
750
		/* Ensure the CPU-written methods are visible before submit. */
751		nvgpu_smp_wmb();
752
753		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
754				1, submit_flags, &fence,
755				&ce_cmd_buf_fence_out, false, NULL);
756
757		if (!ret) {
			/* Stash the post-fence pointer in the chunk's tail slot
			   for the pre-sync wait on the next reuse. */
758			memcpy((void *)(cmd_buf_cpu_va + fence_index),
759					(void *)&ce_cmd_buf_fence_out,
760					sizeof(struct gk20a_fence *));
761
762			if (gk20a_fence_out) {
763				gk20a_fence_get(ce_cmd_buf_fence_out);
764				*gk20a_fence_out = ce_cmd_buf_fence_out;
765			}
766
767			/* Next available command buffer queue Index */
768			++ce_ctx->cmd_buf_read_queue_offset;
769			++ce_ctx->submitted_seq_number;
770		}
771	} else
772		ret = -ENOMEM;
773noop:
774	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
775end:
776	return ret;
777}
778EXPORT_SYMBOL(gk20a_ce_execute_ops);
779
780void gk20a_ce_delete_context(struct gk20a *g, 618void gk20a_ce_delete_context(struct gk20a *g,
781 u32 ce_ctx_id) 619 u32 ce_ctx_id)
782{ 620{