diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2017-10-26 11:29:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-11-02 08:09:59 -0400 |
commit | 23c7903eff6ee1ab184dfcc62c054de1557e5b1d (patch) | |
tree | a5122028e181e5c6009f9f8b66bfbf00f69a9290 /drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |
parent | 5f8cfaa250f08499f587da0097f6accaa5eedf15 (diff) |
gpu: nvgpu: move submit path to linux
Nvgpu submit path has a lot of dependency on Linux framework
e.g. use of copy_from_user, use of structures defined in uapi/nvgpu headers,
dma_buf_* calls for trace support etc
Hence to keep common code independent of Linux code, move submit path to
Linux directory
Move below APIs to common/linux/channel.c
trace_write_pushbuffer()
trace_write_pushbuffer_range()
gk20a_submit_prepare_syncs()
gk20a_submit_append_priv_cmdbuf()
gk20a_submit_append_gpfifo()
gk20a_submit_channel_gpfifo()
Move below APIs to common/linux/ce2.c
gk20a_ce_execute_ops()
Define gk20a_ce_execute_ops() in common/linux/ce2.c, and declare it in
gk20a/ce2_gk20a.h since it is needed in common/mm code too
Each OS needs to implement this API separately
gk20a_channel_alloc_gpfifo() uses sizeof(nvgpu_gpfifo) to get the size of one gpfifo
entry, but the structure nvgpu_gpfifo is Linux-specific
Define new nvgpu_get_gpfifo_entry_size() in linux specific code and use it
in gk20a_channel_alloc_gpfifo() to get gpfifo entry size
Each OS needs to implement this API separately
Export some APIs from gk20a/ce2_gk20a.h and gk20a/channel_gk20a.h that are
needed in linux code
Jira NVGPU-259
Jira NVGPU-313
Change-Id: I360c6cb8ce4494b1e50c66af334a2a379f0d2dc4
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586277
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ce2_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 164 |
1 files changed, 1 insertions, 163 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 5314a1be..9ff6c792 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -249,18 +249,7 @@ static inline unsigned int gk20a_ce_get_method_size(int request_operation, | |||
249 | return methodsize; | 249 | return methodsize; |
250 | } | 250 | } |
251 | 251 | ||
252 | static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) | 252 | int gk20a_ce_prepare_submit(u64 src_buf, |
253 | { | ||
254 | /* there is no local memory available, | ||
255 | don't allow local memory related CE flags */ | ||
256 | if (!g->mm.vidmem.size) { | ||
257 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | ||
258 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | ||
259 | } | ||
260 | return launch_flags; | ||
261 | } | ||
262 | |||
263 | static int gk20a_ce_prepare_submit(u64 src_buf, | ||
264 | u64 dst_buf, | 253 | u64 dst_buf, |
265 | u64 size, | 254 | u64 size, |
266 | u32 *cmd_buf_cpu_va, | 255 | u32 *cmd_buf_cpu_va, |
@@ -626,157 +615,6 @@ end: | |||
626 | } | 615 | } |
627 | EXPORT_SYMBOL(gk20a_ce_create_context_with_cb); | 616 | EXPORT_SYMBOL(gk20a_ce_create_context_with_cb); |
628 | 617 | ||
629 | int gk20a_ce_execute_ops(struct gk20a *g, | ||
630 | u32 ce_ctx_id, | ||
631 | u64 src_buf, | ||
632 | u64 dst_buf, | ||
633 | u64 size, | ||
634 | unsigned int payload, | ||
635 | int launch_flags, | ||
636 | int request_operation, | ||
637 | struct gk20a_fence *gk20a_fence_in, | ||
638 | u32 submit_flags, | ||
639 | struct gk20a_fence **gk20a_fence_out) | ||
640 | { | ||
641 | int ret = -EPERM; | ||
642 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
643 | struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; | ||
644 | bool found = false; | ||
645 | u32 *cmd_buf_cpu_va; | ||
646 | u64 cmd_buf_gpu_va = 0; | ||
647 | u32 methodSize; | ||
648 | u32 cmd_buf_read_offset; | ||
649 | u32 fence_index; | ||
650 | struct nvgpu_gpfifo gpfifo; | ||
651 | struct nvgpu_fence fence = {0,0}; | ||
652 | struct gk20a_fence *ce_cmd_buf_fence_out = NULL; | ||
653 | struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics; | ||
654 | |||
655 | if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) | ||
656 | goto end; | ||
657 | |||
658 | nvgpu_mutex_acquire(&ce_app->app_mutex); | ||
659 | |||
660 | nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, | ||
661 | &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { | ||
662 | if (ce_ctx->ctx_id == ce_ctx_id) { | ||
663 | found = true; | ||
664 | break; | ||
665 | } | ||
666 | } | ||
667 | |||
668 | nvgpu_mutex_release(&ce_app->app_mutex); | ||
669 | |||
670 | if (!found) { | ||
671 | ret = -EINVAL; | ||
672 | goto end; | ||
673 | } | ||
674 | |||
675 | if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { | ||
676 | ret = -ENODEV; | ||
677 | goto end; | ||
678 | } | ||
679 | |||
680 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); | ||
681 | |||
682 | ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; | ||
683 | |||
684 | cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * | ||
685 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); | ||
686 | |||
687 | /* at end of command buffer has gk20a_fence for command buffer sync */ | ||
688 | fence_index = (cmd_buf_read_offset + | ||
689 | ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) - | ||
690 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32)))); | ||
691 | |||
692 | if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) { | ||
693 | ret = -ENOMEM; | ||
694 | goto noop; | ||
695 | } | ||
696 | |||
697 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; | ||
698 | |||
699 | /* 0 is treated as invalid pre-sync */ | ||
700 | if (cmd_buf_cpu_va[fence_index]) { | ||
701 | struct gk20a_fence * ce_cmd_buf_fence_in = NULL; | ||
702 | |||
703 | memcpy((void *)&ce_cmd_buf_fence_in, | ||
704 | (void *)(cmd_buf_cpu_va + fence_index), | ||
705 | sizeof(struct gk20a_fence *)); | ||
706 | ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in, | ||
707 | gk20a_get_gr_idle_timeout(g)); | ||
708 | |||
709 | gk20a_fence_put(ce_cmd_buf_fence_in); | ||
710 | /* Reset the stored last pre-sync */ | ||
711 | memset((void *)(cmd_buf_cpu_va + fence_index), | ||
712 | 0, | ||
713 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING); | ||
714 | if (ret) | ||
715 | goto noop; | ||
716 | } | ||
717 | |||
718 | cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); | ||
719 | |||
720 | methodSize = gk20a_ce_prepare_submit(src_buf, | ||
721 | dst_buf, | ||
722 | size, | ||
723 | &cmd_buf_cpu_va[cmd_buf_read_offset], | ||
724 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF, | ||
725 | payload, | ||
726 | gk20a_get_valid_launch_flags(g, launch_flags), | ||
727 | request_operation, | ||
728 | gpu_capability->dma_copy_class, | ||
729 | gk20a_fence_in); | ||
730 | |||
731 | if (methodSize) { | ||
732 | /* TODO: Remove CPU pre-fence wait */ | ||
733 | if (gk20a_fence_in) { | ||
734 | ret = gk20a_fence_wait(g, gk20a_fence_in, | ||
735 | gk20a_get_gr_idle_timeout(g)); | ||
736 | gk20a_fence_put(gk20a_fence_in); | ||
737 | if (ret) | ||
738 | goto noop; | ||
739 | } | ||
740 | |||
741 | /* store the element into gpfifo */ | ||
742 | gpfifo.entry0 = | ||
743 | u64_lo32(cmd_buf_gpu_va); | ||
744 | gpfifo.entry1 = | ||
745 | (u64_hi32(cmd_buf_gpu_va) | | ||
746 | pbdma_gp_entry1_length_f(methodSize)); | ||
747 | |||
748 | /* take always the postfence as it is needed for protecting the ce context */ | ||
749 | submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | ||
750 | |||
751 | nvgpu_smp_wmb(); | ||
752 | |||
753 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, | ||
754 | 1, submit_flags, &fence, | ||
755 | &ce_cmd_buf_fence_out, false, NULL); | ||
756 | |||
757 | if (!ret) { | ||
758 | memcpy((void *)(cmd_buf_cpu_va + fence_index), | ||
759 | (void *)&ce_cmd_buf_fence_out, | ||
760 | sizeof(struct gk20a_fence *)); | ||
761 | |||
762 | if (gk20a_fence_out) { | ||
763 | gk20a_fence_get(ce_cmd_buf_fence_out); | ||
764 | *gk20a_fence_out = ce_cmd_buf_fence_out; | ||
765 | } | ||
766 | |||
767 | /* Next available command buffer queue Index */ | ||
768 | ++ce_ctx->cmd_buf_read_queue_offset; | ||
769 | ++ce_ctx->submitted_seq_number; | ||
770 | } | ||
771 | } else | ||
772 | ret = -ENOMEM; | ||
773 | noop: | ||
774 | nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); | ||
775 | end: | ||
776 | return ret; | ||
777 | } | ||
778 | EXPORT_SYMBOL(gk20a_ce_execute_ops); | ||
779 | |||
780 | void gk20a_ce_delete_context(struct gk20a *g, | 618 | void gk20a_ce_delete_context(struct gk20a *g, |
781 | u32 ce_ctx_id) | 619 | u32 ce_ctx_id) |
782 | { | 620 | { |