diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2017-10-26 11:29:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-11-02 08:09:59 -0400 |
commit | 23c7903eff6ee1ab184dfcc62c054de1557e5b1d (patch) | |
tree | a5122028e181e5c6009f9f8b66bfbf00f69a9290 /drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |
parent | 5f8cfaa250f08499f587da0097f6accaa5eedf15 (diff) |
gpu: nvgpu: move submit path to linux
Nvgpu submit path has a lot of dependency on Linux framework
e.g. use of copy_from_user, use of structures defined in uapi/nvgpu headers,
dma_buf_* calls for trace support etc
Hence to keep common code independent of Linux code, move submit path to
Linux directory
Move below APIs to common/linux/channel.c
trace_write_pushbuffer()
trace_write_pushbuffer_range()
gk20a_submit_prepare_syncs()
gk20a_submit_append_priv_cmdbuf()
gk20a_submit_append_gpfifo()
gk20a_submit_channel_gpfifo()
Move below APIs to common/linux/ce2.c
gk20a_ce_execute_ops()
Define gk20a_ce_execute_ops() in common/linux/ce2.c, and declare it in
gk20a/ce2_gk20a.h since it is needed in common/mm code too
Each OS needs to implement this API separately
gk20a_channel_alloc_gpfifo() uses sizeof(nvgpu_gpfifo) to get the size of one gpfifo
entry, but the structure nvgpu_gpfifo is Linux-specific
Define new nvgpu_get_gpfifo_entry_size() in linux specific code and use it
in gk20a_channel_alloc_gpfifo() to get gpfifo entry size
Each OS needs to implement this API separately
Export some APIs from gk20a/ce2_gk20a.h and gk20a/channel_gk20a.h that are
needed in linux code
Jira NVGPU-259
Jira NVGPU-313
Change-Id: I360c6cb8ce4494b1e50c66af334a2a379f0d2dc4
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586277
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ce2_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 164 |
1 files changed, 1 insertions, 163 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 5314a1be..9ff6c792 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -249,18 +249,7 @@ static inline unsigned int gk20a_ce_get_method_size(int request_operation, | |||
249 | return methodsize; | 249 | return methodsize; |
250 | } | 250 | } |
251 | 251 | ||
252 | static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) | 252 | int gk20a_ce_prepare_submit(u64 src_buf, |
253 | { | ||
254 | /* there is no local memory available, | ||
255 | don't allow local memory related CE flags */ | ||
256 | if (!g->mm.vidmem.size) { | ||
257 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | ||
258 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | ||
259 | } | ||
260 | return launch_flags; | ||
261 | } | ||
262 | |||
263 | static int gk20a_ce_prepare_submit(u64 src_buf, | ||
264 | u64 dst_buf, | 253 | u64 dst_buf, |
265 | u64 size, | 254 | u64 size, |
266 | u32 *cmd_buf_cpu_va, | 255 | u32 *cmd_buf_cpu_va, |
@@ -626,157 +615,6 @@ end: | |||
626 | } | 615 | } |
627 | EXPORT_SYMBOL(gk20a_ce_create_context_with_cb); | 616 | EXPORT_SYMBOL(gk20a_ce_create_context_with_cb); |
628 | 617 | ||
629 | int gk20a_ce_execute_ops(struct gk20a *g, | ||
630 | u32 ce_ctx_id, | ||
631 | u64 src_buf, | ||
632 | u64 dst_buf, | ||
633 | u64 size, | ||
634 | unsigned int payload, | ||
635 | int launch_flags, | ||
636 | int request_operation, | ||
637 | struct gk20a_fence *gk20a_fence_in, | ||
638 | u32 submit_flags, | ||
639 | struct gk20a_fence **gk20a_fence_out) | ||
640 | { | ||
641 | int ret = -EPERM; | ||
642 | struct gk20a_ce_app *ce_app = &g->ce_app; | ||
643 | struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; | ||
644 | bool found = false; | ||
645 | u32 *cmd_buf_cpu_va; | ||
646 | u64 cmd_buf_gpu_va = 0; | ||
647 | u32 methodSize; | ||
648 | u32 cmd_buf_read_offset; | ||
649 | u32 fence_index; | ||
650 | struct nvgpu_gpfifo gpfifo; | ||
651 | struct nvgpu_fence fence = {0,0}; | ||
652 | struct gk20a_fence *ce_cmd_buf_fence_out = NULL; | ||
653 | struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics; | ||
654 | |||
655 | if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE) | ||
656 | goto end; | ||
657 | |||
658 | nvgpu_mutex_acquire(&ce_app->app_mutex); | ||
659 | |||
660 | nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, | ||
661 | &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { | ||
662 | if (ce_ctx->ctx_id == ce_ctx_id) { | ||
663 | found = true; | ||
664 | break; | ||
665 | } | ||
666 | } | ||
667 | |||
668 | nvgpu_mutex_release(&ce_app->app_mutex); | ||
669 | |||
670 | if (!found) { | ||
671 | ret = -EINVAL; | ||
672 | goto end; | ||
673 | } | ||
674 | |||
675 | if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { | ||
676 | ret = -ENODEV; | ||
677 | goto end; | ||
678 | } | ||
679 | |||
680 | nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); | ||
681 | |||
682 | ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset; | ||
683 | |||
684 | cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * | ||
685 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32))); | ||
686 | |||
687 | /* at end of command buffer has gk20a_fence for command buffer sync */ | ||
688 | fence_index = (cmd_buf_read_offset + | ||
689 | ((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) - | ||
690 | (NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32)))); | ||
691 | |||
692 | if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) { | ||
693 | ret = -ENOMEM; | ||
694 | goto noop; | ||
695 | } | ||
696 | |||
697 | cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; | ||
698 | |||
699 | /* 0 is treated as invalid pre-sync */ | ||
700 | if (cmd_buf_cpu_va[fence_index]) { | ||
701 | struct gk20a_fence * ce_cmd_buf_fence_in = NULL; | ||
702 | |||
703 | memcpy((void *)&ce_cmd_buf_fence_in, | ||
704 | (void *)(cmd_buf_cpu_va + fence_index), | ||
705 | sizeof(struct gk20a_fence *)); | ||
706 | ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in, | ||
707 | gk20a_get_gr_idle_timeout(g)); | ||
708 | |||
709 | gk20a_fence_put(ce_cmd_buf_fence_in); | ||
710 | /* Reset the stored last pre-sync */ | ||
711 | memset((void *)(cmd_buf_cpu_va + fence_index), | ||
712 | 0, | ||
713 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING); | ||
714 | if (ret) | ||
715 | goto noop; | ||
716 | } | ||
717 | |||
718 | cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); | ||
719 | |||
720 | methodSize = gk20a_ce_prepare_submit(src_buf, | ||
721 | dst_buf, | ||
722 | size, | ||
723 | &cmd_buf_cpu_va[cmd_buf_read_offset], | ||
724 | NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF, | ||
725 | payload, | ||
726 | gk20a_get_valid_launch_flags(g, launch_flags), | ||
727 | request_operation, | ||
728 | gpu_capability->dma_copy_class, | ||
729 | gk20a_fence_in); | ||
730 | |||
731 | if (methodSize) { | ||
732 | /* TODO: Remove CPU pre-fence wait */ | ||
733 | if (gk20a_fence_in) { | ||
734 | ret = gk20a_fence_wait(g, gk20a_fence_in, | ||
735 | gk20a_get_gr_idle_timeout(g)); | ||
736 | gk20a_fence_put(gk20a_fence_in); | ||
737 | if (ret) | ||
738 | goto noop; | ||
739 | } | ||
740 | |||
741 | /* store the element into gpfifo */ | ||
742 | gpfifo.entry0 = | ||
743 | u64_lo32(cmd_buf_gpu_va); | ||
744 | gpfifo.entry1 = | ||
745 | (u64_hi32(cmd_buf_gpu_va) | | ||
746 | pbdma_gp_entry1_length_f(methodSize)); | ||
747 | |||
748 | /* take always the postfence as it is needed for protecting the ce context */ | ||
749 | submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET; | ||
750 | |||
751 | nvgpu_smp_wmb(); | ||
752 | |||
753 | ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL, | ||
754 | 1, submit_flags, &fence, | ||
755 | &ce_cmd_buf_fence_out, false, NULL); | ||
756 | |||
757 | if (!ret) { | ||
758 | memcpy((void *)(cmd_buf_cpu_va + fence_index), | ||
759 | (void *)&ce_cmd_buf_fence_out, | ||
760 | sizeof(struct gk20a_fence *)); | ||
761 | |||
762 | if (gk20a_fence_out) { | ||
763 | gk20a_fence_get(ce_cmd_buf_fence_out); | ||
764 | *gk20a_fence_out = ce_cmd_buf_fence_out; | ||
765 | } | ||
766 | |||
767 | /* Next available command buffer queue Index */ | ||
768 | ++ce_ctx->cmd_buf_read_queue_offset; | ||
769 | ++ce_ctx->submitted_seq_number; | ||
770 | } | ||
771 | } else | ||
772 | ret = -ENOMEM; | ||
773 | noop: | ||
774 | nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); | ||
775 | end: | ||
776 | return ret; | ||
777 | } | ||
778 | EXPORT_SYMBOL(gk20a_ce_execute_ops); | ||
779 | |||
780 | void gk20a_ce_delete_context(struct gk20a *g, | 618 | void gk20a_ce_delete_context(struct gk20a *g, |
781 | u32 ce_ctx_id) | 619 | u32 ce_ctx_id) |
782 | { | 620 | { |