diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r-- | drivers/gpu/nvgpu/common/ce2.c | 160 |
1 file changed, 160 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/ce2.c b/drivers/gpu/nvgpu/common/ce2.c new file mode 100644 index 00000000..9385b531 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ce2.c | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <nvgpu/types.h> | ||
24 | #include <nvgpu/channel.h> | ||
25 | |||
26 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | ||
27 | |||
28 | #include "gk20a/ce2_gk20a.h" | ||
29 | #include "gk20a/gk20a.h" | ||
30 | |||
31 | static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags) | ||
32 | { | ||
33 | /* there is no local memory available, | ||
34 | don't allow local memory related CE flags */ | ||
35 | if (!g->mm.vidmem.size) { | ||
36 | launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | | ||
37 | NVGPU_CE_DST_LOCATION_LOCAL_FB); | ||
38 | } | ||
39 | return launch_flags; | ||
40 | } | ||
41 | |||
/*
 * Submit one copy-engine (CE) operation on an existing CE context.
 *
 * Looks up the context identified by @ce_ctx_id in the CE app's list of
 * allocated contexts, encodes the requested operation as a CE method
 * stream into one slot of the context's ring of command buffers, and
 * submits it to the context's channel as a single gpfifo entry.
 *
 * @ce_ctx_id:         id of a previously allocated CE context
 * @src_buf/@dst_buf:  GPU virtual addresses passed to the method encoder
 * @size:              byte count of the requested operation
 * @payload:           opaque value forwarded to gk20a_ce_prepare_submit()
 * @launch_flags:      NVGPU_CE_* flags; local-FB flags are masked out on
 *                     vidmem-less GPUs via gk20a_get_valid_launch_flags()
 * @request_operation: CE operation selector, forwarded to the encoder
 * @submit_flags:      gpfifo submit flags; FENCE_GET is always forced on
 * @gk20a_fence_out:   optional; if non-NULL, receives an extra reference
 *                     to this submission's post-fence (caller must put it)
 *
 * Returns 0 on success; -EPERM if the CE app is not active, -EINVAL for an
 * unknown context id, -ENODEV if the context is not in the ALLOCATED state,
 * -ENOMEM if the method stream could not be encoded, or the error code from
 * the previous-fence wait / gpfifo submission.
 */
int gk20a_ce_execute_ops(struct gk20a *g,
		u32 ce_ctx_id,
		u64 src_buf,
		u64 dst_buf,
		u64 size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		u32 submit_flags,
		struct gk20a_fence **gk20a_fence_out)
{
	int ret = -EPERM;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
	bool found = false;
	u32 *cmd_buf_cpu_va;
	u64 cmd_buf_gpu_va = 0;
	u32 methodSize;
	u32 cmd_buf_read_offset;
	u32 dma_copy_class;
	struct nvgpu_gpfifo_entry gpfifo;
	struct nvgpu_channel_fence fence = {0, 0};
	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;

	/* Reject submissions unless the CE app is initialised and active. */
	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
		goto end;

	/* Find the requested context under the app-wide lock. */
	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			found = true;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);

	/*
	 * NOTE(review): ce_ctx is dereferenced below after app_mutex has been
	 * dropped — this assumes a context cannot be freed concurrently with
	 * an in-flight submission; verify against the context teardown path.
	 */
	if (!found) {
		ret = -EINVAL;
		goto end;
	}

	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
		ret = -ENODEV;
		goto end;
	}

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

	/* Wrap the slot index around the fixed-size ring of in-flight jobs. */
	ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;

	/* Word (u32) offset of this slot within the command buffer memory. */
	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
			(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));

	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

	/*
	 * If this slot was used by an earlier job, wait for its post-fence
	 * (so the engine is done with the buffer) and drop our reference
	 * before reusing the slot. On wait failure, bail out with that error.
	 */
	if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
		struct gk20a_fence **prev_post_fence =
			&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];

		ret = gk20a_fence_wait(g, *prev_post_fence,
				gk20a_get_gr_idle_timeout(g));

		gk20a_fence_put(*prev_post_fence);
		*prev_post_fence = NULL;
		if (ret)
			goto noop;
	}

	/* GPU VA of the chosen slot, derived from the same word offset. */
	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));

	/* Encode the CE method stream for this operation into the slot. */
	dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
	methodSize = gk20a_ce_prepare_submit(src_buf,
			dst_buf,
			size,
			&cmd_buf_cpu_va[cmd_buf_read_offset],
			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
			payload,
			gk20a_get_valid_launch_flags(g, launch_flags),
			request_operation,
			dma_copy_class);

	if (methodSize) {
		/* store the element into gpfifo */
		gpfifo.entry0 =
			u64_lo32(cmd_buf_gpu_va);
		gpfifo.entry1 =
			(u64_hi32(cmd_buf_gpu_va) |
			pbdma_gp_entry1_length_f(methodSize));

		/* take always the postfence as it is needed for protecting the ce context */
		submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;

		/* Make the CPU-written methods visible before kicking off. */
		nvgpu_smp_wmb();

		ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo,
				1, submit_flags, &fence, &ce_cmd_buf_fence_out);

		if (!ret) {
			/*
			 * Park the post-fence on this slot; the reuse path
			 * above waits on it before recycling the buffer.
			 */
			ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
				ce_cmd_buf_fence_out;
			if (gk20a_fence_out) {
				/* Hand the caller its own fence reference. */
				gk20a_fence_get(ce_cmd_buf_fence_out);
				*gk20a_fence_out = ce_cmd_buf_fence_out;
			}

			/* Next available command buffer queue Index */
			++ce_ctx->cmd_buf_read_queue_offset;
		}
	} else {
		ret = -ENOMEM;
	}
noop:
	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
	return ret;
}