From 23c7903eff6ee1ab184dfcc62c054de1557e5b1d Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Thu, 26 Oct 2017 08:29:56 -0700
Subject: gpu: nvgpu: move submit path to linux

The nvgpu submit path has many dependencies on the Linux framework,
e.g. use of copy_from_user(), structures defined in the uapi/nvgpu
headers, and dma_buf_* calls for trace support.

Hence, to keep common code independent of Linux code, move the submit
path to the Linux directory.

Move the below APIs to common/linux/channel.c:
trace_write_pushbuffer()
trace_write_pushbuffer_range()
gk20a_submit_prepare_syncs()
gk20a_submit_append_priv_cmdbuf()
gk20a_submit_append_gpfifo()
gk20a_submit_channel_gpfifo()

Move the below API to common/linux/ce2.c:
gk20a_ce_execute_ops()

Define gk20a_ce_execute_ops() in common/linux/ce2.c and declare it in
gk20a/ce2_gk20a.h, since it is needed by common/mm code too.
Each OS needs to implement this API separately.

gk20a_channel_alloc_gpfifo() uses sizeof(struct nvgpu_gpfifo) to get
the size of one gpfifo entry, but struct nvgpu_gpfifo is Linux-specific.
Define a new nvgpu_get_gpfifo_entry_size() in Linux-specific code and
use it in gk20a_channel_alloc_gpfifo() to get the gpfifo entry size.
Each OS needs to implement this API separately.

Export some APIs from gk20a/ce2_gk20a.h and gk20a/channel_gk20a.h that
are needed in Linux code.

Jira NVGPU-259
Jira NVGPU-313

Change-Id: I360c6cb8ce4494b1e50c66af334a2a379f0d2dc4
Signed-off-by: Deepak Nibade
Reviewed-on: https://git-master.nvidia.com/r/1586277
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 164 +-----------------------------------
 1 file changed, 1 insertion(+), 163 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 5314a1be..9ff6c792 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -249,18 +249,7 @@ static inline unsigned int gk20a_ce_get_method_size(int request_operation,
 	return methodsize;
 }
 
-static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
-{
-	/* there is no local memory available,
-	   don't allow local memory related CE flags */
-	if (!g->mm.vidmem.size) {
-		launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
-			NVGPU_CE_DST_LOCATION_LOCAL_FB);
-	}
-	return launch_flags;
-}
-
-static int gk20a_ce_prepare_submit(u64 src_buf,
+int gk20a_ce_prepare_submit(u64 src_buf,
 		u64 dst_buf,
 		u64 size,
 		u32 *cmd_buf_cpu_va,
@@ -626,157 +615,6 @@ end:
 }
 EXPORT_SYMBOL(gk20a_ce_create_context_with_cb);
 
-int gk20a_ce_execute_ops(struct gk20a *g,
-		u32 ce_ctx_id,
-		u64 src_buf,
-		u64 dst_buf,
-		u64 size,
-		unsigned int payload,
-		int launch_flags,
-		int request_operation,
-		struct gk20a_fence *gk20a_fence_in,
-		u32 submit_flags,
-		struct gk20a_fence **gk20a_fence_out)
-{
-	int ret = -EPERM;
-	struct gk20a_ce_app *ce_app = &g->ce_app;
-	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
-	bool found = false;
-	u32 *cmd_buf_cpu_va;
-	u64 cmd_buf_gpu_va = 0;
-	u32 methodSize;
-	u32 cmd_buf_read_offset;
-	u32 fence_index;
-	struct nvgpu_gpfifo gpfifo;
-	struct nvgpu_fence fence = {0,0};
-	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
-	struct nvgpu_gpu_characteristics *gpu_capability = &g->gpu_characteristics;
-
-	if (!ce_app->initialised ||ce_app->app_state != NVGPU_CE_ACTIVE)
-		goto end;
-
-	nvgpu_mutex_acquire(&ce_app->app_mutex);
-
-	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
-			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
-		if (ce_ctx->ctx_id == ce_ctx_id) {
-			found = true;
-			break;
-		}
-	}
-
-	nvgpu_mutex_release(&ce_app->app_mutex);
-
-	if (!found) {
-		ret = -EINVAL;
-		goto end;
-	}
-
-	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
-		ret = -ENODEV;
-		goto end;
-	}
-
-	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
-
-	ce_ctx->cmd_buf_read_queue_offset %= ce_ctx->cmd_buf_end_queue_offset;
-
-	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)));
-
-	/* at end of command buffer has gk20a_fence for command buffer sync */
-	fence_index = (cmd_buf_read_offset +
-			((NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF / sizeof(u32)) -
-			(NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING / sizeof(u32))));
-
-	if (sizeof(struct gk20a_fence *) > NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING) {
-		ret = -ENOMEM;
-		goto noop;
-	}
-
-	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
-
-	/* 0 is treated as invalid pre-sync */
-	if (cmd_buf_cpu_va[fence_index]) {
-		struct gk20a_fence * ce_cmd_buf_fence_in = NULL;
-
-		memcpy((void *)&ce_cmd_buf_fence_in,
-				(void *)(cmd_buf_cpu_va + fence_index),
-				sizeof(struct gk20a_fence *));
-		ret = gk20a_fence_wait(g, ce_cmd_buf_fence_in,
-				gk20a_get_gr_idle_timeout(g));
-
-		gk20a_fence_put(ce_cmd_buf_fence_in);
-		/* Reset the stored last pre-sync */
-		memset((void *)(cmd_buf_cpu_va + fence_index),
-				0,
-				NVGPU_CE_MAX_COMMAND_BUFF_SIZE_FOR_TRACING);
-		if (ret)
-			goto noop;
-	}
-
-	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32)));
-
-	methodSize = gk20a_ce_prepare_submit(src_buf,
-			dst_buf,
-			size,
-			&cmd_buf_cpu_va[cmd_buf_read_offset],
-			NVGPU_CE_MAX_COMMAND_BUFF_SIZE_PER_KICKOFF,
-			payload,
-			gk20a_get_valid_launch_flags(g, launch_flags),
-			request_operation,
-			gpu_capability->dma_copy_class,
-			gk20a_fence_in);
-
-	if (methodSize) {
-		/* TODO: Remove CPU pre-fence wait */
-		if (gk20a_fence_in) {
-			ret = gk20a_fence_wait(g, gk20a_fence_in,
-					gk20a_get_gr_idle_timeout(g));
-			gk20a_fence_put(gk20a_fence_in);
-			if (ret)
-				goto noop;
-		}
-
-		/* store the element into gpfifo */
-		gpfifo.entry0 =
-			u64_lo32(cmd_buf_gpu_va);
-		gpfifo.entry1 =
-			(u64_hi32(cmd_buf_gpu_va) |
-			pbdma_gp_entry1_length_f(methodSize));
-
-		/* take always the postfence as it is needed for protecting the ce context */
-		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
-
-		nvgpu_smp_wmb();
-
-		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
-				1, submit_flags, &fence,
-				&ce_cmd_buf_fence_out, false, NULL);
-
-		if (!ret) {
-			memcpy((void *)(cmd_buf_cpu_va + fence_index),
-					(void *)&ce_cmd_buf_fence_out,
-					sizeof(struct gk20a_fence *));
-
-			if (gk20a_fence_out) {
-				gk20a_fence_get(ce_cmd_buf_fence_out);
-				*gk20a_fence_out = ce_cmd_buf_fence_out;
-			}
-
-			/* Next available command buffer queue Index */
-			++ce_ctx->cmd_buf_read_queue_offset;
-			++ce_ctx->submitted_seq_number;
-		}
-	} else
-		ret = -ENOMEM;
-noop:
-	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
-end:
-	return ret;
-}
-EXPORT_SYMBOL(gk20a_ce_execute_ops);
-
 void gk20a_ce_delete_context(struct gk20a *g,
 		u32 ce_ctx_id)
 {
-- 
cgit v1.2.2
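
To make the per-OS split in the commit message concrete, below is a minimal,
self-contained sketch of the nvgpu_get_gpfifo_entry_size() pattern it
describes. This is illustrative only, not the verbatim patch: the struct
layout is a stand-in for the real Linux uapi definition, the file locations
named in the comments follow the commit message, and the example is written
as plain userspace C so it compiles and runs outside the kernel tree.

/*
 * Sketch of the OS-abstraction pattern: common code calls a hook whose
 * definition lives in OS-specific code, so the Linux-only struct never
 * leaks into common code.
 */
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the Linux-specific uapi struct nvgpu_gpfifo. */
struct nvgpu_gpfifo {
	uint32_t entry0;
	uint32_t entry1;
};

/*
 * Common code declares the hook (in the real tree this declaration would
 * sit in a common header such as gk20a/channel_gk20a.h); each OS provides
 * its own definition.
 */
uint32_t nvgpu_get_gpfifo_entry_size(void);

/*
 * Linux-side definition (per the commit message, in common/linux code):
 * only here is it legal to reference the uapi structure.
 */
uint32_t nvgpu_get_gpfifo_entry_size(void)
{
	return sizeof(struct nvgpu_gpfifo);
}

/*
 * Common code can now size a gpfifo allocation without naming the
 * Linux-specific structure, mirroring what gk20a_channel_alloc_gpfifo()
 * does after this change.
 */
int main(void)
{
	uint32_t num_entries = 128;
	uint32_t bytes = num_entries * nvgpu_get_gpfifo_entry_size();

	printf("gpfifo allocation: %u bytes\n", bytes);
	return 0;
}

The same design applies to gk20a_ce_execute_ops(): the declaration stays in
gk20a/ce2_gk20a.h so common/mm code can call it, while the definition moves
to common/linux/ce2.c and any other OS supplies its own implementation.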