author     Konsta Holtta <kholtta@nvidia.com>                   2018-06-25 05:35:42 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-06-27 21:40:16 -0400
commit     7998233b77a343d002b699d5f348bbeb243e16f5 (patch)
tree       aa24afcc414be8fbccf6991804f69946e2b72525 /drivers/gpu/nvgpu/os/linux
parent     2ac6fb4253fa815ed17f09a01141b938c826dac9 (diff)
gpu: nvgpu: move submit code to common
To finish OS unification of the submit path, move the
gk20a_submit_channel_gpfifo* functions to a file that is also
accessible outside the Linux-specific code.
Also change the prefix of the submit functions from gk20a_ to nvgpu_.
Jira NVGPU-705
Change-Id: I8ca355d1eb69771fb016c7a21fc7f102ca7967d7
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760421
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
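
For orientation, the sketch below shows what an in-kernel call site looks like after the rename. The function name and argument list follow the nvgpu_submit_channel_gpfifo_kernel() calls in cde.c and ce2.c further down and the prototype removed from os/linux/channel.h; the surrounding helper is hypothetical and only illustrates the calling convention, not code from this change.

/*
 * Illustrative only: a hypothetical in-kernel caller after the
 * gk20a_ -> nvgpu_ rename. The prototype mirrors the declaration
 * removed from os/linux/channel.h; error handling is a sketch.
 */
#include <nvgpu/channel.h>

static int example_submit_one_entry(struct channel_gk20a *ch,
		struct nvgpu_gpfifo_entry *entry, u32 flags,
		struct nvgpu_channel_fence *fence)
{
	struct gk20a_fence *fence_out = NULL;
	int err;

	/* was: gk20a_submit_channel_gpfifo_kernel(...) */
	err = nvgpu_submit_channel_gpfifo_kernel(ch, entry, 1, flags,
			fence, &fence_out);
	if (err)
		return err;

	/* the caller waits on or releases fence_out as needed */
	gk20a_fence_put(fence_out);
	return 0;
}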
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux')
-rw-r--r--  drivers/gpu/nvgpu/os/linux/cde.c              3
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ce2.c              3
-rw-r--r--  drivers/gpu/nvgpu/os/linux/channel.c        551
-rw-r--r--  drivers/gpu/nvgpu/os/linux/channel.h         15
-rw-r--r--  drivers/gpu/nvgpu/os/linux/ioctl_channel.c    5
5 files changed, 9 insertions(+), 568 deletions(-)
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 052a1d21..39b7d1f5 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -32,6 +32,7 @@
32 | #include <nvgpu/bug.h> | 32 | #include <nvgpu/bug.h> |
33 | #include <nvgpu/firmware.h> | 33 | #include <nvgpu/firmware.h> |
34 | #include <nvgpu/os_sched.h> | 34 | #include <nvgpu/os_sched.h> |
35 | #include <nvgpu/channel.h> | ||
35 | 36 | ||
36 | #include <nvgpu/linux/vm.h> | 37 | #include <nvgpu/linux/vm.h> |
37 | 38 | ||
@@ -783,7 +784,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
783 | return -ENOSYS; | 784 | return -ENOSYS; |
784 | } | 785 | } |
785 | 786 | ||
786 | return gk20a_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo, | 787 | return nvgpu_submit_channel_gpfifo_kernel(cde_ctx->ch, gpfifo, |
787 | num_entries, flags, fence, fence_out); | 788 | num_entries, flags, fence, fence_out); |
788 | } | 789 | } |
789 | 790 | ||
diff --git a/drivers/gpu/nvgpu/os/linux/ce2.c b/drivers/gpu/nvgpu/os/linux/ce2.c
index 8f20091b..0b43c0d1 100644
--- a/drivers/gpu/nvgpu/os/linux/ce2.c
+++ b/drivers/gpu/nvgpu/os/linux/ce2.c
@@ -15,6 +15,7 @@
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <nvgpu/types.h> | 17 | #include <nvgpu/types.h> |
18 | #include <nvgpu/channel.h> | ||
18 | 19 | ||
19 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> | 20 | #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> |
20 | 21 | ||
@@ -130,7 +131,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
130 | 131 | ||
131 | nvgpu_smp_wmb(); | 132 | nvgpu_smp_wmb(); |
132 | 133 | ||
133 | ret = gk20a_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, | 134 | ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, |
134 | 1, submit_flags, &fence, &ce_cmd_buf_fence_out); | 135 | 1, submit_flags, &fence, &ce_cmd_buf_fence_out); |
135 | 136 | ||
136 | if (!ret) { | 137 | if (!ret) { |
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c
index 391950af..fef44f2b 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.c
+++ b/drivers/gpu/nvgpu/os/linux/channel.c
@@ -16,7 +16,6 @@
16 | 16 | ||
17 | #include <nvgpu/enabled.h> | 17 | #include <nvgpu/enabled.h> |
18 | #include <nvgpu/debug.h> | 18 | #include <nvgpu/debug.h> |
19 | #include <nvgpu/ltc.h> | ||
20 | #include <nvgpu/error_notifier.h> | 19 | #include <nvgpu/error_notifier.h> |
21 | #include <nvgpu/os_sched.h> | 20 | #include <nvgpu/os_sched.h> |
22 | 21 | ||
@@ -489,11 +488,9 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
489 | dma_buf_vunmap(dmabuf, mem); | 488 | dma_buf_vunmap(dmabuf, mem); |
490 | } | 489 | } |
491 | } | 490 | } |
492 | #endif | ||
493 | 491 | ||
494 | static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) | 492 | void trace_write_pushbuffers(struct channel_gk20a *c, u32 count) |
495 | { | 493 | { |
496 | #ifdef CONFIG_DEBUG_FS | ||
497 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; | 494 | struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va; |
498 | u32 n = c->gpfifo.entry_num; | 495 | u32 n = c->gpfifo.entry_num; |
499 | u32 start = c->gpfifo.put; | 496 | u32 start = c->gpfifo.put; |
@@ -507,549 +504,5 @@ static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
507 | 504 | ||
508 | for (i = 0; i < count; i++) | 505 | for (i = 0; i < count; i++) |
509 | trace_write_pushbuffer(c, &gp[(start + i) % n]); | 506 | trace_write_pushbuffer(c, &gp[(start + i) % n]); |
510 | #endif | ||
511 | } | ||
512 | |||
513 | /* | ||
514 | * Handle the submit synchronization - pre-fences and post-fences. | ||
515 | */ | ||
516 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | ||
517 | struct nvgpu_channel_fence *fence, | ||
518 | struct channel_gk20a_job *job, | ||
519 | struct priv_cmd_entry **wait_cmd, | ||
520 | struct priv_cmd_entry **incr_cmd, | ||
521 | struct gk20a_fence **post_fence, | ||
522 | bool register_irq, | ||
523 | u32 flags) | ||
524 | { | ||
525 | struct gk20a *g = c->g; | ||
526 | bool need_sync_fence = false; | ||
527 | bool new_sync_created = false; | ||
528 | int wait_fence_fd = -1; | ||
529 | int err = 0; | ||
530 | bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI); | ||
531 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
532 | |||
533 | if (g->aggressive_sync_destroy_thresh) { | ||
534 | nvgpu_mutex_acquire(&c->sync_lock); | ||
535 | if (!c->sync) { | ||
536 | c->sync = gk20a_channel_sync_create(c, false); | ||
537 | if (!c->sync) { | ||
538 | err = -ENOMEM; | ||
539 | nvgpu_mutex_release(&c->sync_lock); | ||
540 | goto fail; | ||
541 | } | ||
542 | new_sync_created = true; | ||
543 | } | ||
544 | nvgpu_atomic_inc(&c->sync->refcount); | ||
545 | nvgpu_mutex_release(&c->sync_lock); | ||
546 | } | ||
547 | |||
548 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { | ||
549 | err = g->ops.fifo.resetup_ramfc(c); | ||
550 | if (err) | ||
551 | goto fail; | ||
552 | } | ||
553 | |||
554 | /* | ||
555 | * Optionally insert syncpt/semaphore wait in the beginning of gpfifo | ||
556 | * submission when user requested and the wait hasn't expired. | ||
557 | */ | ||
558 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) { | ||
559 | int max_wait_cmds = c->deterministic ? 1 : 0; | ||
560 | |||
561 | if (!pre_alloc_enabled) | ||
562 | job->wait_cmd = nvgpu_kzalloc(g, | ||
563 | sizeof(struct priv_cmd_entry)); | ||
564 | |||
565 | if (!job->wait_cmd) { | ||
566 | err = -ENOMEM; | ||
567 | goto fail; | ||
568 | } | ||
569 | |||
570 | if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { | ||
571 | wait_fence_fd = fence->id; | ||
572 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | ||
573 | job->wait_cmd, max_wait_cmds); | ||
574 | } else { | ||
575 | err = c->sync->wait_syncpt(c->sync, fence->id, | ||
576 | fence->value, | ||
577 | job->wait_cmd); | ||
578 | } | ||
579 | |||
580 | if (err) | ||
581 | goto clean_up_wait_cmd; | ||
582 | |||
583 | if (job->wait_cmd->valid) | ||
584 | *wait_cmd = job->wait_cmd; | ||
585 | } | ||
586 | |||
587 | if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) && | ||
588 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE)) | ||
589 | need_sync_fence = true; | ||
590 | |||
591 | /* | ||
592 | * Always generate an increment at the end of a GPFIFO submission. This | ||
593 | * is used to keep track of method completion for idle railgating. The | ||
594 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | ||
595 | */ | ||
596 | job->post_fence = gk20a_alloc_fence(c); | ||
597 | if (!job->post_fence) { | ||
598 | err = -ENOMEM; | ||
599 | goto clean_up_wait_cmd; | ||
600 | } | ||
601 | if (!pre_alloc_enabled) | ||
602 | job->incr_cmd = nvgpu_kzalloc(g, sizeof(struct priv_cmd_entry)); | ||
603 | |||
604 | if (!job->incr_cmd) { | ||
605 | err = -ENOMEM; | ||
606 | goto clean_up_post_fence; | ||
607 | } | ||
608 | |||
609 | if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) | ||
610 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, | ||
611 | job->post_fence, need_wfi, need_sync_fence, | ||
612 | register_irq); | ||
613 | else | ||
614 | err = c->sync->incr(c->sync, job->incr_cmd, | ||
615 | job->post_fence, need_sync_fence, | ||
616 | register_irq); | ||
617 | if (!err) { | ||
618 | *incr_cmd = job->incr_cmd; | ||
619 | *post_fence = job->post_fence; | ||
620 | } else | ||
621 | goto clean_up_incr_cmd; | ||
622 | |||
623 | return 0; | ||
624 | |||
625 | clean_up_incr_cmd: | ||
626 | free_priv_cmdbuf(c, job->incr_cmd); | ||
627 | if (!pre_alloc_enabled) | ||
628 | job->incr_cmd = NULL; | ||
629 | clean_up_post_fence: | ||
630 | gk20a_fence_put(job->post_fence); | ||
631 | job->post_fence = NULL; | ||
632 | clean_up_wait_cmd: | ||
633 | if (job->wait_cmd) | ||
634 | free_priv_cmdbuf(c, job->wait_cmd); | ||
635 | if (!pre_alloc_enabled) | ||
636 | job->wait_cmd = NULL; | ||
637 | fail: | ||
638 | *wait_cmd = NULL; | ||
639 | return err; | ||
640 | } | ||
641 | |||
642 | static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, | ||
643 | struct priv_cmd_entry *cmd) | ||
644 | { | ||
645 | struct gk20a *g = c->g; | ||
646 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
647 | struct nvgpu_gpfifo_entry x = { | ||
648 | .entry0 = u64_lo32(cmd->gva), | ||
649 | .entry1 = u64_hi32(cmd->gva) | | ||
650 | pbdma_gp_entry1_length_f(cmd->size) | ||
651 | }; | ||
652 | |||
653 | nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), | ||
654 | &x, sizeof(x)); | ||
655 | |||
656 | if (cmd->mem->aperture == APERTURE_SYSMEM) | ||
657 | trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, | ||
658 | (u32 *)cmd->mem->cpu_va + cmd->off); | ||
659 | |||
660 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | ||
661 | } | ||
662 | |||
663 | static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c, | ||
664 | struct nvgpu_gpfifo_userdata userdata, | ||
665 | u32 num_entries) | ||
666 | { | ||
667 | struct gk20a *g = c->g; | ||
668 | struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va; | ||
669 | u32 gpfifo_size = c->gpfifo.entry_num; | ||
670 | u32 len = num_entries; | ||
671 | u32 start = c->gpfifo.put; | ||
672 | u32 end = start + len; /* exclusive */ | ||
673 | int err; | ||
674 | |||
675 | if (end > gpfifo_size) { | ||
676 | /* wrap-around */ | ||
677 | int length0 = gpfifo_size - start; | ||
678 | int length1 = len - length0; | ||
679 | |||
680 | err = g->os_channel.copy_user_gpfifo( | ||
681 | gpfifo_cpu + start, userdata, | ||
682 | 0, length0); | ||
683 | if (err) | ||
684 | return err; | ||
685 | |||
686 | err = g->os_channel.copy_user_gpfifo( | ||
687 | gpfifo_cpu, userdata, | ||
688 | length0, length1); | ||
689 | if (err) | ||
690 | return err; | ||
691 | } else { | ||
692 | err = g->os_channel.copy_user_gpfifo( | ||
693 | gpfifo_cpu + start, userdata, | ||
694 | 0, len); | ||
695 | if (err) | ||
696 | return err; | ||
697 | } | ||
698 | |||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c, | ||
703 | struct nvgpu_gpfifo_entry *src, u32 num_entries) | ||
704 | { | ||
705 | struct gk20a *g = c->g; | ||
706 | struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; | ||
707 | /* in bytes */ | ||
708 | u32 gpfifo_size = | ||
709 | c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry); | ||
710 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry); | ||
711 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry); | ||
712 | u32 end = start + len; /* exclusive */ | ||
713 | |||
714 | if (end > gpfifo_size) { | ||
715 | /* wrap-around */ | ||
716 | int length0 = gpfifo_size - start; | ||
717 | int length1 = len - length0; | ||
718 | struct nvgpu_gpfifo_entry *src2 = src + length0; | ||
719 | |||
720 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0); | ||
721 | nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1); | ||
722 | } else { | ||
723 | nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len); | ||
724 | } | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
729 | * splitting into two memcpys to handle wrap-around. | ||
730 | */ | ||
731 | static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c, | ||
732 | struct nvgpu_gpfifo_entry *kern_gpfifo, | ||
733 | struct nvgpu_gpfifo_userdata userdata, | ||
734 | u32 num_entries) | ||
735 | { | ||
736 | struct gk20a *g = c->g; | ||
737 | int err; | ||
738 | |||
739 | if (!kern_gpfifo && !c->gpfifo.pipe) { | ||
740 | /* | ||
741 | * This path (from userspace to sysmem) is special in order to | ||
742 | * avoid two copies unnecessarily (from user to pipe, then from | ||
743 | * pipe to gpu sysmem buffer). | ||
744 | */ | ||
745 | err = nvgpu_submit_append_gpfifo_user_direct(c, userdata, | ||
746 | num_entries); | ||
747 | if (err) | ||
748 | return err; | ||
749 | } else if (!kern_gpfifo) { | ||
750 | /* from userspace to vidmem, use the common path */ | ||
751 | err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata, | ||
752 | 0, num_entries); | ||
753 | if (err) | ||
754 | return err; | ||
755 | |||
756 | nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe, | ||
757 | num_entries); | ||
758 | } else { | ||
759 | /* from kernel to either sysmem or vidmem, don't need | ||
760 | * copy_user_gpfifo so use the common path */ | ||
761 | nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries); | ||
762 | } | ||
763 | |||
764 | trace_write_pushbuffers(c, num_entries); | ||
765 | |||
766 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
767 | (c->gpfifo.entry_num - 1); | ||
768 | |||
769 | return 0; | ||
770 | } | ||
771 | |||
772 | static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | ||
773 | struct nvgpu_gpfifo_entry *gpfifo, | ||
774 | struct nvgpu_gpfifo_userdata userdata, | ||
775 | u32 num_entries, | ||
776 | u32 flags, | ||
777 | struct nvgpu_channel_fence *fence, | ||
778 | struct gk20a_fence **fence_out, | ||
779 | struct fifo_profile_gk20a *profile) | ||
780 | { | ||
781 | struct gk20a *g = c->g; | ||
782 | struct priv_cmd_entry *wait_cmd = NULL; | ||
783 | struct priv_cmd_entry *incr_cmd = NULL; | ||
784 | struct gk20a_fence *post_fence = NULL; | ||
785 | struct channel_gk20a_job *job = NULL; | ||
786 | /* we might need two extra gpfifo entries - one for pre fence | ||
787 | * and one for post fence. */ | ||
788 | const int extra_entries = 2; | ||
789 | bool skip_buffer_refcounting = (flags & | ||
790 | NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING); | ||
791 | int err = 0; | ||
792 | bool need_job_tracking; | ||
793 | bool need_deferred_cleanup = false; | ||
794 | |||
795 | if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) | ||
796 | return -ENODEV; | ||
797 | |||
798 | if (c->has_timedout) | ||
799 | return -ETIMEDOUT; | ||
800 | |||
801 | if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) | ||
802 | return -ENOMEM; | ||
803 | |||
804 | /* fifo not large enough for request. Return error immediately. | ||
805 | * Kernel can insert gpfifo entries before and after user gpfifos. | ||
806 | * So, add extra_entries in user request. Also, HW with fifo size N | ||
807 | * can accept only N-1 entreis and so the below condition */ | ||
808 | if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) { | ||
809 | nvgpu_err(g, "not enough gpfifo space allocated"); | ||
810 | return -ENOMEM; | ||
811 | } | ||
812 | |||
813 | if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT | | ||
814 | NVGPU_SUBMIT_FLAGS_FENCE_GET)) && | ||
815 | !fence) | ||
816 | return -EINVAL; | ||
817 | |||
818 | /* an address space needs to have been bound at this point. */ | ||
819 | if (!gk20a_channel_as_bound(c)) { | ||
820 | nvgpu_err(g, | ||
821 | "not bound to an address space at time of gpfifo" | ||
822 | " submission."); | ||
823 | return -EINVAL; | ||
824 | } | ||
825 | |||
826 | gk20a_fifo_profile_snapshot(profile, PROFILE_ENTRY); | ||
827 | |||
828 | /* update debug settings */ | ||
829 | nvgpu_ltc_sync_enabled(g); | ||
830 | |||
831 | nvgpu_log_info(g, "channel %d", c->chid); | ||
832 | |||
833 | /* | ||
834 | * Job tracking is necessary for any of the following conditions: | ||
835 | * - pre- or post-fence functionality | ||
836 | * - channel wdt | ||
837 | * - GPU rail-gating with non-deterministic channels | ||
838 | * - buffer refcounting | ||
839 | * | ||
840 | * If none of the conditions are met, then job tracking is not | ||
841 | * required and a fast submit can be done (ie. only need to write | ||
842 | * out userspace GPFIFO entries and update GP_PUT). | ||
843 | */ | ||
844 | need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) || | ||
845 | (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) || | ||
846 | c->timeout.enabled || | ||
847 | (g->can_railgate && !c->deterministic) || | ||
848 | !skip_buffer_refcounting; | ||
849 | |||
850 | if (need_job_tracking) { | ||
851 | bool need_sync_framework = false; | ||
852 | |||
853 | /* | ||
854 | * If the channel is to have deterministic latency and | ||
855 | * job tracking is required, the channel must have | ||
856 | * pre-allocated resources. Otherwise, we fail the submit here | ||
857 | */ | ||
858 | if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) | ||
859 | return -EINVAL; | ||
860 | |||
861 | need_sync_framework = | ||
862 | gk20a_channel_sync_needs_sync_framework(g) || | ||
863 | (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE && | ||
864 | flags & NVGPU_SUBMIT_FLAGS_FENCE_GET); | ||
865 | |||
866 | /* | ||
867 | * Deferred clean-up is necessary for any of the following | ||
868 | * conditions: | ||
869 | * - channel's deterministic flag is not set | ||
870 | * - dependency on sync framework, which could make the | ||
871 | * behavior of the clean-up operation non-deterministic | ||
872 | * (should not be performed in the submit path) | ||
873 | * - channel wdt | ||
874 | * - GPU rail-gating with non-deterministic channels | ||
875 | * - buffer refcounting | ||
876 | * | ||
877 | * If none of the conditions are met, then deferred clean-up | ||
878 | * is not required, and we clean-up one job-tracking | ||
879 | * resource in the submit path. | ||
880 | */ | ||
881 | need_deferred_cleanup = !c->deterministic || | ||
882 | need_sync_framework || | ||
883 | c->timeout.enabled || | ||
884 | (g->can_railgate && | ||
885 | !c->deterministic) || | ||
886 | !skip_buffer_refcounting; | ||
887 | |||
888 | /* | ||
889 | * For deterministic channels, we don't allow deferred clean_up | ||
890 | * processing to occur. In cases we hit this, we fail the submit | ||
891 | */ | ||
892 | if (c->deterministic && need_deferred_cleanup) | ||
893 | return -EINVAL; | ||
894 | |||
895 | if (!c->deterministic) { | ||
896 | /* | ||
897 | * Get a power ref unless this is a deterministic | ||
898 | * channel that holds them during the channel lifetime. | ||
899 | * This one is released by gk20a_channel_clean_up_jobs, | ||
900 | * via syncpt or sema interrupt, whichever is used. | ||
901 | */ | ||
902 | err = gk20a_busy(g); | ||
903 | if (err) { | ||
904 | nvgpu_err(g, | ||
905 | "failed to host gk20a to submit gpfifo"); | ||
906 | nvgpu_print_current(g, NULL, NVGPU_ERROR); | ||
907 | return err; | ||
908 | } | ||
909 | } | ||
910 | |||
911 | if (!need_deferred_cleanup) { | ||
912 | /* clean up a single job */ | ||
913 | gk20a_channel_clean_up_jobs(c, false); | ||
914 | } | ||
915 | } | ||
916 | |||
917 | |||
918 | /* Grab access to HW to deal with do_idle */ | ||
919 | if (c->deterministic) | ||
920 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
921 | |||
922 | if (c->deterministic && c->deterministic_railgate_allowed) { | ||
923 | /* | ||
924 | * Nope - this channel has dropped its own power ref. As | ||
925 | * deterministic submits don't hold power on per each submitted | ||
926 | * job like normal ones do, the GPU might railgate any time now | ||
927 | * and thus submit is disallowed. | ||
928 | */ | ||
929 | err = -EINVAL; | ||
930 | goto clean_up; | ||
931 | } | ||
932 | |||
933 | trace_gk20a_channel_submit_gpfifo(g->name, | ||
934 | c->chid, | ||
935 | num_entries, | ||
936 | flags, | ||
937 | fence ? fence->id : 0, | ||
938 | fence ? fence->value : 0); | ||
939 | |||
940 | nvgpu_log_info(g, "pre-submit put %d, get %d, size %d", | ||
941 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
942 | |||
943 | /* | ||
944 | * Make sure we have enough space for gpfifo entries. Check cached | ||
945 | * values first and then read from HW. If no space, return EAGAIN | ||
946 | * and let userpace decide to re-try request or not. | ||
947 | */ | ||
948 | if (nvgpu_gp_free_count(c) < num_entries + extra_entries) { | ||
949 | if (nvgpu_get_gp_free_count(c) < num_entries + extra_entries) { | ||
950 | err = -EAGAIN; | ||
951 | goto clean_up; | ||
952 | } | ||
953 | } | ||
954 | |||
955 | if (c->has_timedout) { | ||
956 | err = -ETIMEDOUT; | ||
957 | goto clean_up; | ||
958 | } | ||
959 | |||
960 | if (need_job_tracking) { | ||
961 | err = channel_gk20a_alloc_job(c, &job); | ||
962 | if (err) | ||
963 | goto clean_up; | ||
964 | |||
965 | err = gk20a_submit_prepare_syncs(c, fence, job, | ||
966 | &wait_cmd, &incr_cmd, | ||
967 | &post_fence, | ||
968 | need_deferred_cleanup, | ||
969 | flags); | ||
970 | if (err) | ||
971 | goto clean_up_job; | ||
972 | } | ||
973 | |||
974 | gk20a_fifo_profile_snapshot(profile, PROFILE_JOB_TRACKING); | ||
975 | |||
976 | if (wait_cmd) | ||
977 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); | ||
978 | |||
979 | err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, | ||
980 | num_entries); | ||
981 | if (err) | ||
982 | goto clean_up_job; | ||
983 | |||
984 | /* | ||
985 | * And here's where we add the incr_cmd we generated earlier. It should | ||
986 | * always run! | ||
987 | */ | ||
988 | if (incr_cmd) | ||
989 | gk20a_submit_append_priv_cmdbuf(c, incr_cmd); | ||
990 | |||
991 | if (fence_out) | ||
992 | *fence_out = gk20a_fence_get(post_fence); | ||
993 | |||
994 | if (need_job_tracking) | ||
995 | /* TODO! Check for errors... */ | ||
996 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); | ||
997 | gk20a_fifo_profile_snapshot(profile, PROFILE_APPEND); | ||
998 | |||
999 | g->ops.fifo.userd_gp_put(g, c); | ||
1000 | |||
1001 | /* No hw access beyond this point */ | ||
1002 | if (c->deterministic) | ||
1003 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1004 | |||
1005 | trace_gk20a_channel_submitted_gpfifo(g->name, | ||
1006 | c->chid, | ||
1007 | num_entries, | ||
1008 | flags, | ||
1009 | post_fence ? post_fence->syncpt_id : 0, | ||
1010 | post_fence ? post_fence->syncpt_value : 0); | ||
1011 | |||
1012 | nvgpu_log_info(g, "post-submit put %d, get %d, size %d", | ||
1013 | c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num); | ||
1014 | |||
1015 | gk20a_fifo_profile_snapshot(profile, PROFILE_END); | ||
1016 | |||
1017 | nvgpu_log_fn(g, "done"); | ||
1018 | return err; | ||
1019 | |||
1020 | clean_up_job: | ||
1021 | channel_gk20a_free_job(c, job); | ||
1022 | clean_up: | ||
1023 | nvgpu_log_fn(g, "fail"); | ||
1024 | gk20a_fence_put(post_fence); | ||
1025 | if (c->deterministic) | ||
1026 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1027 | else if (need_deferred_cleanup) | ||
1028 | gk20a_idle(g); | ||
1029 | |||
1030 | return err; | ||
1031 | } | ||
1032 | |||
1033 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
1034 | struct nvgpu_gpfifo_userdata userdata, | ||
1035 | u32 num_entries, | ||
1036 | u32 flags, | ||
1037 | struct nvgpu_channel_fence *fence, | ||
1038 | struct gk20a_fence **fence_out, | ||
1039 | struct fifo_profile_gk20a *profile) | ||
1040 | { | ||
1041 | return gk20a_submit_channel_gpfifo(c, NULL, userdata, num_entries, | ||
1042 | flags, fence, fence_out, profile); | ||
1043 | } | ||
1044 | |||
1045 | int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
1046 | struct nvgpu_gpfifo_entry *gpfifo, | ||
1047 | u32 num_entries, | ||
1048 | u32 flags, | ||
1049 | struct nvgpu_channel_fence *fence, | ||
1050 | struct gk20a_fence **fence_out) | ||
1051 | { | ||
1052 | struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; | ||
1053 | return gk20a_submit_channel_gpfifo(c, gpfifo, userdata, num_entries, | ||
1054 | flags, fence, fence_out, NULL); | ||
1055 | } | 507 | } |
508 | #endif | ||
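
The gpfifo append helpers moved out of this file (nvgpu_submit_append_gpfifo_common() and friends, shown deleted above) copy entries into a power-of-two ring and split the copy in two when a write wraps past the end. The sketch below restates that pattern generically with memcpy(); the names and the standalone form are illustrative, not code from the patch.

/*
 * Illustrative sketch, not part of the patch: the wrap-around split
 * copy used by the moved submit code, on a power-of-two ring.
 */
#include <string.h>

struct entry { unsigned int entry0, entry1; };

static void ring_append(struct entry *ring, unsigned int ring_len,
			unsigned int *put, const struct entry *src,
			unsigned int n)
{
	unsigned int start = *put;

	if (start + n > ring_len) {
		/* wrap-around: fill the tail of the ring, then the start */
		unsigned int len0 = ring_len - start;

		memcpy(ring + start, src, len0 * sizeof(*src));
		memcpy(ring, src + len0, (n - len0) * sizeof(*src));
	} else {
		memcpy(ring + start, src, n * sizeof(*src));
	}

	/* advance put; ring_len must be a power of two for this mask */
	*put = (start + n) & (ring_len - 1);
}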
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h
index 43fa492b..87231a79 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.h
+++ b/drivers/gpu/nvgpu/os/linux/channel.h
@@ -84,19 +84,4 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
84 | int runlist_id, | 84 | int runlist_id, |
85 | bool is_privileged_channel); | 85 | bool is_privileged_channel); |
86 | 86 | ||
87 | int gk20a_submit_channel_gpfifo_user(struct channel_gk20a *c, | ||
88 | struct nvgpu_gpfifo_userdata userdata, | ||
89 | u32 num_entries, | ||
90 | u32 flags, | ||
91 | struct nvgpu_channel_fence *fence, | ||
92 | struct gk20a_fence **fence_out, | ||
93 | struct fifo_profile_gk20a *profile); | ||
94 | |||
95 | int gk20a_submit_channel_gpfifo_kernel(struct channel_gk20a *c, | ||
96 | struct nvgpu_gpfifo_entry *gpfifo, | ||
97 | u32 num_entries, | ||
98 | u32 flags, | ||
99 | struct nvgpu_channel_fence *fence, | ||
100 | struct gk20a_fence **fence_out); | ||
101 | |||
102 | #endif /* __NVGPU_CHANNEL_H__ */ | 87 | #endif /* __NVGPU_CHANNEL_H__ */ |
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index fa6a02d6..7b003b76 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -34,6 +34,7 @@
34 | #include <nvgpu/barrier.h> | 34 | #include <nvgpu/barrier.h> |
35 | #include <nvgpu/nvhost.h> | 35 | #include <nvgpu/nvhost.h> |
36 | #include <nvgpu/os_sched.h> | 36 | #include <nvgpu/os_sched.h> |
37 | #include <nvgpu/channel.h> | ||
37 | 38 | ||
38 | #include "gk20a/gk20a.h" | 39 | #include "gk20a/gk20a.h" |
39 | #include "gk20a/dbg_gpu_gk20a.h" | 40 | #include "gk20a/dbg_gpu_gk20a.h" |
@@ -799,11 +800,11 @@ static int gk20a_ioctl_channel_submit_gpfifo(
799 | return fd; | 800 | return fd; |
800 | } | 801 | } |
801 | 802 | ||
802 | userdata.entries = (struct nvgpu_gpfifo_entry __user*) | 803 | userdata.entries = (struct nvgpu_gpfifo_entry __user *) |
803 | (uintptr_t)args->gpfifo; | 804 | (uintptr_t)args->gpfifo; |
804 | userdata.context = NULL; | 805 | userdata.context = NULL; |
805 | 806 | ||
806 | ret = gk20a_submit_channel_gpfifo_user(ch, | 807 | ret = nvgpu_submit_channel_gpfifo_user(ch, |
807 | userdata, args->num_entries, | 808 | userdata, args->num_entries, |
808 | submit_flags, &fence, &fence_out, profile); | 809 | submit_flags, &fence, &fence_out, profile); |
809 | 810 | ||