author     Sachit Kadle <skadle@nvidia.com>                      2016-08-22 21:06:30 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2016-10-20 11:14:04 -0400
commit     63e8592e06939e20c7b9e56b430353ebbee31ad6 (patch)
tree       b91247eebf886f4e987d38eb4069aceace284ecf
parent     3c2656c8c6ebf7cef7376d3a28451249643121c4 (diff)
gpu: nvgpu: use inplace allocation in sync framework
This change is the first of a series of changes to support the usage of
pre-allocated job tracking resources in the submit path. With this change,
we still maintain a dynamically-allocated joblist, but make the necessary
changes in the channel_sync & fence framework to use in-place allocations.

Specifically, we:
1) Update channel sync framework routines to take in pre-allocated
   priv_cmd_entry(s) & gk20a_fence(s) rather than allocating them
   dynamically themselves
2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s) to
   gk20a_submit_prepare_syncs
3) Modify the fence framework to have separate allocation and init APIs.
   Allocation is exposed as a separate API, so the client can allocate the
   object before passing it into the channel sync framework.
4) Fix clean_up logic in the channel sync framework

Bug 1795076

Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
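
For illustration, a minimal sketch of how the submit path looks with the
split allocation/initialization model introduced here. It condenses the flow
of gk20a_submit_channel_gpfifo() and gk20a_submit_prepare_syncs() from the
diff below and is not the literal patch code; the clean_up_job label and the
need_sync_fence flag are taken from the surrounding submit path.

	struct channel_gk20a_job *job;
	int err;

	/* Allocate the tracking objects up front... */
	job = channel_gk20a_alloc_job(c);
	if (!job)
		return -ENOMEM;

	job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
	job->post_fence = gk20a_alloc_fence(c);	/* allocation only, no init */
	if (!job->incr_cmd || !job->post_fence) {
		err = -ENOMEM;
		goto clean_up_job;
	}

	/* ...then let the sync framework initialize them in place, instead of
	 * returning freshly allocated objects through double pointers. */
	err = c->sync->incr(c->sync, job->incr_cmd, job->post_fence,
			    need_sync_fence);
	if (err)
		goto clean_up_job;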
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       | 156
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h       |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  | 168
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h  |  20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.c         |  88
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.h         |  15
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h            |   1
7 files changed, 275 insertions, 175 deletions
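
A recurring pattern in the diff below is publishing a pre-allocated object
through a valid flag: the producer fills in the payload, issues a write
barrier, then sets valid; the consumer checks valid and issues a read barrier
before trusting the payload. A condensed sketch of that pairing, using only
names that appear in the patch:

	/* producer (cf. gk20a_channel_alloc_priv_cmdbuf) */
	e->size = orig_size;
	e->mem  = &q->mem;
	/* ... remaining entry fields ... */
	wmb();		/* commit the payload before the flag */
	e->valid = true;

	/* consumer (cf. gk20a_free_priv_cmdbuf) */
	if (e->valid) {
		rmb();	/* read the flag before the payload */
		q->get = e->off + e->size;
	}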
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f60a92b4..4019721a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1376,16 +1376,20 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1376 1376
1377/* allocate a cmd buffer with given size. size is number of u32 entries */ 1377/* allocate a cmd buffer with given size. size is number of u32 entries */
1378int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, 1378int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1379 struct priv_cmd_entry **entry) 1379 struct priv_cmd_entry *e)
1380{ 1380{
1381 struct priv_cmd_queue *q = &c->priv_cmd_q; 1381 struct priv_cmd_queue *q = &c->priv_cmd_q;
1382 struct priv_cmd_entry *e;
1383 u32 free_count; 1382 u32 free_count;
1384 u32 size = orig_size; 1383 u32 size = orig_size;
1385 1384
1386 gk20a_dbg_fn("size %d", orig_size); 1385 gk20a_dbg_fn("size %d", orig_size);
1387 1386
1388 *entry = NULL; 1387 if (!e) {
1388 gk20a_err(dev_from_gk20a(c->g),
1389 "ch %d: priv cmd entry is null",
1390 c->hw_chid);
1391 return -EINVAL;
1392 }
1389 1393
1390 /* if free space in the end is less than requested, increase the size 1394 /* if free space in the end is less than requested, increase the size
1391 * to make the real allocated space start from beginning. */ 1395 * to make the real allocated space start from beginning. */
@@ -1400,14 +1404,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1400 if (size > free_count) 1404 if (size > free_count)
1401 return -EAGAIN; 1405 return -EAGAIN;
1402 1406
1403 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1404 if (!e) {
1405 gk20a_err(dev_from_gk20a(c->g),
1406 "ch %d: fail to allocate priv cmd entry",
1407 c->hw_chid);
1408 return -ENOMEM;
1409 }
1410
1411 e->size = orig_size; 1407 e->size = orig_size;
1412 e->mem = &q->mem; 1408 e->mem = &q->mem;
1413 1409
@@ -1426,8 +1422,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1426 /* we already handled q->put + size > q->size so BUG_ON this */ 1422 /* we already handled q->put + size > q->size so BUG_ON this */
1427 BUG_ON(q->put > q->size); 1423 BUG_ON(q->put > q->size);
1428 1424
1429 *entry = e; 1425 /* commit the previous writes before making the entry valid */
1426 wmb();
1430 1427
1428 e->valid = true;
1431 gk20a_dbg_fn("done"); 1429 gk20a_dbg_fn("done");
1432 1430
1433 return 0; 1431 return 0;
@@ -1441,6 +1439,21 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
1441 kfree(e); 1439 kfree(e);
1442} 1440}
1443 1441
1442static struct channel_gk20a_job *channel_gk20a_alloc_job(
1443 struct channel_gk20a *c)
1444{
1445 struct channel_gk20a_job *job = NULL;
1446
1447 job = kzalloc(sizeof(*job), GFP_KERNEL);
1448 return job;
1449}
1450
1451static void channel_gk20a_free_job(struct channel_gk20a *c,
1452 struct channel_gk20a_job *job)
1453{
1454 kfree(job);
1455}
1456
1444int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, 1457int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1445 struct nvgpu_alloc_gpfifo_args *args) 1458 struct nvgpu_alloc_gpfifo_args *args)
1446{ 1459{
@@ -1818,10 +1831,15 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
1818 if (!e) 1831 if (!e)
1819 return 0; 1832 return 0;
1820 1833
1821 if ((q->get != e->off) && e->off != 0) 1834 if (e->valid) {
1822 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); 1835 /* read the entry's valid flag before reading its contents */
1836 rmb();
1837 if ((q->get != e->off) && e->off != 0)
1838 gk20a_err(d, "requests out-of-order, ch=%d\n",
1839 c->hw_chid);
1840 q->get = e->off + e->size;
1841 }
1823 1842
1824 q->get = e->off + e->size;
1825 free_priv_cmdbuf(c, e); 1843 free_priv_cmdbuf(c, e);
1826 1844
1827 return 0; 1845 return 0;
@@ -1854,14 +1872,10 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
1854} 1872}
1855 1873
1856static int gk20a_channel_add_job(struct channel_gk20a *c, 1874static int gk20a_channel_add_job(struct channel_gk20a *c,
1857 struct gk20a_fence *pre_fence, 1875 struct channel_gk20a_job *job,
1858 struct gk20a_fence *post_fence,
1859 struct priv_cmd_entry *wait_cmd,
1860 struct priv_cmd_entry *incr_cmd,
1861 bool skip_buffer_refcounting) 1876 bool skip_buffer_refcounting)
1862{ 1877{
1863 struct vm_gk20a *vm = c->vm; 1878 struct vm_gk20a *vm = c->vm;
1864 struct channel_gk20a_job *job = NULL;
1865 struct mapped_buffer_node **mapped_buffers = NULL; 1879 struct mapped_buffer_node **mapped_buffers = NULL;
1866 int err = 0, num_mapped_buffers = 0; 1880 int err = 0, num_mapped_buffers = 0;
1867 1881
@@ -1875,22 +1889,12 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1875 goto err_put_vm; 1889 goto err_put_vm;
1876 } 1890 }
1877 1891
1878 job = kzalloc(sizeof(*job), GFP_KERNEL);
1879 if (!job) {
1880 err = -ENOMEM;
1881 goto err_put_buffers;
1882 }
1883
1884 /* put() is done in gk20a_channel_update() when the job is done */ 1892 /* put() is done in gk20a_channel_update() when the job is done */
1885 c = gk20a_channel_get(c); 1893 c = gk20a_channel_get(c);
1886 1894
1887 if (c) { 1895 if (c) {
1888 job->num_mapped_buffers = num_mapped_buffers; 1896 job->num_mapped_buffers = num_mapped_buffers;
1889 job->mapped_buffers = mapped_buffers; 1897 job->mapped_buffers = mapped_buffers;
1890 job->pre_fence = pre_fence;
1891 job->post_fence = post_fence;
1892 job->wait_cmd = wait_cmd;
1893 job->incr_cmd = incr_cmd;
1894 1898
1895 gk20a_channel_timeout_start(c, job); 1899 gk20a_channel_timeout_start(c, job);
1896 1900
@@ -1899,13 +1903,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1899 spin_unlock(&c->jobs_lock); 1903 spin_unlock(&c->jobs_lock);
1900 } else { 1904 } else {
1901 err = -ETIMEDOUT; 1905 err = -ETIMEDOUT;
1902 goto err_free_job; 1906 goto err_put_buffers;
1903 } 1907 }
1904 1908
1905 return 0; 1909 return 0;
1906 1910
1907err_free_job:
1908 kfree(job);
1909err_put_buffers: 1911err_put_buffers:
1910 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); 1912 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1911err_put_vm: 1913err_put_vm:
@@ -2000,7 +2002,7 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
2000 list_del_init(&job->list); 2002 list_del_init(&job->list);
2001 spin_unlock(&c->jobs_lock); 2003 spin_unlock(&c->jobs_lock);
2002 2004
2003 kfree(job); 2005 channel_gk20a_free_job(c, job);
2004 job_finished = 1; 2006 job_finished = 1;
2005 gk20a_idle(g->dev); 2007 gk20a_idle(g->dev);
2006 } 2008 }
@@ -2143,6 +2145,7 @@ out:
2143 */ 2145 */
2144static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, 2146static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2145 struct nvgpu_fence *fence, 2147 struct nvgpu_fence *fence,
2148 struct channel_gk20a_job *job,
2146 struct priv_cmd_entry **wait_cmd, 2149 struct priv_cmd_entry **wait_cmd,
2147 struct priv_cmd_entry **incr_cmd, 2150 struct priv_cmd_entry **incr_cmd,
2148 struct gk20a_fence **pre_fence, 2151 struct gk20a_fence **pre_fence,
@@ -2194,18 +2197,32 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2194 * this condition. 2197 * this condition.
2195 */ 2198 */
2196 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { 2199 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
2200 job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
2201 GFP_KERNEL);
2202 job->pre_fence = gk20a_alloc_fence(c);
2203
2204 if (!job->wait_cmd || !job->pre_fence) {
2205 err = -ENOMEM;
2206 goto clean_up_pre_fence;
2207 }
2208
2197 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { 2209 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2198 wait_fence_fd = fence->id; 2210 wait_fence_fd = fence->id;
2199 err = c->sync->wait_fd(c->sync, wait_fence_fd, 2211 err = c->sync->wait_fd(c->sync, wait_fence_fd,
2200 wait_cmd, pre_fence); 2212 job->wait_cmd, job->pre_fence);
2201 } else { 2213 } else {
2202 err = c->sync->wait_syncpt(c->sync, fence->id, 2214 err = c->sync->wait_syncpt(c->sync, fence->id,
2203 fence->value, wait_cmd, 2215 fence->value, job->wait_cmd,
2204 pre_fence); 2216 job->pre_fence);
2205 } 2217 }
2218
2219 if (!err) {
2220 if (job->wait_cmd->valid)
2221 *wait_cmd = job->wait_cmd;
2222 *pre_fence = job->pre_fence;
2223 } else
2224 goto clean_up_pre_fence;
2206 } 2225 }
2207 if (err)
2208 goto fail;
2209 2226
2210 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && 2227 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
2211 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) 2228 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
@@ -2216,22 +2233,41 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2216 * is used to keep track of method completion for idle railgating. The 2233 * is used to keep track of method completion for idle railgating. The
2217 * sync_pt/semaphore PB is added to the GPFIFO later on in submit. 2234 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
2218 */ 2235 */
2236 job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
2237 job->post_fence = gk20a_alloc_fence(c);
2238
2239 if (!job->incr_cmd || !job->post_fence) {
2240 err = -ENOMEM;
2241 goto clean_up_post_fence;
2242 }
2243
2219 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) 2244 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
2220 err = c->sync->incr_user(c->sync, wait_fence_fd, incr_cmd, 2245 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
2221 post_fence, need_wfi, need_sync_fence); 2246 job->post_fence, need_wfi, need_sync_fence);
2222 else 2247 else
2223 err = c->sync->incr(c->sync, incr_cmd, 2248 err = c->sync->incr(c->sync, job->incr_cmd,
2224 post_fence, need_sync_fence); 2249 job->post_fence, need_sync_fence);
2225 if (err) 2250 if (!err) {
2226 goto fail; 2251 *incr_cmd = job->incr_cmd;
2252 *post_fence = job->post_fence;
2253 } else
2254 goto clean_up_post_fence;
2227 2255
2228 return 0; 2256 return 0;
2229 2257
2258clean_up_post_fence:
2259 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
2260 gk20a_fence_put(job->post_fence);
2261 job->incr_cmd = NULL;
2262 job->post_fence = NULL;
2263clean_up_pre_fence:
2264 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
2265 gk20a_fence_put(job->pre_fence);
2266 job->wait_cmd = NULL;
2267 job->pre_fence = NULL;
2268 *wait_cmd = NULL;
2269 *pre_fence = NULL;
2230fail: 2270fail:
2231 /*
2232 * Cleanup is handled by gk20a_submit_channel_gpfifo() since it is the
2233 * real owner of the objects we make here.
2234 */
2235 return err; 2271 return err;
2236} 2272}
2237 2273
@@ -2250,6 +2286,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2250 struct priv_cmd_entry *incr_cmd = NULL; 2286 struct priv_cmd_entry *incr_cmd = NULL;
2251 struct gk20a_fence *pre_fence = NULL; 2287 struct gk20a_fence *pre_fence = NULL;
2252 struct gk20a_fence *post_fence = NULL; 2288 struct gk20a_fence *post_fence = NULL;
2289 struct channel_gk20a_job *job = NULL;
2253 /* we might need two extra gpfifo entries - one for pre fence 2290 /* we might need two extra gpfifo entries - one for pre fence
2254 * and one for post fence. */ 2291 * and one for post fence. */
2255 const int extra_entries = 2; 2292 const int extra_entries = 2;
@@ -2351,11 +2388,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2351 } 2388 }
2352 2389
2353 if (need_job_tracking) { 2390 if (need_job_tracking) {
2354 err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd, 2391 job = channel_gk20a_alloc_job(c);
2392 if (!job) {
2393 err = -ENOMEM;
2394 goto clean_up;
2395 }
2396
2397 err = gk20a_submit_prepare_syncs(c, fence, job,
2398 &wait_cmd, &incr_cmd,
2355 &pre_fence, &post_fence, 2399 &pre_fence, &post_fence,
2356 force_need_sync_fence, flags); 2400 force_need_sync_fence, flags);
2357 if (err) 2401 if (err)
2358 goto clean_up; 2402 goto clean_up_job;
2359 } 2403 }
2360 2404
2361 if (wait_cmd) 2405 if (wait_cmd)
@@ -2365,7 +2409,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2365 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, 2409 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
2366 num_entries); 2410 num_entries);
2367 if (err) 2411 if (err)
2368 goto clean_up; 2412 goto clean_up_job;
2369 2413
2370 /* 2414 /*
2371 * And here's where we add the incr_cmd we generated earlier. It should 2415 * And here's where we add the incr_cmd we generated earlier. It should
@@ -2379,9 +2423,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2379 2423
2380 if (need_job_tracking) 2424 if (need_job_tracking)
2381 /* TODO! Check for errors... */ 2425 /* TODO! Check for errors... */
2382 gk20a_channel_add_job(c, pre_fence, post_fence, 2426 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
2383 wait_cmd, incr_cmd,
2384 skip_buffer_refcounting);
2385 2427
2386 g->ops.fifo.userd_gp_put(g, c); 2428 g->ops.fifo.userd_gp_put(g, c);
2387 2429
@@ -2398,10 +2440,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2398 gk20a_dbg_fn("done"); 2440 gk20a_dbg_fn("done");
2399 return err; 2441 return err;
2400 2442
2443clean_up_job:
2444 channel_gk20a_free_job(c, job);
2401clean_up: 2445clean_up:
2402 gk20a_dbg_fn("fail"); 2446 gk20a_dbg_fn("fail");
2403 free_priv_cmdbuf(c, wait_cmd);
2404 free_priv_cmdbuf(c, incr_cmd);
2405 gk20a_fence_put(pre_fence); 2447 gk20a_fence_put(pre_fence);
2406 gk20a_fence_put(post_fence); 2448 gk20a_fence_put(post_fence);
2407 if (need_job_tracking) 2449 if (need_job_tracking)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f6571b6f..0d8746b8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -218,7 +218,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
218void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error); 218void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
219void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); 219void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
220int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, 220int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
221 struct priv_cmd_entry **entry); 221 struct priv_cmd_entry *entry);
222int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e); 222int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
223 223
224int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch); 224int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 7a71c4eb..767738ea 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -57,12 +57,11 @@ static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
57} 57}
58 58
59static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, 59static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
60 u32 id, u32 thresh, struct priv_cmd_entry **entry, 60 u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
61 struct gk20a_fence **fence) 61 struct gk20a_fence *fence)
62{ 62{
63 struct gk20a_channel_syncpt *sp = 63 struct gk20a_channel_syncpt *sp =
64 container_of(s, struct gk20a_channel_syncpt, ops); 64 container_of(s, struct gk20a_channel_syncpt, ops);
65 struct priv_cmd_entry *wait_cmd = NULL;
66 struct channel_gk20a *c = sp->c; 65 struct channel_gk20a *c = sp->c;
67 int err = 0; 66 int err = 0;
68 67
@@ -75,7 +74,7 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
75 if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) 74 if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
76 return 0; 75 return 0;
77 76
78 err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd); 77 err = gk20a_channel_alloc_priv_cmdbuf(c, 4, wait_cmd);
79 if (err) { 78 if (err) {
80 gk20a_err(dev_from_gk20a(c->g), 79 gk20a_err(dev_from_gk20a(c->g),
81 "not enough priv cmd buffer space"); 80 "not enough priv cmd buffer space");
@@ -84,21 +83,18 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
84 83
85 add_wait_cmd(c->g, wait_cmd, 0, id, thresh); 84 add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
86 85
87 *entry = wait_cmd;
88 *fence = NULL;
89 return 0; 86 return 0;
90} 87}
91 88
92static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, 89static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
93 struct priv_cmd_entry **entry, 90 struct priv_cmd_entry *wait_cmd,
94 struct gk20a_fence **fence) 91 struct gk20a_fence *fence)
95{ 92{
96#ifdef CONFIG_SYNC 93#ifdef CONFIG_SYNC
97 int i; 94 int i;
98 int num_wait_cmds; 95 int num_wait_cmds;
99 struct sync_fence *sync_fence; 96 struct sync_fence *sync_fence;
100 struct sync_pt *pt; 97 struct sync_pt *pt;
101 struct priv_cmd_entry *wait_cmd = NULL;
102 struct gk20a_channel_syncpt *sp = 98 struct gk20a_channel_syncpt *sp =
103 container_of(s, struct gk20a_channel_syncpt, ops); 99 container_of(s, struct gk20a_channel_syncpt, ops);
104 struct channel_gk20a *c = sp->c; 100 struct channel_gk20a *c = sp->c;
@@ -134,7 +130,7 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
134 return 0; 130 return 0;
135 } 131 }
136 132
137 err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd); 133 err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, wait_cmd);
138 if (err) { 134 if (err) {
139 gk20a_err(dev_from_gk20a(c->g), 135 gk20a_err(dev_from_gk20a(c->g),
140 "not enough priv cmd buffer space"); 136 "not enough priv cmd buffer space");
@@ -172,8 +168,6 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
172 WARN_ON(i != num_wait_cmds); 168 WARN_ON(i != num_wait_cmds);
173 sync_fence_put(sync_fence); 169 sync_fence_put(sync_fence);
174 170
175 *entry = wait_cmd;
176 *fence = NULL;
177 return 0; 171 return 0;
178#else 172#else
179 return -ENODEV; 173 return -ENODEV;
@@ -193,15 +187,14 @@ static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
193static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, 187static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
194 bool wfi_cmd, 188 bool wfi_cmd,
195 bool register_irq, 189 bool register_irq,
196 struct priv_cmd_entry **entry, 190 struct priv_cmd_entry *incr_cmd,
197 struct gk20a_fence **fence, 191 struct gk20a_fence *fence,
198 bool need_sync_fence) 192 bool need_sync_fence)
199{ 193{
200 u32 thresh; 194 u32 thresh;
201 int incr_cmd_size; 195 int incr_cmd_size;
202 int off; 196 int off;
203 int err; 197 int err;
204 struct priv_cmd_entry *incr_cmd = NULL;
205 struct gk20a_channel_syncpt *sp = 198 struct gk20a_channel_syncpt *sp =
206 container_of(s, struct gk20a_channel_syncpt, ops); 199 container_of(s, struct gk20a_channel_syncpt, ops);
207 struct channel_gk20a *c = sp->c; 200 struct channel_gk20a *c = sp->c;
@@ -210,7 +203,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
210 if (wfi_cmd) 203 if (wfi_cmd)
211 incr_cmd_size += 2; 204 incr_cmd_size += 2;
212 205
213 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); 206 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
214 if (err) 207 if (err)
215 return err; 208 return err;
216 209
@@ -267,15 +260,21 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
267 } 260 }
268 } 261 }
269 262
270 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, 263 err = gk20a_fence_from_syncpt(fence, sp->host1x_pdev, sp->id, thresh,
271 wfi_cmd, need_sync_fence); 264 wfi_cmd, need_sync_fence);
272 *entry = incr_cmd; 265 if (err)
266 goto clean_up_priv_cmd;
267
273 return 0; 268 return 0;
269
270clean_up_priv_cmd:
271 gk20a_free_priv_cmdbuf(c, incr_cmd);
272 return err;
274} 273}
275 274
276static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, 275static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
277 struct priv_cmd_entry **entry, 276 struct priv_cmd_entry *entry,
278 struct gk20a_fence **fence) 277 struct gk20a_fence *fence)
279{ 278{
280 return __gk20a_channel_syncpt_incr(s, 279 return __gk20a_channel_syncpt_incr(s,
281 true /* wfi */, 280 true /* wfi */,
@@ -284,8 +283,8 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
284} 283}
285 284
286static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, 285static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
287 struct priv_cmd_entry **entry, 286 struct priv_cmd_entry *entry,
288 struct gk20a_fence **fence, 287 struct gk20a_fence *fence,
289 bool need_sync_fence) 288 bool need_sync_fence)
290{ 289{
291 /* Don't put wfi cmd to this one since we're not returning 290 /* Don't put wfi cmd to this one since we're not returning
@@ -298,8 +297,8 @@ static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
298 297
299static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, 298static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
300 int wait_fence_fd, 299 int wait_fence_fd,
301 struct priv_cmd_entry **entry, 300 struct priv_cmd_entry *entry,
302 struct gk20a_fence **fence, 301 struct gk20a_fence *fence,
303 bool wfi, 302 bool wfi,
304 bool need_sync_fence) 303 bool need_sync_fence)
305{ 304{
@@ -500,8 +499,8 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
500 499
501static int gk20a_channel_semaphore_wait_syncpt( 500static int gk20a_channel_semaphore_wait_syncpt(
502 struct gk20a_channel_sync *s, u32 id, 501 struct gk20a_channel_sync *s, u32 id,
503 u32 thresh, struct priv_cmd_entry **entry, 502 u32 thresh, struct priv_cmd_entry *entry,
504 struct gk20a_fence **fence) 503 struct gk20a_fence *fence)
505{ 504{
506 struct gk20a_channel_semaphore *sema = 505 struct gk20a_channel_semaphore *sema =
507 container_of(s, struct gk20a_channel_semaphore, ops); 506 container_of(s, struct gk20a_channel_semaphore, ops);
@@ -525,7 +524,7 @@ static int gk20a_channel_semaphore_wait_syncpt(
525 */ 524 */
526static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, 525static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
527 struct sync_fence *fence, 526 struct sync_fence *fence,
528 struct priv_cmd_entry **wait_cmd, 527 struct priv_cmd_entry *wait_cmd,
529 struct gk20a_semaphore **fp_sema) 528 struct gk20a_semaphore **fp_sema)
530{ 529{
531 struct gk20a_semaphore *sema; 530 struct gk20a_semaphore *sema;
@@ -551,7 +550,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
551 550
552 gk20a_semaphore_get(sema); 551 gk20a_semaphore_get(sema);
553 BUG_ON(!atomic_read(&sema->value)); 552 BUG_ON(!atomic_read(&sema->value));
554 add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false); 553 add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
555 554
556 /* 555 /*
557 * Make sure that gk20a_channel_semaphore_wait_fd() can create another 556 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
@@ -565,8 +564,8 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
565 564
566static int gk20a_channel_semaphore_wait_fd( 565static int gk20a_channel_semaphore_wait_fd(
567 struct gk20a_channel_sync *s, int fd, 566 struct gk20a_channel_sync *s, int fd,
568 struct priv_cmd_entry **entry, 567 struct priv_cmd_entry *entry,
569 struct gk20a_fence **fence) 568 struct gk20a_fence *fence)
570{ 569{
571 struct gk20a_channel_semaphore *sema = 570 struct gk20a_channel_semaphore *sema =
572 container_of(s, struct gk20a_channel_semaphore, ops); 571 container_of(s, struct gk20a_channel_semaphore, ops);
@@ -574,7 +573,7 @@ static int gk20a_channel_semaphore_wait_fd(
574#ifdef CONFIG_SYNC 573#ifdef CONFIG_SYNC
575 struct gk20a_semaphore *fp_sema; 574 struct gk20a_semaphore *fp_sema;
576 struct sync_fence *sync_fence; 575 struct sync_fence *sync_fence;
577 struct priv_cmd_entry *wait_cmd = NULL; 576 struct priv_cmd_entry *wait_cmd = entry;
578 struct wait_fence_work *w = NULL; 577 struct wait_fence_work *w = NULL;
579 int err, ret, status; 578 int err, ret, status;
580 579
@@ -582,19 +581,24 @@ static int gk20a_channel_semaphore_wait_fd(
582 if (!sync_fence) 581 if (!sync_fence)
583 return -EINVAL; 582 return -EINVAL;
584 583
585 ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema); 584 ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
586 if (ret == 0) { 585 if (ret == 0) {
587 if (fp_sema) 586 if (fp_sema) {
588 *fence = gk20a_fence_from_semaphore(sema->timeline, 587 err = gk20a_fence_from_semaphore(fence,
589 fp_sema, 588 sema->timeline,
590 &c->semaphore_wq, 589 fp_sema,
591 NULL, false, false); 590 &c->semaphore_wq,
592 else 591 NULL, false, false);
592 if (err) {
593 gk20a_semaphore_put(fp_sema);
594 goto clean_up_priv_cmd;
595 }
596 } else
593 /* 597 /*
594 * Allocate an empty fence. It will instantly return 598 * Init an empty fence. It will instantly return
595 * from gk20a_fence_wait(). 599 * from gk20a_fence_wait().
596 */ 600 */
597 *fence = gk20a_alloc_fence(NULL, NULL, false); 601 gk20a_init_fence(fence, NULL, NULL, false);
598 602
599 sync_fence_put(sync_fence); 603 sync_fence_put(sync_fence);
600 goto skip_slow_path; 604 goto skip_slow_path;
@@ -611,18 +615,17 @@ static int gk20a_channel_semaphore_wait_fd(
611 goto skip_slow_path; 615 goto skip_slow_path;
612 } 616 }
613 617
614 err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd); 618 err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
615 if (err) { 619 if (err) {
616 gk20a_err(dev_from_gk20a(c->g), 620 gk20a_err(dev_from_gk20a(c->g),
617 "not enough priv cmd buffer space"); 621 "not enough priv cmd buffer space");
618 sync_fence_put(sync_fence); 622 goto clean_up_sync_fence;
619 return -ENOMEM;
620 } 623 }
621 624
622 w = kzalloc(sizeof(*w), GFP_KERNEL); 625 w = kzalloc(sizeof(*w), GFP_KERNEL);
623 if (!w) { 626 if (!w) {
624 err = -ENOMEM; 627 err = -ENOMEM;
625 goto fail_free_cmdbuf; 628 goto clean_up_priv_cmd;
626 } 629 }
627 630
628 sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); 631 sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
@@ -631,7 +634,7 @@ static int gk20a_channel_semaphore_wait_fd(
631 if (!w->sema) { 634 if (!w->sema) {
632 gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores"); 635 gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
633 err = -ENOMEM; 636 err = -ENOMEM;
634 goto fail_free_worker; 637 goto clean_up_worker;
635 } 638 }
636 639
637 /* worker takes one reference */ 640 /* worker takes one reference */
@@ -641,6 +644,16 @@ static int gk20a_channel_semaphore_wait_fd(
641 /* GPU unblocked when the semaphore value increments. */ 644 /* GPU unblocked when the semaphore value increments. */
642 add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); 645 add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
643 646
647 /*
648 * We need to create the fence before adding the waiter to ensure
649 * that we properly clean up in the event the sync_fence has
650 * already signaled
651 */
652 err = gk20a_fence_from_semaphore(fence, sema->timeline, w->sema,
653 &c->semaphore_wq, NULL, false, false);
654 if (err)
655 goto clean_up_sema;
656
644 ret = sync_fence_wait_async(sync_fence, &w->waiter); 657 ret = sync_fence_wait_async(sync_fence, &w->waiter);
645 658
646 /* 659 /*
@@ -655,24 +668,22 @@ static int gk20a_channel_semaphore_wait_fd(
655 gk20a_semaphore_put(w->sema); 668 gk20a_semaphore_put(w->sema);
656 } 669 }
657 670
658 /* XXX - this fixes an actual bug, we need to hold a ref to this
659 semaphore while the job is in flight. */
660 *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
661 &c->semaphore_wq,
662 NULL, false, false);
663
664skip_slow_path: 671skip_slow_path:
665 *entry = wait_cmd;
666 return 0; 672 return 0;
667 673
668fail_free_worker: 674clean_up_sema:
669 if (w && w->sema) 675 /*
670 gk20a_semaphore_put(w->sema); 676 * Release the refs to the semaphore, including
677 * the one for the worker since it will never run.
678 */
679 gk20a_semaphore_put(w->sema);
680 gk20a_semaphore_put(w->sema);
681clean_up_worker:
671 kfree(w); 682 kfree(w);
683clean_up_priv_cmd:
684 gk20a_free_priv_cmdbuf(c, entry);
685clean_up_sync_fence:
672 sync_fence_put(sync_fence); 686 sync_fence_put(sync_fence);
673fail_free_cmdbuf:
674 if (wait_cmd)
675 gk20a_free_priv_cmdbuf(c, wait_cmd);
676 return err; 687 return err;
677#else 688#else
678 gk20a_err(dev_from_gk20a(c->g), 689 gk20a_err(dev_from_gk20a(c->g),
@@ -684,12 +695,11 @@ fail_free_cmdbuf:
684static int __gk20a_channel_semaphore_incr( 695static int __gk20a_channel_semaphore_incr(
685 struct gk20a_channel_sync *s, bool wfi_cmd, 696 struct gk20a_channel_sync *s, bool wfi_cmd,
686 struct sync_fence *dependency, 697 struct sync_fence *dependency,
687 struct priv_cmd_entry **entry, 698 struct priv_cmd_entry *incr_cmd,
688 struct gk20a_fence **fence, 699 struct gk20a_fence *fence,
689 bool need_sync_fence) 700 bool need_sync_fence)
690{ 701{
691 int incr_cmd_size; 702 int incr_cmd_size;
692 struct priv_cmd_entry *incr_cmd = NULL;
693 struct gk20a_channel_semaphore *sp = 703 struct gk20a_channel_semaphore *sp =
694 container_of(s, struct gk20a_channel_semaphore, ops); 704 container_of(s, struct gk20a_channel_semaphore, ops);
695 struct channel_gk20a *c = sp->c; 705 struct channel_gk20a *c = sp->c;
@@ -704,29 +714,37 @@ static int __gk20a_channel_semaphore_incr(
704 } 714 }
705 715
706 incr_cmd_size = 10; 716 incr_cmd_size = 10;
707 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); 717 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
708 if (err) { 718 if (err) {
709 gk20a_err(dev_from_gk20a(c->g), 719 gk20a_err(dev_from_gk20a(c->g),
710 "not enough priv cmd buffer space"); 720 "not enough priv cmd buffer space");
711 gk20a_semaphore_put(semaphore); 721 goto clean_up_sema;
712 return err;
713 } 722 }
714 723
715 /* Release the completion semaphore. */ 724 /* Release the completion semaphore. */
716 add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd); 725 add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
717 726
718 *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, 727 err = gk20a_fence_from_semaphore(fence,
719 &c->semaphore_wq, 728 sp->timeline, semaphore,
720 dependency, wfi_cmd, 729 &c->semaphore_wq,
721 need_sync_fence); 730 dependency, wfi_cmd,
722 *entry = incr_cmd; 731 need_sync_fence);
732 if (err)
733 goto clean_up_priv_cmd;
734
723 return 0; 735 return 0;
736
737clean_up_priv_cmd:
738 gk20a_free_priv_cmdbuf(c, incr_cmd);
739clean_up_sema:
740 gk20a_semaphore_put(semaphore);
741 return err;
724} 742}
725 743
726static int gk20a_channel_semaphore_incr_wfi( 744static int gk20a_channel_semaphore_incr_wfi(
727 struct gk20a_channel_sync *s, 745 struct gk20a_channel_sync *s,
728 struct priv_cmd_entry **entry, 746 struct priv_cmd_entry *entry,
729 struct gk20a_fence **fence) 747 struct gk20a_fence *fence)
730{ 748{
731 return __gk20a_channel_semaphore_incr(s, 749 return __gk20a_channel_semaphore_incr(s,
732 true /* wfi */, 750 true /* wfi */,
@@ -736,8 +754,8 @@ static int gk20a_channel_semaphore_incr_wfi(
736 754
737static int gk20a_channel_semaphore_incr( 755static int gk20a_channel_semaphore_incr(
738 struct gk20a_channel_sync *s, 756 struct gk20a_channel_sync *s,
739 struct priv_cmd_entry **entry, 757 struct priv_cmd_entry *entry,
740 struct gk20a_fence **fence, 758 struct gk20a_fence *fence,
741 bool need_sync_fence) 759 bool need_sync_fence)
742{ 760{
743 /* Don't put wfi cmd to this one since we're not returning 761 /* Don't put wfi cmd to this one since we're not returning
@@ -751,8 +769,8 @@ static int gk20a_channel_semaphore_incr(
751static int gk20a_channel_semaphore_incr_user( 769static int gk20a_channel_semaphore_incr_user(
752 struct gk20a_channel_sync *s, 770 struct gk20a_channel_sync *s,
753 int wait_fence_fd, 771 int wait_fence_fd,
754 struct priv_cmd_entry **entry, 772 struct priv_cmd_entry *entry,
755 struct gk20a_fence **fence, 773 struct gk20a_fence *fence,
756 bool wfi, 774 bool wfi,
757 bool need_sync_fence) 775 bool need_sync_fence)
758{ 776{
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 4b0918de..c3a92ad2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -36,8 +36,8 @@ struct gk20a_channel_sync {
36 * cmdbuf is executed. 36 * cmdbuf is executed.
37 */ 37 */
38 int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh, 38 int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
39 struct priv_cmd_entry **entry, 39 struct priv_cmd_entry *entry,
40 struct gk20a_fence **fence); 40 struct gk20a_fence *fence);
41 41
42 /* Generate a gpu wait cmdbuf from sync fd. 42 /* Generate a gpu wait cmdbuf from sync fd.
43 * Returns 43 * Returns
@@ -46,8 +46,8 @@ struct gk20a_channel_sync {
46 * cmdbuf is executed. 46 * cmdbuf is executed.
47 */ 47 */
48 int (*wait_fd)(struct gk20a_channel_sync *s, int fd, 48 int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
49 struct priv_cmd_entry **entry, 49 struct priv_cmd_entry *entry,
50 struct gk20a_fence **fence); 50 struct gk20a_fence *fence);
51 51
52 /* Increment syncpoint/semaphore. 52 /* Increment syncpoint/semaphore.
53 * Returns 53 * Returns
@@ -55,8 +55,8 @@ struct gk20a_channel_sync {
55 * - a fence that can be passed to wait_cpu() and is_expired(). 55 * - a fence that can be passed to wait_cpu() and is_expired().
56 */ 56 */
57 int (*incr)(struct gk20a_channel_sync *s, 57 int (*incr)(struct gk20a_channel_sync *s,
58 struct priv_cmd_entry **entry, 58 struct priv_cmd_entry *entry,
59 struct gk20a_fence **fence, 59 struct gk20a_fence *fence,
60 bool need_sync_fence); 60 bool need_sync_fence);
61 61
62 /* Increment syncpoint/semaphore, preceded by a wfi. 62 /* Increment syncpoint/semaphore, preceded by a wfi.
@@ -65,8 +65,8 @@ struct gk20a_channel_sync {
65 * - a fence that can be passed to wait_cpu() and is_expired(). 65 * - a fence that can be passed to wait_cpu() and is_expired().
66 */ 66 */
67 int (*incr_wfi)(struct gk20a_channel_sync *s, 67 int (*incr_wfi)(struct gk20a_channel_sync *s,
68 struct priv_cmd_entry **entry, 68 struct priv_cmd_entry *entry,
69 struct gk20a_fence **fence); 69 struct gk20a_fence *fence);
70 70
71 /* Increment syncpoint/semaphore, so that the returned fence represents 71 /* Increment syncpoint/semaphore, so that the returned fence represents
72 * work completion (may need wfi) and can be returned to user space. 72 * work completion (may need wfi) and can be returned to user space.
@@ -77,8 +77,8 @@ struct gk20a_channel_sync {
77 */ 77 */
78 int (*incr_user)(struct gk20a_channel_sync *s, 78 int (*incr_user)(struct gk20a_channel_sync *s,
79 int wait_fence_fd, 79 int wait_fence_fd,
80 struct priv_cmd_entry **entry, 80 struct priv_cmd_entry *entry,
81 struct gk20a_fence **fence, 81 struct gk20a_fence *fence,
82 bool wfi, 82 bool wfi,
83 bool need_sync_fence); 83 bool need_sync_fence);
84 84
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 596dc549..f788829f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
63 return f; 63 return f;
64} 64}
65 65
66static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
67{
68 bool valid = f->valid;
69
70 rmb();
71 return valid;
72}
73
66int gk20a_fence_wait(struct gk20a_fence *f, int timeout) 74int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
67{ 75{
68 if (!tegra_platform_is_silicon()) 76 if (f && gk20a_fence_is_valid(f)) {
69 timeout = (u32)MAX_SCHEDULE_TIMEOUT; 77 if (!tegra_platform_is_silicon())
70 return f->ops->wait(f, timeout); 78 timeout = (u32)MAX_SCHEDULE_TIMEOUT;
79 return f->ops->wait(f, timeout);
80 }
81 return 0;
71} 82}
72 83
73bool gk20a_fence_is_expired(struct gk20a_fence *f) 84bool gk20a_fence_is_expired(struct gk20a_fence *f)
74{ 85{
75 if (f && f->ops) 86 if (f && gk20a_fence_is_valid(f) && f->ops)
76 return f->ops->is_expired(f); 87 return f->ops->is_expired(f);
77 else 88 else
78 return true; 89 return true;
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
83#ifdef CONFIG_SYNC 94#ifdef CONFIG_SYNC
84 int fd; 95 int fd;
85 96
86 if (!f->sync_fence) 97 if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
87 return -EINVAL; 98 return -EINVAL;
88 99
89 fd = get_unused_fd_flags(O_RDWR); 100 fd = get_unused_fd_flags(O_RDWR);
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
98#endif 109#endif
99} 110}
100 111
101struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops, 112struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
102 struct sync_fence *sync_fence, bool wfi)
103{ 113{
104 struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL); 114 struct gk20a_fence *fence;
105 if (!f) 115
116 fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
117 if (!fence)
106 return NULL; 118 return NULL;
107 kref_init(&f->ref); 119
120 kref_init(&fence->ref);
121 return fence;
122}
123
124void gk20a_init_fence(struct gk20a_fence *f,
125 const struct gk20a_fence_ops *ops,
126 struct sync_fence *sync_fence, bool wfi)
127{
128 if (!f)
129 return;
108 f->ops = ops; 130 f->ops = ops;
109 f->sync_fence = sync_fence; 131 f->sync_fence = sync_fence;
110 f->wfi = wfi; 132 f->wfi = wfi;
111 f->syncpt_id = -1; 133 f->syncpt_id = -1;
112 return f;
113} 134}
114 135
115/* Fences that are backed by GPU semaphores: */ 136/* Fences that are backed by GPU semaphores: */
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
143}; 164};
144 165
145/* This function takes ownership of the semaphore */ 166/* This function takes ownership of the semaphore */
146struct gk20a_fence *gk20a_fence_from_semaphore( 167int gk20a_fence_from_semaphore(
168 struct gk20a_fence *fence_out,
147 struct sync_timeline *timeline, 169 struct sync_timeline *timeline,
148 struct gk20a_semaphore *semaphore, 170 struct gk20a_semaphore *semaphore,
149 wait_queue_head_t *semaphore_wq, 171 wait_queue_head_t *semaphore_wq,
150 struct sync_fence *dependency, 172 struct sync_fence *dependency,
151 bool wfi, bool need_sync_fence) 173 bool wfi, bool need_sync_fence)
152{ 174{
153 struct gk20a_fence *f; 175 struct gk20a_fence *f = fence_out;
154 struct sync_fence *sync_fence = NULL; 176 struct sync_fence *sync_fence = NULL;
155 177
156#ifdef CONFIG_SYNC 178#ifdef CONFIG_SYNC
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
159 dependency, "f-gk20a-0x%04x", 181 dependency, "f-gk20a-0x%04x",
160 gk20a_semaphore_gpu_ro_va(semaphore)); 182 gk20a_semaphore_gpu_ro_va(semaphore));
161 if (!sync_fence) 183 if (!sync_fence)
162 return NULL; 184 return -1;
163 } 185 }
164#endif 186#endif
165 187
166 f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi); 188 gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi);
167 if (!f) { 189 if (!f) {
168#ifdef CONFIG_SYNC 190#ifdef CONFIG_SYNC
169 sync_fence_put(sync_fence); 191 sync_fence_put(sync_fence);
170#endif 192#endif
171 return NULL; 193 return -EINVAL;
172 } 194 }
173 195
174 f->semaphore = semaphore; 196 f->semaphore = semaphore;
175 f->semaphore_wq = semaphore_wq; 197 f->semaphore_wq = semaphore_wq;
176 return f; 198
199 /* commit previous writes before setting the valid flag */
200 wmb();
201 f->valid = true;
202
203 return 0;
177} 204}
178 205
179#ifdef CONFIG_TEGRA_GK20A 206#ifdef CONFIG_TEGRA_GK20A
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
197 .is_expired = &gk20a_syncpt_fence_is_expired, 224 .is_expired = &gk20a_syncpt_fence_is_expired,
198}; 225};
199 226
200struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, 227int gk20a_fence_from_syncpt(
201 u32 id, u32 value, bool wfi, 228 struct gk20a_fence *fence_out,
202 bool need_sync_fence) 229 struct platform_device *host1x_pdev,
230 u32 id, u32 value, bool wfi,
231 bool need_sync_fence)
203{ 232{
204 struct gk20a_fence *f; 233 struct gk20a_fence *f = fence_out;
205 struct sync_fence *sync_fence = NULL; 234 struct sync_fence *sync_fence = NULL;
206 235
207#ifdef CONFIG_SYNC 236#ifdef CONFIG_SYNC
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
214 sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, 243 sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
215 "fence"); 244 "fence");
216 if (IS_ERR(sync_fence)) 245 if (IS_ERR(sync_fence))
217 return NULL; 246 return -1;
218 } 247 }
219#endif 248#endif
220 249
221 f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi); 250 gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
222 if (!f) { 251 if (!f) {
223#ifdef CONFIG_SYNC 252#ifdef CONFIG_SYNC
224 if (sync_fence) 253 if (sync_fence)
225 sync_fence_put(sync_fence); 254 sync_fence_put(sync_fence);
226#endif 255#endif
227 return NULL; 256 return -EINVAL;
228 } 257 }
229 f->host1x_pdev = host1x_pdev; 258 f->host1x_pdev = host1x_pdev;
230 f->syncpt_id = id; 259 f->syncpt_id = id;
231 f->syncpt_value = value; 260 f->syncpt_value = value;
232 return f; 261
262 /* commit previous writes before setting the valid flag */
263 wmb();
264 f->valid = true;
265
266 return 0;
233} 267}
234#else 268#else
235struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, 269int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
236 u32 id, u32 value, bool wfi) 270 u32 id, u32 value, bool wfi)
237{ 271{
238 return NULL; 272 return -EINVAL;
239} 273}
240#endif 274#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index 35488ea3..3fe2d8b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -31,6 +31,7 @@ struct gk20a_fence_ops;
31 31
32struct gk20a_fence { 32struct gk20a_fence {
33 /* Valid for all fence types: */ 33 /* Valid for all fence types: */
34 bool valid;
34 struct kref ref; 35 struct kref ref;
35 bool wfi; 36 bool wfi;
36 struct sync_fence *sync_fence; 37 struct sync_fence *sync_fence;
@@ -47,21 +48,25 @@ struct gk20a_fence {
47}; 48};
48 49
49/* Fences can be created from semaphores or syncpoint (id, value) pairs */ 50/* Fences can be created from semaphores or syncpoint (id, value) pairs */
50struct gk20a_fence *gk20a_fence_from_semaphore( 51int gk20a_fence_from_semaphore(
52 struct gk20a_fence *fence_out,
51 struct sync_timeline *timeline, 53 struct sync_timeline *timeline,
52 struct gk20a_semaphore *semaphore, 54 struct gk20a_semaphore *semaphore,
53 wait_queue_head_t *semaphore_wq, 55 wait_queue_head_t *semaphore_wq,
54 struct sync_fence *dependency, 56 struct sync_fence *dependency,
55 bool wfi, bool need_sync_fence); 57 bool wfi, bool need_sync_fence);
56 58
57struct gk20a_fence *gk20a_fence_from_syncpt( 59int gk20a_fence_from_syncpt(
60 struct gk20a_fence *fence_out,
58 struct platform_device *host1x_pdev, 61 struct platform_device *host1x_pdev,
59 u32 id, u32 value, bool wfi, 62 u32 id, u32 value, bool wfi,
60 bool need_sync_fence); 63 bool need_sync_fence);
61 64
62struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops, 65struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c);
63 struct sync_fence *sync_fence, 66
64 bool wfi); 67void gk20a_init_fence(struct gk20a_fence *f,
68 const struct gk20a_fence_ops *ops,
69 struct sync_fence *sync_fence, bool wfi);
65 70
66/* Fence operations */ 71/* Fence operations */
67void gk20a_fence_put(struct gk20a_fence *f); 72void gk20a_fence_put(struct gk20a_fence *f);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b34ff4a7..b2cca072 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -198,6 +198,7 @@ struct priv_cmd_queue {
198}; 198};
199 199
200struct priv_cmd_entry { 200struct priv_cmd_entry {
201 bool valid;
201 struct mem_desc *mem; 202 struct mem_desc *mem;
202 u32 off; /* offset in mem, in u32 entries */ 203 u32 off; /* offset in mem, in u32 entries */
203 u64 gva; 204 u64 gva;