 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 121 +++++----------
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h |   2 ++
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |   2 --
 3 files changed, 34 insertions(+), 91 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9bacb5c9..6015ab5e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -47,7 +47,6 @@ static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
 
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 		struct priv_cmd_entry *e);
-static void recycle_priv_cmdbuf(struct channel_gk20a *c);
 
 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
@@ -1179,9 +1178,6 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
 
 	q->size = q->mem.size / sizeof (u32);
 
-	INIT_LIST_HEAD(&q->head);
-	INIT_LIST_HEAD(&q->free);
-
 	return 0;
 
 clean_up:
@@ -1193,28 +1189,12 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
 {
 	struct vm_gk20a *ch_vm = c->vm;
 	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e;
-	struct list_head *pos, *tmp, *head;
 
 	if (q->size == 0)
 		return;
 
 	gk20a_gmmu_unmap_free(ch_vm, &q->mem);
 
-	/* free used list */
-	head = &q->head;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		free_priv_cmdbuf(c, e);
-	}
-
-	/* free free list */
-	head = &q->free;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		kfree(e);
-	}
-
 	memset(q, 0, sizeof(struct priv_cmd_queue));
 }
 
@@ -1226,7 +1206,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	struct priv_cmd_entry *e;
 	u32 free_count;
 	u32 size = orig_size;
-	bool no_retry = false;
 
 	gk20a_dbg_fn("size %d", orig_size);
 
@@ -1240,17 +1219,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
 			c->hw_chid, q->get, q->put);
 
-TRY_AGAIN:
 	free_count = (q->size - (q->put - q->get) - 1) % q->size;
 
-	if (size > free_count) {
-		if (!no_retry) {
-			recycle_priv_cmdbuf(c);
-			no_retry = true;
-			goto TRY_AGAIN;
-		} else
-			return -EAGAIN;
-	}
+	if (size > free_count)
+		return -EAGAIN;
 
 	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 	if (!e) {
@@ -1280,9 +1252,6 @@ TRY_AGAIN:
 	/* we already handled q->put + size > q->size so BUG_ON this */
 	BUG_ON(q->put > q->size);
 
-	/* add new entry to head since we free from head */
-	list_add(&e->list, &q->head);
-
 	*entry = e;
 
 	gk20a_dbg_fn("done");
@@ -1295,65 +1264,9 @@ TRY_AGAIN:
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 		struct priv_cmd_entry *e)
 {
-	if (!e)
-		return;
-
-	list_del(&e->list);
-
 	kfree(e);
 }
 
-/* free entries if they're no longer being used */
-static void recycle_priv_cmdbuf(struct channel_gk20a *c)
-{
-	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e, *tmp;
-	struct list_head *head = &q->head;
-	bool wrap_around, found = false;
-
-	gk20a_dbg_fn("");
-
-	/* Find the most recent free entry. Free it and everything before it */
-	list_for_each_entry(e, head, list) {
-
-		gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
-			"curr get:put:wrap %d:%d:%d",
-			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
-			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
-
-		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
-		if (e->gp_get < e->gp_put) {
-			if (c->gpfifo.get >= e->gp_put ||
-			    wrap_around) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		} else if (e->gp_get > e->gp_put) {
-			if (wrap_around &&
-			    c->gpfifo.get >= e->gp_put) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		}
-	}
-
-	if (found)
-		q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
-	else {
-		gk20a_dbg_info("no free entry recycled");
-		return;
-	}
-
-	list_for_each_entry_safe_continue(e, tmp, head, list) {
-		free_priv_cmdbuf(c, e);
-	}
-
-	gk20a_dbg_fn("done");
-}
-
-
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvgpu_alloc_gpfifo_args *args)
 {
@@ -1724,9 +1637,31 @@ fail_unlock:
 	gk20a_channel_put(ch);
 }
 
+static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
+		struct priv_cmd_entry *e)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	u32 cmd_entry_start;
+	struct device *d = dev_from_gk20a(c->g);
+
+	if (!e)
+		return 0;
+
+	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
+	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
+		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
+
+	q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
+	free_priv_cmdbuf(c, e);
+
+	return 0;
+}
+
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 		struct gk20a_fence *pre_fence,
 		struct gk20a_fence *post_fence,
+		struct priv_cmd_entry *wait_cmd,
+		struct priv_cmd_entry *incr_cmd,
 		bool skip_buffer_refcounting)
 {
 	struct vm_gk20a *vm = c->vm;
@@ -1761,6 +1696,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	job->mapped_buffers = mapped_buffers;
 	job->pre_fence = gk20a_fence_get(pre_fence);
 	job->post_fence = gk20a_fence_get(post_fence);
+	job->wait_cmd = wait_cmd;
+	job->incr_cmd = incr_cmd;
 
 	gk20a_channel_timeout_start(c, job);
 
@@ -1808,6 +1745,11 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 		gk20a_fence_put(job->pre_fence);
 		gk20a_fence_put(job->post_fence);
 
+		/* Free the private command buffers (wait_cmd first and
+		 * then incr_cmd i.e. order of allocation) */
+		gk20a_free_priv_cmdbuf(c, job->wait_cmd);
+		gk20a_free_priv_cmdbuf(c, job->incr_cmd);
+
 		/* job is done. release its vm reference (taken in add_job) */
 		gk20a_vm_put(vm);
 		/* another bookkeeping taken in add_job. caller must hold a ref
@@ -2114,6 +2056,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	/* TODO! Check for errors... */
 	gk20a_channel_add_job(c, pre_fence, post_fence,
+				wait_cmd, incr_cmd,
 				skip_buffer_refcounting);
 
 	c->cmds_pending = true;
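
A note on the allocator hunk above: the retained line free_count = (q->size - (q->put - q->get) - 1) % q->size is standard ring-buffer arithmetic. One word is deliberately kept unusable so that put == get can only mean "empty" rather than "full", and the unsigned subtraction folds wrap-around in for free. A minimal standalone sketch (a hypothetical toy_queue, not the driver's type) exercising the same formula:

#include <assert.h>
#include <stdio.h>

/* Hypothetical stand-in for priv_cmd_queue: only the fields the
 * free-count arithmetic needs. All names here are illustrative. */
struct toy_queue {
	unsigned size;	/* capacity in words */
	unsigned put;	/* producer index, kept in [0, size) */
	unsigned get;	/* consumer index, kept in [0, size) */
};

/* Same expression as in gk20a_channel_alloc_priv_cmdbuf(): the "- 1"
 * reserves one word so a full queue is distinguishable from an empty
 * one, and the "% size" folds the unsigned wrap-around back in range. */
static unsigned free_words(const struct toy_queue *q)
{
	return (q->size - (q->put - q->get) - 1) % q->size;
}

int main(void)
{
	struct toy_queue q = { .size = 8, .put = 0, .get = 0 };

	assert(free_words(&q) == 7);	/* empty: size - 1 usable words */

	q.put = 5;			/* producer wrote 5 words */
	assert(free_words(&q) == 2);

	q.get = 5;			/* consumer caught up: empty again */
	assert(free_words(&q) == 7);

	q.put = 4; q.get = 6;		/* put wrapped past the end */
	assert(free_words(&q) == 1);

	printf("ring-buffer free-count checks passed\n");
	return 0;
}

Each assert corresponds to a queue state the driver can reach: empty, partially full, drained, and wrapped.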
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index d5f5e6a2..245db56a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -62,6 +62,8 @@ struct channel_gk20a_job {
 	int num_mapped_buffers;
 	struct gk20a_fence *pre_fence;
 	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
 	struct list_head list;
 };
 
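
The wait_cmd/incr_cmd fields added above carry the commit's core simplification: because jobs complete in submission order, each job can own its two priv cmd entries outright, and gk20a_channel_update() frees them in allocation order, letting the queue's get pointer simply chase the completed entries instead of walking the old q->head list in recycle_priv_cmdbuf(). A toy model of that lifecycle, with illustrative names and with wrap/free-count handling elided (see the sketch above):

#include <stdio.h>
#include <stdlib.h>

struct toy_entry {
	unsigned start;	/* offset of the entry in the ring, in words */
	unsigned size;	/* length of the entry, in words */
};

struct toy_queue {
	unsigned size, put, get;
};

/* As in the new channel_gk20a_job fields: the job owns its entries. */
struct toy_job {
	struct toy_entry *wait_cmd;	/* may be NULL */
	struct toy_entry *incr_cmd;
};

/* Allocate a toy entry at the current put pointer. */
static struct toy_entry *toy_alloc(struct toy_queue *q, unsigned size)
{
	struct toy_entry *e = malloc(sizeof(*e));

	e->start = q->put;
	e->size = size;
	q->put = (q->put + size) % q->size;
	return e;
}

/* Mirrors the shape of gk20a_free_priv_cmdbuf() added above: because
 * jobs complete in order, freeing an entry just moves get past it;
 * anything else indicates an out-of-order request. */
static void toy_free(struct toy_queue *q, struct toy_entry *e)
{
	if (!e)
		return;
	if (q->get != e->start && e->start != 0)
		fprintf(stderr, "requests out-of-order\n");
	q->get = e->start + e->size;
	free(e);
}

int main(void)
{
	struct toy_queue q = { .size = 64, .put = 0, .get = 0 };
	struct toy_job job;

	job.wait_cmd = toy_alloc(&q, 8);	/* allocated first... */
	job.incr_cmd = toy_alloc(&q, 4);

	/* ...so on completion it is freed first, as in
	 * gk20a_channel_update() above. */
	toy_free(&q, job.wait_cmd);
	toy_free(&q, job.incr_cmd);
	printf("get=%u put=%u\n", q.get, q.put);	/* get=12 put=12 */
	return 0;
}

The out-of-order warning mirrors the new gk20a_free_priv_cmdbuf(): with in-order completion it should never fire, so it serves as a cheap sanity check rather than a recovery path.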
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2dd4ccf5..ac55e988 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -160,8 +160,6 @@ struct priv_cmd_queue {
 	u32 size; /* num of entries in words */
 	u32 put; /* put for priv cmd queue */
 	u32 get; /* get for priv cmd queue */
-	struct list_head free; /* list of pre-allocated free entries */
-	struct list_head head; /* list of used entries */
 };
 
 struct priv_cmd_entry {