author	Deepak Nibade <dnibade@nvidia.com>	2015-11-04 03:36:37 -0500
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-11-23 11:33:01 -0500
commit	2d40ebb1caa313d5d12a13f15b2623faa3ad914b (patch)
tree	ac9463d1255e896ec06fb7b03870a5323ff9ceb7 /drivers/gpu
parent	f50d0ffb15aef2cbf419b81cdbc3031097767bff (diff)
gpu: nvgpu: rework private command buffer free path
We currently allocate private command buffers (wait_cmd and incr_cmd) before submitting a job, but we never free them explicitly. When the channel's private command queue is full, we instead try to recycle/remove free command buffers. Since this recycling happens on the submit path, that particular submit takes much longer.

Rework this as below:
- add references to the command buffers to the job structure
- when the job completes, free the command buffers explicitly
- remove the code to recycle buffers, since it should no longer be needed

Note that command buffers need to be freed in the order of their allocation. Ensure this with an error print before freeing the command buffer entry.

Bug 200141116
Bug 1698667

Change-Id: Id4b69429d7ad966307e0d122a71ad55076684307
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/827638
(cherry picked from commit c6cefd69b71c9b70d6df5343b13dfcfb3fa99598)
Reviewed-on: http://git-master/r/835802
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
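To make the reworked flow easier to follow, here is a minimal, self-contained sketch in plain C (not driver code) of the idea behind the patch: free space in the circular private command queue is derived from get/put, each job carries its wait/incr entries, and the entries are released in allocation order when the job completes, with an error print if they come back out of order. The types and helpers below (priv_cmd_queue, alloc_entry, free_entry) are simplified stand-ins for the nvgpu structures, and wrap-around of an entry across the end of the queue is ignored for brevity.

/* Minimal sketch (not driver code) of the reworked private command
 * buffer free path: allocate at 'put', free in allocation order at 'get'. */
#include <stdio.h>
#include <stdlib.h>

struct priv_cmd_entry {
	unsigned int start;	/* offset of the entry in the queue, in words */
	unsigned int size;	/* entry size in words */
};

struct priv_cmd_queue {
	unsigned int size;	/* queue size in words */
	unsigned int put;
	unsigned int get;
};

/* Space left in the circular queue, mirroring the driver's arithmetic. */
static unsigned int free_count(const struct priv_cmd_queue *q)
{
	return (q->size - (q->put - q->get) - 1) % q->size;
}

/* Allocate an entry at 'put'; with the recycle path gone, a full queue
 * simply fails (the driver returns -EAGAIN here). */
static struct priv_cmd_entry *alloc_entry(struct priv_cmd_queue *q,
					  unsigned int size)
{
	struct priv_cmd_entry *e;

	if (size > free_count(q))
		return NULL;
	e = calloc(1, sizeof(*e));
	if (!e)
		return NULL;
	e->start = q->put;
	e->size = size;
	q->put = (q->put + size) % q->size;
	return e;
}

/* Free an entry when its job completes. Entries must come back in
 * allocation order; warn (like the patch's gk20a_err) if they do not. */
static void free_entry(struct priv_cmd_queue *q, struct priv_cmd_entry *e)
{
	if (!e)
		return;
	if (q->get != e->start && e->start != 0)
		fprintf(stderr, "requests out-of-order\n");
	q->get = e->start + e->size;
	free(e);
}

int main(void)
{
	struct priv_cmd_queue q = { .size = 128 };
	/* one job: wait_cmd is allocated first, incr_cmd second ... */
	struct priv_cmd_entry *wait_cmd = alloc_entry(&q, 8);
	struct priv_cmd_entry *incr_cmd = alloc_entry(&q, 10);

	/* ... so on job completion they are released in the same order */
	free_entry(&q, wait_cmd);
	free_entry(&q, incr_cmd);
	printf("free words: %u\n", free_count(&q));
	return 0;
}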
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	121
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.h	2
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	2
3 files changed, 34 insertions, 91 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9bacb5c9..6015ab5e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -47,7 +47,6 @@ static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
 
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 			     struct priv_cmd_entry *e);
-static void recycle_priv_cmdbuf(struct channel_gk20a *c);
 
 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
@@ -1179,9 +1178,6 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
 
 	q->size = q->mem.size / sizeof (u32);
 
-	INIT_LIST_HEAD(&q->head);
-	INIT_LIST_HEAD(&q->free);
-
 	return 0;
 
 clean_up:
@@ -1193,28 +1189,12 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
 {
 	struct vm_gk20a *ch_vm = c->vm;
 	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e;
-	struct list_head *pos, *tmp, *head;
 
 	if (q->size == 0)
 		return;
 
 	gk20a_gmmu_unmap_free(ch_vm, &q->mem);
 
-	/* free used list */
-	head = &q->head;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		free_priv_cmdbuf(c, e);
-	}
-
-	/* free free list */
-	head = &q->free;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		kfree(e);
-	}
-
 	memset(q, 0, sizeof(struct priv_cmd_queue));
 }
 
@@ -1226,7 +1206,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	struct priv_cmd_entry *e;
 	u32 free_count;
 	u32 size = orig_size;
-	bool no_retry = false;
 
 	gk20a_dbg_fn("size %d", orig_size);
 
@@ -1240,17 +1219,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
 			c->hw_chid, q->get, q->put);
 
-TRY_AGAIN:
 	free_count = (q->size - (q->put - q->get) - 1) % q->size;
 
-	if (size > free_count) {
-		if (!no_retry) {
-			recycle_priv_cmdbuf(c);
-			no_retry = true;
-			goto TRY_AGAIN;
-		} else
-			return -EAGAIN;
-	}
+	if (size > free_count)
+		return -EAGAIN;
 
 	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 	if (!e) {
@@ -1280,9 +1252,6 @@ TRY_AGAIN:
 	/* we already handled q->put + size > q->size so BUG_ON this */
 	BUG_ON(q->put > q->size);
 
-	/* add new entry to head since we free from head */
-	list_add(&e->list, &q->head);
-
 	*entry = e;
 
 	gk20a_dbg_fn("done");
@@ -1295,65 +1264,9 @@ TRY_AGAIN:
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 	struct priv_cmd_entry *e)
 {
-	if (!e)
-		return;
-
-	list_del(&e->list);
-
 	kfree(e);
 }
 
-/* free entries if they're no longer being used */
-static void recycle_priv_cmdbuf(struct channel_gk20a *c)
-{
-	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e, *tmp;
-	struct list_head *head = &q->head;
-	bool wrap_around, found = false;
-
-	gk20a_dbg_fn("");
-
-	/* Find the most recent free entry. Free it and everything before it */
-	list_for_each_entry(e, head, list) {
-
-		gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
-			"curr get:put:wrap %d:%d:%d",
-			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
-			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
-
-		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
-		if (e->gp_get < e->gp_put) {
-			if (c->gpfifo.get >= e->gp_put ||
-			    wrap_around) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		} else if (e->gp_get > e->gp_put) {
-			if (wrap_around &&
-			    c->gpfifo.get >= e->gp_put) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		}
-	}
-
-	if (found)
-		q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
-	else {
-		gk20a_dbg_info("no free entry recycled");
-		return;
-	}
-
-	list_for_each_entry_safe_continue(e, tmp, head, list) {
-		free_priv_cmdbuf(c, e);
-	}
-
-	gk20a_dbg_fn("done");
-}
-
-
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvgpu_alloc_gpfifo_args *args)
 {
@@ -1724,9 +1637,31 @@ fail_unlock:
 	gk20a_channel_put(ch);
 }
 
+static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
+			struct priv_cmd_entry *e)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	u32 cmd_entry_start;
+	struct device *d = dev_from_gk20a(c->g);
+
+	if (!e)
+		return 0;
+
+	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
+	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
+		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
+
+	q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
+	free_priv_cmdbuf(c, e);
+
+	return 0;
+}
+
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 				 struct gk20a_fence *pre_fence,
 				 struct gk20a_fence *post_fence,
+				 struct priv_cmd_entry *wait_cmd,
+				 struct priv_cmd_entry *incr_cmd,
 				 bool skip_buffer_refcounting)
 {
 	struct vm_gk20a *vm = c->vm;
@@ -1761,6 +1696,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	job->mapped_buffers = mapped_buffers;
 	job->pre_fence = gk20a_fence_get(pre_fence);
 	job->post_fence = gk20a_fence_get(post_fence);
+	job->wait_cmd = wait_cmd;
+	job->incr_cmd = incr_cmd;
 
 	gk20a_channel_timeout_start(c, job);
 
@@ -1808,6 +1745,11 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	gk20a_fence_put(job->pre_fence);
 	gk20a_fence_put(job->post_fence);
 
+	/* Free the private command buffers (wait_cmd first and
+	 * then incr_cmd i.e. order of allocation) */
+	gk20a_free_priv_cmdbuf(c, job->wait_cmd);
+	gk20a_free_priv_cmdbuf(c, job->incr_cmd);
+
 	/* job is done. release its vm reference (taken in add_job) */
 	gk20a_vm_put(vm);
 	/* another bookkeeping taken in add_job. caller must hold a ref
@@ -2114,6 +2056,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	/* TODO! Check for errors... */
 	gk20a_channel_add_job(c, pre_fence, post_fence,
+				wait_cmd, incr_cmd,
 				skip_buffer_refcounting);
 
 	c->cmds_pending = true;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index d5f5e6a2..245db56a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -62,6 +62,8 @@ struct channel_gk20a_job {
 	int num_mapped_buffers;
 	struct gk20a_fence *pre_fence;
 	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
 	struct list_head list;
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2dd4ccf5..ac55e988 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -160,8 +160,6 @@ struct priv_cmd_queue {
 	u32 size;	/* num of entries in words */
 	u32 put;	/* put for priv cmd queue */
 	u32 get;	/* get for priv cmd queue */
-	struct list_head free;	/* list of pre-allocated free entries */
-	struct list_head head;	/* list of used entries */
 };
 
 struct priv_cmd_entry {